├── .github ├── ISSUE_TEMPLATE │ └── bug_report.md └── workflows │ └── python-package.yml ├── .gitignore ├── .readthedocs.yaml ├── LICENSE ├── README.md ├── benchmarks ├── Benchmark_1_Distributions.ipynb ├── Benchmark_2_General_Mixture_Models.ipynb ├── Benchmark_3_KMeans.ipynb ├── Benchmark_4_Bayes_Classifier.ipynb └── Benchmark_5_Hidden_Markov_Model.ipynb ├── docs ├── CODE_OF_CONDUCT.rst ├── Makefile ├── _static │ └── custom.css ├── _templates │ └── class.rst ├── api.rst ├── conf.py ├── faq.rst ├── index.rst ├── install.rst ├── logo │ ├── pomegranate-logo.png │ └── pomegranate_comparison.png ├── requirements.txt ├── tutorials │ ├── B_Model_Tutorial_1_Distributions.ipynb │ ├── B_Model_Tutorial_2_General_Mixture_Models.ipynb │ ├── B_Model_Tutorial_3_Bayes_Classifier.ipynb │ ├── B_Model_Tutorial_4_Hidden_Markov_Models.ipynb │ ├── B_Model_Tutorial_5_Markov_Chains.ipynb │ ├── B_Model_Tutorial_6_Bayesian_Networks.ipynb │ ├── B_Model_Tutorial_7_Factor_Graphs.ipynb │ ├── C_Feature_Tutorial_1_GPU_Usage.ipynb │ ├── C_Feature_Tutorial_2_Mixed_Precision_and_DataTypes.ipynb │ ├── C_Feature_Tutorial_3_Out_Of_Core_Learning.ipynb │ └── C_Feature_Tutorial_4_Priors_and_Semi-supervised_Learning.ipynb └── whats_new.rst ├── examples └── Bayesian_Network_Monty_Hall.ipynb ├── pomegranate ├── __init__.py ├── _bayes.py ├── _utils.py ├── bayes_classifier.py ├── bayesian_network.py ├── distributions │ ├── __init__.py │ ├── _distribution.py │ ├── bernoulli.py │ ├── categorical.py │ ├── conditional_categorical.py │ ├── dirac_delta.py │ ├── exponential.py │ ├── gamma.py │ ├── halfnormal.py │ ├── independent_components.py │ ├── joint_categorical.py │ ├── lognormal.py │ ├── normal.py │ ├── poisson.py │ ├── student_t.py │ ├── uniform.py │ └── zero_inflated.py ├── factor_graph.py ├── gmm.py ├── hmm │ ├── __init__.py │ ├── _base.py │ ├── dense_hmm.py │ └── sparse_hmm.py ├── kmeans.py └── markov_chain.py ├── requirements.txt ├── setup.py ├── slides ├── pomegranate ODSC East 2019.pdf ├── pomegranate ODSC Europe 2020.pdf ├── pomegranate ODSC West 2017.pdf ├── pomegranate ODSC West 2018.pdf ├── pomegranate ODSC West 2019.pdf ├── pomegranate PyData NYC 2017.pdf ├── pomegranate data intelligence 2017.pdf ├── pomegranate odsc east 2017 turorial.pdf ├── pomegranate pydata seattle 2017.pdf └── pomegranate scipy 2017.pdf └── tests ├── __init__.py ├── distributions ├── __init__.py ├── _utils.py ├── test_bernoulli.py ├── test_categorical.py ├── test_conditional_categorical.py ├── test_dirac_delta.py ├── test_exponential.py ├── test_gamma.py ├── test_independent_component.py ├── test_joint_categorical.py ├── test_normal_diagonal.py ├── test_normal_full.py ├── test_poisson.py ├── test_student_t.py └── test_uniform.py ├── hmm ├── __init__.py ├── test_dense_hmm.py └── test_sparse_hmm.py ├── test_bayes_classifier.py ├── test_bayesian_network.py ├── test_bayesian_network_structure_learning.py ├── test_factor_graph.py ├── test_gmm.py ├── test_kmeans.py ├── test_markov_chain.py ├── test_semisupervised.py ├── test_utils.py └── tools.py /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: "[BUG]" 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is, including what you were expecting to happen and what actually happened. Please report the version of pomegranate that you are using and the operating system. 
Also, please make sure that you have upgraded to the latest version of pomegranate before submitting the bug report. 12 | 13 | **To Reproduce** 14 | Please provide a snippet of code that can reproduce this error. It is much easier for us to track down bugs and fix them if we have an example script that fails until we're successful. 15 | 16 | **Response time** 17 | Although I will likely respond during weekdays if I am not on vacation, I am not likely to be able to merge PRs or write code until the weekend. 18 | -------------------------------------------------------------------------------- /.github/workflows/python-package.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python 3 | 4 | 5 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 6 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 7 | 8 | name: build 9 | 10 | on: 11 | push: 12 | branches: [ master ] 13 | pull_request: 14 | branches: [ master ] 15 | 16 | jobs: 17 | build: 18 | name: ${{ matrix.os }} Python ${{ matrix.python-version }} 19 | runs-on: ${{ matrix.os }} 20 | strategy: 21 | matrix: 22 | os: [ubuntu-latest, macOS-latest] 23 | python-version: ['3.8', '3.9', '3.10', '3.11', '3.12'] 24 | 25 | steps: 26 | - uses: actions/checkout@v3 27 | - name: Set up Python ${{ matrix.os }} ${{ matrix.python-version }} 28 | uses: actions/setup-python@v3 29 | with: 30 | python-version: ${{ matrix.python-version }} 31 | - name: Install dependencies 32 | run: | 33 | python -m pip install --upgrade pip 34 | python -m pip install flake8 pytest 35 | pip install -r requirements.txt 36 | - name: Lint with flake8 37 | run: | 38 | # stop the build if there are Python syntax errors or undefined names 39 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 40 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 41 | flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 42 | - name: Test with pytest 43 | run: | 44 | pytest -m "not sample" -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.c 3 | .ipynb_checkpoints 4 | *~ 5 | .DS_Store 6 | build 7 | *.so 8 | .idea/ 9 | .vscode/ 10 | dist/ 11 | .eggs/ 12 | *.egg-info/ 13 | *.pyd 14 | .python-version 15 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yaml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 2 7 | 8 | # Set the version of Python and other tools you might need 9 | build: 10 | os: ubuntu-22.04 11 | tools: 12 | python: "3.10" 13 | 14 | 15 | # Build documentation in the docs/ directory with Sphinx 16 | sphinx: 17 | configuration: docs/conf.py 18 | 19 | 20 | # Optionally declare the Python requirements required to build your docs 21 | python: 22 | install: 23 | - requirements: docs/requirements.txt 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Jacob Schreiber 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /benchmarks/Benchmark_1_Distributions.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "6bc2e9e8", 7 | "metadata": {}, 8 | "outputs": [ 9 | { 10 | "name": "stdout", 11 | "output_type": "stream", 12 | "text": [ 13 | "numpy : 1.23.4\n", 14 | "scipy : 1.9.3\n", 15 | "torch : 1.12.1\n", 16 | "pomegranate: 0.14.8\n", 17 | "\n", 18 | "Compiler : GCC 11.2.0\n", 19 | "OS : Linux\n", 20 | "Release : 4.15.0-197-generic\n", 21 | "Machine : x86_64\n", 22 | "Processor : x86_64\n", 23 | "CPU cores : 8\n", 24 | "Architecture: 64bit\n", 25 | "\n" 26 | ] 27 | } 28 | ], 29 | "source": [ 30 | "import numpy\n", 31 | "import scipy\n", 32 | "import torch\n", 33 | "\n", 34 | "from torchegranate.distributions import *\n", 35 | "\n", 36 | "numpy.random.seed(0)\n", 37 | "numpy.set_printoptions(suppress=True)\n", 38 | "\n", 39 | "%load_ext watermark\n", 40 | "%watermark -m -n -p numpy,scipy,torch,pomegranate" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "id": "7dd56360", 46 | "metadata": {}, 47 | "source": [ 48 | "### Normal w/ Diagonal Covariance Distributions" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 2, 54 | "id": "5efcc291", 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "n, d = 100000, 500\n", 59 | "\n", 60 | "X = torch.randn(n, d)\n", 61 | "Xn = X.numpy()\n", 62 | "\n", 63 | "mus = torch.randn(d)\n", 64 | "covs = torch.abs(torch.randn(d))\n", 65 | "stds = torch.sqrt(covs)" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 3, 71 | "id": "1c3325d9", 72 | "metadata": {}, 73 | "outputs": [ 74 | { 75 | "name": "stdout", 76 | "output_type": "stream", 77 | "text": [ 78 | "143 ms ± 12.5 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n", 79 | "227 ms ± 14.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", 80 | "1.12 s ± 18.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" 81 | ] 82 | } 83 | ], 84 | "source": [ 85 | "%timeit Normal(mus, covs, covariance_type='diag').log_probability(X)\n", 86 | "%timeit torch.distributions.Normal(mus, stds).log_prob(X).sum(dim=-1)\n", 87 | "%timeit scipy.stats.norm.logpdf(Xn, mus, stds).sum(axis=1)" 88 | ] 89 | }, 90 | { 91 | "cell_type": "markdown", 92 | "id": "bd46b4b0", 93 | "metadata": {}, 94 | "source": [ 95 | "### Normal w/ Full Covariance Distribution" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 4, 101 | "id": "07fab284", 102 | "metadata": {}, 103 | "outputs": [], 104 | "source": [ 105 | "d0 = Normal().fit(X)\n", 106 | "\n", 107 | "mu, cov = d0.means, d0.covs" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 5, 113 | "id": "194d7679", 114 | "metadata": {}, 115 | "outputs": [ 116 | { 117 | "name": "stdout", 118 | "output_type": "stream", 119 | "text": [ 120 | "211 ms ± 19.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", 121 | "205 ms ± 22.3 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n", 122 | "765 ms ± 36.1 ms per loop (mean ± std. dev. 
of 7 runs, 1 loop each)\n" 123 | ] 124 | } 125 | ], 126 | "source": [ 127 | "%timeit Normal(mu, cov).log_probability(X)\n", 128 | "%timeit torch.distributions.MultivariateNormal(mu, cov).log_prob(X).sum(dim=-1)\n", 129 | "%timeit scipy.stats.multivariate_normal.logpdf(Xn, mu, cov).sum(axis=-1)" 130 | ] 131 | }, 132 | { 133 | "cell_type": "markdown", 134 | "id": "1a5adc6a", 135 | "metadata": {}, 136 | "source": [ 137 | "### Exponential Distribution" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": 6, 143 | "id": "f70bb98d", 144 | "metadata": {}, 145 | "outputs": [], 146 | "source": [ 147 | "X = torch.abs(torch.randn(n, d))\n", 148 | "Xn = X.numpy()\n", 149 | "\n", 150 | "means = torch.abs(torch.randn(d))" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": 7, 156 | "id": "ab3d0af3", 157 | "metadata": {}, 158 | "outputs": [ 159 | { 160 | "name": "stdout", 161 | "output_type": "stream", 162 | "text": [ 163 | "150 ms ± 1.31 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n", 164 | "89 ms ± 3.47 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n", 165 | "1.36 s ± 86.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" 166 | ] 167 | } 168 | ], 169 | "source": [ 170 | "%timeit Exponential(means).log_probability(X)\n", 171 | "%timeit torch.distributions.Exponential(means).log_prob(X)\n", 172 | "%timeit scipy.stats.expon.logpdf(X, means)" 173 | ] 174 | }, 175 | { 176 | "cell_type": "markdown", 177 | "id": "5108fce5", 178 | "metadata": {}, 179 | "source": [ 180 | "### Gamma Distribution" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": 8, 186 | "id": "06865521", 187 | "metadata": {}, 188 | "outputs": [], 189 | "source": [ 190 | "shapes = torch.abs(torch.randn(d))\n", 191 | "rates = torch.abs(torch.randn(d))" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": 9, 197 | "id": "2459f3f0", 198 | "metadata": {}, 199 | "outputs": [ 200 | { 201 | "name": "stdout", 202 | "output_type": "stream", 203 | "text": [ 204 | "270 ms ± 9.09 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", 205 | "250 ms ± 30.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", 206 | "2.67 s ± 75.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" 207 | ] 208 | } 209 | ], 210 | "source": [ 211 | "%timeit Gamma(shapes, rates).log_probability(X)\n", 212 | "%timeit torch.distributions.Gamma(shapes, rates).log_prob(X)\n", 213 | "%timeit scipy.stats.gamma.logpdf(X, shapes, rates)" 214 | ] 215 | }, 216 | { 217 | "cell_type": "markdown", 218 | "id": "c81f8f06", 219 | "metadata": {}, 220 | "source": [ 221 | "### Bernoulli Distribution" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": 10, 227 | "id": "7cee5e63", 228 | "metadata": {}, 229 | "outputs": [], 230 | "source": [ 231 | "X = torch.tensor(numpy.random.choice(2, size=(n, d)), dtype=torch.float32)\n", 232 | "probs = torch.mean(X, dim=0)" 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": 11, 238 | "id": "0f697993", 239 | "metadata": {}, 240 | "outputs": [ 241 | { 242 | "name": "stdout", 243 | "output_type": "stream", 244 | "text": [ 245 | "181 ms ± 8.04 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n", 246 | "419 ms ± 20.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", 247 | "3.78 s ± 66.8 ms per loop (mean ± std. dev. 
of 7 runs, 1 loop each)\n" 248 | ] 249 | } 250 | ], 251 | "source": [ 252 | "%timeit Bernoulli(probs).log_probability(X)\n", 253 | "%timeit torch.distributions.Bernoulli(probs).log_prob(X)\n", 254 | "%timeit scipy.stats.bernoulli.logpmf(X, probs)" 255 | ] 256 | } 257 | ], 258 | "metadata": { 259 | "kernelspec": { 260 | "display_name": "Python 3 (ipykernel)", 261 | "language": "python", 262 | "name": "python3" 263 | }, 264 | "language_info": { 265 | "codemirror_mode": { 266 | "name": "ipython", 267 | "version": 3 268 | }, 269 | "file_extension": ".py", 270 | "mimetype": "text/x-python", 271 | "name": "python", 272 | "nbconvert_exporter": "python", 273 | "pygments_lexer": "ipython3", 274 | "version": "3.9.13" 275 | } 276 | }, 277 | "nbformat": 4, 278 | "nbformat_minor": 5 279 | } 280 | -------------------------------------------------------------------------------- /benchmarks/Benchmark_4_Bayes_Classifier.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "752ca88f", 7 | "metadata": {}, 8 | "outputs": [ 9 | { 10 | "name": "stdout", 11 | "output_type": "stream", 12 | "text": [ 13 | "numpy : 1.23.4\n", 14 | "scipy : 1.9.3\n", 15 | "torch : 1.12.1\n", 16 | "pomegranate: 0.14.8\n", 17 | "\n", 18 | "Compiler : GCC 11.2.0\n", 19 | "OS : Linux\n", 20 | "Release : 4.15.0-197-generic\n", 21 | "Machine : x86_64\n", 22 | "Processor : x86_64\n", 23 | "CPU cores : 8\n", 24 | "Architecture: 64bit\n", 25 | "\n" 26 | ] 27 | } 28 | ], 29 | "source": [ 30 | "import numpy\n", 31 | "import scipy\n", 32 | "import torch\n", 33 | "\n", 34 | "from sklearn.datasets import make_blobs\n", 35 | "\n", 36 | "from torchegranate.distributions import *\n", 37 | "from torchegranate.bayes_classifier import BayesClassifier\n", 38 | "\n", 39 | "from sklearn.naive_bayes import GaussianNB, BernoulliNB\n", 40 | "\n", 41 | "import matplotlib.pyplot as plt\n", 42 | "import seaborn; seaborn.set_style('whitegrid')\n", 43 | "\n", 44 | "numpy.random.seed(0)\n", 45 | "numpy.set_printoptions(suppress=True)\n", 46 | "\n", 47 | "%load_ext watermark\n", 48 | "%watermark -m -n -p numpy,scipy,torch,pomegranate" 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "id": "1d323e83", 54 | "metadata": {}, 55 | "source": [ 56 | "### Gaussian Naive Bayes" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 2, 62 | "id": "db6dc2d8", 63 | "metadata": {}, 64 | "outputs": [], 65 | "source": [ 66 | "n, d, k = 200000, 500, 50\n", 67 | "\n", 68 | "X, y = make_blobs(n_samples=n, n_features=d, centers=k, cluster_std=0.75, random_state=0)" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 3, 74 | "id": "4a980a8a", 75 | "metadata": {}, 76 | "outputs": [ 77 | { 78 | "name": "stdout", 79 | "output_type": "stream", 80 | "text": [ 81 | "787 ms ± 22.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", 82 | "872 ms ± 16.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" 83 | ] 84 | } 85 | ], 86 | "source": [ 87 | "%timeit model_sklearn = GaussianNB().fit(X, y)\n", 88 | "%timeit model_pom = BayesClassifier([Normal(covariance_type='diag') for i in range(k)]).fit(X, y)" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 4, 94 | "id": "24f702bc", 95 | "metadata": {}, 96 | "outputs": [ 97 | { 98 | "name": "stdout", 99 | "output_type": "stream", 100 | "text": [ 101 | "20.9 s ± 24.2 ms per loop (mean ± std. dev. 
of 7 runs, 1 loop each)\n", 102 | "15.6 s ± 152 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" 103 | ] 104 | } 105 | ], 106 | "source": [ 107 | "model_sklearn = GaussianNB().fit(X, y)\n", 108 | "model_pom = BayesClassifier([Normal(covariance_type='diag') for i in range(k)]).fit(X, y)\n", 109 | "\n", 110 | "%timeit model_sklearn.predict(X)\n", 111 | "%timeit model_pom.predict(X)" 112 | ] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "id": "f0f5959c", 117 | "metadata": {}, 118 | "source": [ 119 | "### Bernoulli Naive Bayes" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 5, 125 | "id": "1a73281c", 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [ 129 | "n, d, k = 200000, 200, 25\n", 130 | "\n", 131 | "X = numpy.random.choice(2, size=(n, d))\n", 132 | "y = numpy.random.choice(k, size=(n,))" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": 6, 138 | "id": "f711516d", 139 | "metadata": {}, 140 | "outputs": [ 141 | { 142 | "name": "stdout", 143 | "output_type": "stream", 144 | "text": [ 145 | "14 s ± 242 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", 146 | "359 ms ± 905 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" 147 | ] 148 | } 149 | ], 150 | "source": [ 151 | "%timeit model_sklearn = BernoulliNB().fit(X, y)\n", 152 | "%timeit model_pom = BayesClassifier([Bernoulli() for i in range(k)]).fit(X, y)" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 7, 158 | "id": "bab540b7", 159 | "metadata": {}, 160 | "outputs": [ 161 | { 162 | "name": "stdout", 163 | "output_type": "stream", 164 | "text": [ 165 | "628 ms ± 12.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", 166 | "3.01 s ± 35.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" 167 | ] 168 | } 169 | ], 170 | "source": [ 171 | "model_sklearn = BernoulliNB().fit(X, y)\n", 172 | "model_pom = BayesClassifier([Bernoulli() for i in range(k)]).fit(X, y)\n", 173 | "\n", 174 | "%timeit model_sklearn.predict(X)\n", 175 | "%timeit model_pom.predict(X)" 176 | ] 177 | } 178 | ], 179 | "metadata": { 180 | "kernelspec": { 181 | "display_name": "Python 3 (ipykernel)", 182 | "language": "python", 183 | "name": "python3" 184 | }, 185 | "language_info": { 186 | "codemirror_mode": { 187 | "name": "ipython", 188 | "version": 3 189 | }, 190 | "file_extension": ".py", 191 | "mimetype": "text/x-python", 192 | "name": "python", 193 | "nbconvert_exporter": "python", 194 | "pygments_lexer": "ipython3", 195 | "version": "3.9.13" 196 | } 197 | }, 198 | "nbformat": 4, 199 | "nbformat_minor": 5 200 | } 201 | -------------------------------------------------------------------------------- /docs/CODE_OF_CONDUCT.rst: -------------------------------------------------------------------------------- 1 | =============== 2 | Code of Conduct 3 | =============== 4 | 5 | Our Pledge 6 | ---------- 7 | 8 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. 
9 | 10 | Our Standards 11 | ------------- 12 | 13 | Examples of behavior that contributes to creating a positive environment include: 14 | 15 | * Using welcoming and inclusive language 16 | * Being respectful of differing viewpoints and experiences 17 | * Gracefully accepting constructive criticism 18 | * Focusing on what is best for the community 19 | * Showing empathy towards other community members 20 | 21 | Examples of unacceptable behavior by participants include: 22 | 23 | * The use of sexualized language or imagery and unwelcome sexual attention or advances 24 | * Trolling, insulting/derogatory comments, and personal or political attacks 25 | * Public or private harassment 26 | * Publishing others' private information, such as a physical or electronic address, without explicit permission 27 | * Other conduct which could reasonably be considered inappropriate in a professional setting 28 | 29 | Our Responsibilities 30 | -------------------- 31 | 32 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. 33 | 34 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. 35 | 36 | Scope 37 | ----- 38 | 39 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. 40 | 41 | Enforcement 42 | ----------- 43 | 44 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at jmschreiber91@gmail.com. Because the project team currently consists of only one member, that member shall investigate within one week whether a violation of the code of conduct occurred and what the appropriate response is. That member shall then contact the original reporter and any other affected parties to explain the response and note feedback for the record. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Should you wish to file a report anonymously you should fill out a report at https://goo.gl/forms/aQtlDdrhZf4Y8flk2. If your report involves any members of the project team, if you feel uncomfortable making a report to the project team for any reason, or you feel that the issue has not been adequately handled, you are encouraged to send `your report `_ to conduct@numfocus.org where it will be independently reviewed by the `NumFOCUS team `_. 45 | 46 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. 47 | 48 | Attribution 49 | ----------- 50 | 51 | This Code of Conduct is adapted from the `Contributor Covenant homepage `_, `version 1\.4 `_. 52 | 53 | For answers to common questions about this code of conduct, see https://www.contributor-covenant.org/faq. 
54 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 21 | 22 | .PHONY: help 23 | help: 24 | @echo "Please use \`make ' where is one of" 25 | @echo " html to make standalone HTML files" 26 | @echo " dirhtml to make HTML files named index.html in directories" 27 | @echo " singlehtml to make a single large HTML file" 28 | @echo " pickle to make pickle files" 29 | @echo " json to make JSON files" 30 | @echo " htmlhelp to make HTML files and a HTML help project" 31 | @echo " qthelp to make HTML files and a qthelp project" 32 | @echo " applehelp to make an Apple Help Book" 33 | @echo " devhelp to make HTML files and a Devhelp project" 34 | @echo " epub to make an epub" 35 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 36 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 37 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 38 | @echo " text to make text files" 39 | @echo " man to make manual pages" 40 | @echo " texinfo to make Texinfo files" 41 | @echo " info to make Texinfo files and run them through makeinfo" 42 | @echo " gettext to make PO message catalogs" 43 | @echo " changes to make an overview of all changed/added/deprecated items" 44 | @echo " xml to make Docutils-native XML files" 45 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 46 | @echo " linkcheck to check all external links for integrity" 47 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 48 | @echo " coverage to run coverage check of the documentation (if enabled)" 49 | 50 | .PHONY: clean 51 | clean: 52 | rm -rf $(BUILDDIR)/* 53 | 54 | .PHONY: html 55 | html: 56 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 57 | @echo 58 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 59 | 60 | .PHONY: dirhtml 61 | dirhtml: 62 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 63 | @echo 64 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 65 | 66 | .PHONY: singlehtml 67 | singlehtml: 68 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 69 | @echo 70 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 71 | 72 | .PHONY: pickle 73 | pickle: 74 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 75 | @echo 76 | @echo "Build finished; now you can process the pickle files." 
77 | 78 | .PHONY: json 79 | json: 80 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 81 | @echo 82 | @echo "Build finished; now you can process the JSON files." 83 | 84 | .PHONY: htmlhelp 85 | htmlhelp: 86 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 87 | @echo 88 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 89 | ".hhp project file in $(BUILDDIR)/htmlhelp." 90 | 91 | .PHONY: qthelp 92 | qthelp: 93 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 94 | @echo 95 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 96 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 97 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/pomegranate.qhcp" 98 | @echo "To view the help file:" 99 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/pomegranate.qhc" 100 | 101 | .PHONY: applehelp 102 | applehelp: 103 | $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp 104 | @echo 105 | @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." 106 | @echo "N.B. You won't be able to view it unless you put it in" \ 107 | "~/Library/Documentation/Help or install it in your application" \ 108 | "bundle." 109 | 110 | .PHONY: devhelp 111 | devhelp: 112 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 113 | @echo 114 | @echo "Build finished." 115 | @echo "To view the help file:" 116 | @echo "# mkdir -p $$HOME/.local/share/devhelp/pomegranate" 117 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/pomegranate" 118 | @echo "# devhelp" 119 | 120 | .PHONY: epub 121 | epub: 122 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 123 | @echo 124 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 125 | 126 | .PHONY: latex 127 | latex: 128 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 129 | @echo 130 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 131 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 132 | "(use \`make latexpdf' here to do that automatically)." 133 | 134 | .PHONY: latexpdf 135 | latexpdf: 136 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 137 | @echo "Running LaTeX files through pdflatex..." 138 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 139 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 140 | 141 | .PHONY: latexpdfja 142 | latexpdfja: 143 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 144 | @echo "Running LaTeX files through platex and dvipdfmx..." 145 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 146 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 147 | 148 | .PHONY: text 149 | text: 150 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 151 | @echo 152 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 153 | 154 | .PHONY: man 155 | man: 156 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 157 | @echo 158 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 159 | 160 | .PHONY: texinfo 161 | texinfo: 162 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 163 | @echo 164 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 165 | @echo "Run \`make' in that directory to run these through makeinfo" \ 166 | "(use \`make info' here to do that automatically)." 167 | 168 | .PHONY: info 169 | info: 170 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 171 | @echo "Running Texinfo files through makeinfo..." 
172 | make -C $(BUILDDIR)/texinfo info 173 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 174 | 175 | .PHONY: gettext 176 | gettext: 177 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 178 | @echo 179 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 180 | 181 | .PHONY: changes 182 | changes: 183 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 184 | @echo 185 | @echo "The overview file is in $(BUILDDIR)/changes." 186 | 187 | .PHONY: linkcheck 188 | linkcheck: 189 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 190 | @echo 191 | @echo "Link check complete; look for any errors in the above output " \ 192 | "or in $(BUILDDIR)/linkcheck/output.txt." 193 | 194 | .PHONY: doctest 195 | doctest: 196 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 197 | @echo "Testing of doctests in the sources finished, look at the " \ 198 | "results in $(BUILDDIR)/doctest/output.txt." 199 | 200 | .PHONY: coverage 201 | coverage: 202 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage 203 | @echo "Testing of coverage in the sources finished, look at the " \ 204 | "results in $(BUILDDIR)/coverage/python.txt." 205 | 206 | .PHONY: xml 207 | xml: 208 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 209 | @echo 210 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 211 | 212 | .PHONY: pseudoxml 213 | pseudoxml: 214 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 215 | @echo 216 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 217 | -------------------------------------------------------------------------------- /docs/_static/custom.css: -------------------------------------------------------------------------------- 1 | /* Sidebar header (and top-bar for mobile) */ 2 | .wy-side-nav-search, .wy-nav-top { 3 | background: #A91D47; 4 | } 5 | 6 | .wy-menu > .caption > span.caption-text { 7 | color: #A91D47; 8 | } 9 | 10 | code.literal { 11 | color: #A91D47 !important; 12 | background-color: #fbfbfb !important; 13 | } 14 | 15 | -------------------------------------------------------------------------------- /docs/_templates/class.rst: -------------------------------------------------------------------------------- 1 | {{ fullname }} 2 | {{ underline }} 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autoclass:: {{ objname }} 7 | 8 | {% block methods %} 9 | .. automethod:: __init__ 10 | {% endblock %} 11 | -------------------------------------------------------------------------------- /docs/api.rst: -------------------------------------------------------------------------------- 1 | ======= 2 | The API 3 | ======= 4 | 5 | pomegranate has a minimal core API that is made possible because all models are treated as probability distributions regardless of their complexity. This point is repeated throughout the documentation because it has important consequences for how the package is designed and also for how one should think about designing probabilistic models. Although each model documentation page has an API reference showing the full set of methods and parameters for each model, each models has the following methods:. 6 | 7 | .. code-block:: python 8 | 9 | >>> model.probability(X) 10 | 11 | This method takes in a set of examples (either 2D or 3D depending on the model) and returns a vector of probabilities. 12 | 13 | .. 
code-block:: python 14 | 15 | >>> model.log_probability(X) 16 | 17 | This method takes in a set of examples (either 2D or 3D depending on the model) and returns a vector of log probabilities. Log probabilities are more numerically stable and, in fact, calls to `model.probability` just exponentiate the value returned from this call. 18 | 19 | .. code-block:: python 20 | 21 | >>> model.fit(X, sample_weight=None) 22 | 23 | This method will fit the model to the given data that is optionally weighted. If the model is a simple probability distribution, a Bayes classifier, or a Bayesian network with fully observed features, the method will use maximum likelihood estimates. For other models and settings, the method will use expectation-maximization to fit the model parameters. When a structure is not provided for hidden Markov models or Bayesian networks, this method will jointly learn the structure and the parameters of the model. The shape of data should be (n, d) or (n, l, d) depending on whether there is a length dimension, where n is the number of samples, l is the length of the data, and d is the dimensionality. Sample weights should either be a vector of non-negative numbers of size (n,) or a matrix of size (n, d). 24 | 25 | .. code-block:: python 26 | 27 | >>> model.summarize(X, sample_weight=None) 28 | 29 | This method is the first step of the two-step out-of-core learning API. The method will take in a batch of data and optional weights, extract the sufficient statistics that allow for an exact update, and add them to the cached values. Because these sufficient statistics are additive, one can derive an exact update from multiple calls to this method without having to store an entire data set in memory. 30 | 31 | .. code-block:: python 32 | 33 | >>> model.from_summaries() 34 | 35 | This method is the second step in the out-of-core learning API. The method uses the extracted and aggregated sufficient statistics to derive exact parameter updates for the model. After the parameters are updated, the stored sufficient statistics will be zeroed out. 36 | 37 | 38 | Compositional Methods 39 | --------------------- 40 | 41 | For models that are composed of other models/distributions, e.g. mixture models, hidden Markov models, and Bayesian networks, there are additional methods that relate to inferring how the data relates to each of these distributions. For example, instead of just calculating the log probability of an example under an entire mixture model, one might want to calculate the posterior probability that the data was generated by each of the distributions. These posterior probabilities are found by applying Bayes' rule, which connects prior probabilities and likelihoods to posterior probabilities. 42 | 43 | .. code-block:: python 44 | 45 | >>> model.predict(X) 46 | 47 | This method will return the most likely inferred value for each example in the data. In the case of Bayesian networks operating on incomplete data, this inferred value is the most likely value that each variable takes given the structure of the model and the observed data. For all other models, this is the most likely component that explains the data, P(M|D). 48 | 49 | .. code-block:: python 50 | 51 | >>> model.predict_proba(X) 52 | 53 | This returns the matrix of posterior probabilities P(M|D) directly. The predict method simply runs an argmax over this matrix. 54 | 55 | .. code-block:: python 56 | 57 | >>> model.predict_log_proba(X) 58 | 59 | This returns the matrix of log posterior probabilities for numerical stability.
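To make the core and compositional methods concrete, here is a minimal sketch of how they fit together for a mixture model; the data and the choice of two normal components are illustrative placeholders rather than part of the documented API.

.. code-block:: python

    >>> import torch
    >>> from pomegranate.distributions import Normal
    >>> from pomegranate.gmm import GeneralMixtureModel
    >>>
    >>> X = torch.randn(1000, 5)                        # (n, d) data
    >>> model = GeneralMixtureModel([Normal(), Normal()])
    >>> model.fit(X)                                    # EM, built on summarize/from_summaries
    >>> y_hat = model.predict(X)                        # most likely component for each example
    >>> posteriors = model.predict_proba(X)             # P(M|D), one row per example
    >>> log_posteriors = model.predict_log_proba(X)     # log P(M|D), more numerically stable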
60 | 61 | 62 | API Reference 63 | ------------- 64 | 65 | Distributions 66 | ============= 67 | 68 | .. automodule:: pomegranate.distributions 69 | :members: Bernoulli, Categorical, ConditionalCategorical, JointCategorical, DiracDelta, Exponential, Gamma, Normal, Poisson, StudentT, Uniform, ZeroInflated 70 | 71 | Models 72 | ====== 73 | 74 | .. autoclass:: pomegranate.bayes_classifier.BayesClassifier 75 | 76 | .. autoclass:: pomegranate.gmm.GeneralMixtureModel 77 | 78 | .. autoclass:: pomegranate.hmm.DenseHMM 79 | 80 | .. autoclass:: pomegranate.hmm.SparseHMM 81 | 82 | .. autoclass:: pomegranate.markov_chain.MarkovChain 83 | 84 | .. autoclass:: pomegranate.bayesian_network.BayesianNetwork 85 | 86 | .. autoclass:: pomegranate.factor_graph.FactorGraph -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # For the full list of built-in configuration values, see the documentation: 4 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 5 | 6 | # -- Project information ----------------------------------------------------- 7 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information 8 | 9 | project = 'pomegranate' 10 | copyright = '2023, Jacob Schreiber' 11 | author = 'Jacob Schreiber' 12 | release = '1.0.0' 13 | 14 | # -- General configuration --------------------------------------------------- 15 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration 16 | 17 | extensions = ['sphinx.ext.autodoc', 'sphinx.ext.autosummary', 'nbsphinx'] 18 | 19 | templates_path = ['_templates'] 20 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 21 | 22 | root_doc = 'index' 23 | master_doc = 'index' 24 | 25 | # -- Options for HTML output ------------------------------------------------- 26 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output 27 | 28 | html_theme = 'sphinx_rtd_theme' 29 | html_static_path = ['_static'] 30 | html_css_files = ['custom.css'] 31 | -------------------------------------------------------------------------------- /docs/faq.rst: -------------------------------------------------------------------------------- 1 | .. _faq: 2 | 3 | FAQ 4 | === 5 | 6 | **Can I create a usable model if I already know the parameters and just want to do inference** 7 | 8 | Yes! Each model allows you to either pass in the parameters, or to leave it uninitialized and fit it directly to data. If you pass in your own parameters you can do inference by calling methods like ``log_probability`` and ``predict``. 9 | 10 | **If I have an initial/pretrained model, can I fine-tune it using pomegranate?** 11 | 12 | Yes! In the same way that you could just do inference after giving it parameters, you can fine-tune those parameters using the built-in fitting functions. You may want to modify the inertia or freeze some of the parameters for fine-tuning. 13 | 14 | **If I have an initial/pretrained model, can I freeze some parameters and fine-tune the remainder?** 15 | 16 | Yes! Do the same as above, but pass in ``frozen=True`` for the model components that you would like to remain frozen. 
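As a minimal sketch (the parameter values below stand in for a pretrained component and are purely illustrative), freezing one component of a mixture while fine-tuning the other might look like:

.. code-block:: python

    >>> import torch
    >>> from pomegranate.distributions import Normal
    >>> from pomegranate.gmm import GeneralMixtureModel
    >>>
    >>> X = torch.randn(1000, 2)
    >>> d1 = Normal([0.0, 0.0], [1.0, 1.0], covariance_type='diag', frozen=True)  # stays fixed
    >>> d2 = Normal([1.0, 1.0], [1.0, 1.0], covariance_type='diag')               # gets fine-tuned
    >>> model = GeneralMixtureModel([d1, d2]).fit(X)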
17 | 18 | **How do I learn a model directly from data?** 19 | 20 | pomegranate v1.0.0 follows the scikit-learn API in the sense that you pass all hyperparameters into the initialization and then fit the parameters using the ``fit`` function. All models allow you to use a signature similar to ``NormalDistribution().fit(X)``. Some models allow you to leave the initialization blank, but most models require at least one parameter, e.g. mixture models require specifying the distributions and Markov chains require specifying the order. Other optional hyperparameters can be provided to alter the fitting process. 21 | 22 | **My data set has missing values. Can I use pomegranate?** 23 | 24 | Yes! Almost all algorithms in pomegranate can operate on incomplete data sets. All you need to do is pass in a ``torch.masked.MaskedTensor``, where the missing values are masked out (have a value of ``False``), in place of a normal tensor. 25 | 26 | **How can I use out-of-core learning in pomegranate?** 27 | 28 | Once a model has been initialized, the ``summarize`` method can be used on arbitrarily sized chunks of the data to reduce them into their sufficient statistics. These sufficient statistics are additive, meaning that if they are calculated for all chunks of a dataset and then added together they can yield exact updates. Once all chunks have been summarized, ``from_summaries`` is called to update the parameters of the model based on these added sufficient statistics. Out-of-core computing is supported by allowing the user to load chunks of data into memory, summarize them, discard them, and move on to the next chunk. 29 | 30 | **Does pomegranate support parallelization?** 31 | 32 | Yes! Because pomegranate v1.0.0 is written in PyTorch, which is natively multithreaded, all algorithms will use the available threads. See the PyTorch documentation for controlling the number of threads to use. 33 | 34 | **Does pomegranate support GPUs?** 35 | 36 | Yes! Again, because pomegranate v1.0.0 is written in PyTorch, every algorithm has GPU support. The speed increase scales with the complexity of the algorithm, with simple probability distributions having approximately a ~2-3x speedup, whereas the forward-backward algorithm for hidden Markov models can be up to ~5-10x faster by using a GPU. 37 | 38 | **Does pomegranate support distributed computing?** 39 | 40 | Currently, pomegranate is not set up for a distributed environment, though the pieces are there to make this possible. 41 | 42 | **How can I cite pomegranate?** 43 | 44 | The research paper that presents pomegranate is: 45 | 46 | *Schreiber, J. (2018). Pomegranate: fast and flexible probabilistic modeling in python. Journal of Machine Learning Research, 18(164), 1-6.* 47 | 48 | which can be downloaded from `JMLR`_ or from `arXiv`_. 49 | 50 | .. _jmlr: http://www.jmlr.org/papers/volume18/17-636/17-636.pdf 51 | ..
_arxiv: https://arxiv.org/abs/1711.00137 52 | 53 | The paper can be cited as: 54 | :: 55 | 56 | @article{schreiber2018pomegranate, 57 | title={Pomegranate: fast and flexible probabilistic modeling in python}, 58 | author={Schreiber, Jacob}, 59 | journal={Journal of Machine Learning Research}, 60 | volume={18}, 61 | number={164}, 62 | pages={1--6}, 63 | year={2018} 64 | } 65 | 66 | Alternatively, the GitHub repository can be cited as: 67 | :: 68 | 69 | @misc{Schreiber2016, 70 | author = {Jacob Schreiber}, 71 | title = {pomegranate}, 72 | year = {2016}, 73 | publisher = {GitHub}, 74 | journal = {GitHub repository}, 75 | howpublished = {\url{https://github.com/jmschrei/pomegranate}}, 76 | commit = {enter commit that you used} 77 | } 78 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. Introduction documentation master file, created by 2 | sphinx-quickstart on Sun Oct 30 18:10:26 2016. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | 7 | .. image:: logo/pomegranate-logo.png 8 | :width: 300px 9 | 10 | | 11 | 12 | .. image:: https://readthedocs.org/projects/pomegranate/badge/?version=latest 13 | :target: http://pomegranate.readthedocs.io/en/latest/?badge=latest 14 | 15 | | 16 | 17 | 18 | Home 19 | ==== 20 | 21 | pomegranate is a Python package that implements fast and flexible probabilistic models ranging from individual probability distributions to compositional models such as Bayesian networks and hidden Markov models. The core philosophy behind pomegranate is that all probabilistic models can be viewed as a probability distribution in that they all yield probability estimates for samples and can be updated given samples and their associated weights. The primary consequence of this view is that the components that are implemented in pomegranate can be stacked more flexibly than other packages. For example, one can build a Gaussian mixture model just as easily as building an exponential or log normal mixture model. But that's not all! One can create a Bayes classifier that uses different types of distributions on each feature, perhaps modeling time-associated features using an exponential distribution and counts using a Poisson distribution. Lastly, since these compositional models themselves can be viewed as probability distributions, one can build a mixture of Bayesian networks or a hidden Markov model Bayes' classifier that makes predictions over sequences. 22 | 23 | In addition to a variety of probability distributions and models, pomegranate has a variety of built-in features that are implemented for all of the models. These include different training strategies such as semi-supervised learning, learning with missing values, and mini-batch learning. It also includes support for massive data sets with out-of-core learning, multi-threaded parallelism, and GPU support. 24 | 25 | 26 | Thank You 27 | ========= 28 | 29 | No good project is done alone, and so I'd like to thank all the previous contributors to YAHMM, all the current contributors to pomegranate, and the many graduate students whom I have pestered with ideas and questions. 30 | 31 | Contributions 32 | ============= 33 | 34 | Contributions are eagerly accepted! If you would like to contribute a feature then fork the master branch and be sure to run the tests before changing any code.
Let us know what you want to do on the issue tracker just in case we're already working on an implementation of something similar. Also, please don't forget to add tests for any new functions. Please review the `Code of Conduct `_ before contributing. 35 | 36 | .. toctree:: 37 | :maxdepth: 1 38 | :hidden: 39 | :caption: Getting Started 40 | 41 | self 42 | install.rst 43 | api.rst 44 | CODE_OF_CONDUCT.rst 45 | faq.rst 46 | whats_new.rst 47 | 48 | .. toctree:: 49 | :maxdepth: 1 50 | :hidden: 51 | :caption: Features 52 | 53 | tutorials/C_Feature_Tutorial_1_GPU_Usage.ipynb 54 | tutorials/C_Feature_Tutorial_2_Mixed_Precision_and_DataTypes.ipynb 55 | tutorials/C_Feature_Tutorial_3_Out_Of_Core_Learning.ipynb 56 | tutorials/C_Feature_Tutorial_4_Priors_and_Semi-supervised_Learning.ipynb 57 | 58 | .. toctree:: 59 | :maxdepth: 1 60 | :hidden: 61 | :caption: Models 62 | 63 | tutorials/B_Model_Tutorial_1_Distributions.ipynb 64 | tutorials/B_Model_Tutorial_2_General_Mixture_Models.ipynb 65 | tutorials/B_Model_Tutorial_3_Bayes_Classifier.ipynb 66 | tutorials/B_Model_Tutorial_4_Hidden_Markov_Models.ipynb 67 | tutorials/B_Model_Tutorial_5_Markov_Chains.ipynb 68 | tutorials/B_Model_Tutorial_6_Bayesian_Networks.ipynb 69 | tutorials/B_Model_Tutorial_7_Factor_Graphs.ipynb 70 | -------------------------------------------------------------------------------- /docs/install.rst: -------------------------------------------------------------------------------- 1 | .. _install: 2 | 3 | Installation 4 | ============ 5 | 6 | The easiest way to get pomegranate is through pip using the command 7 | 8 | .. code-block:: bash 9 | 10 | pip install pomegranate 11 | 12 | This should install all the dependencies in addition to the package. 13 | 14 | You can also get the bleeding edge from GitHub using the following commands: 15 | 16 | .. code-block:: bash 17 | 18 | git clone https://github.com/jmschrei/pomegranate 19 | cd pomegranate 20 | python setup.py install 21 | 22 | Because pomegranate recently moved to a PyTorch backend, the most complicated installation step now is likely installing that and its CUDA dependencies. Please see the PyTorch documentation for help installing those. 
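After installing, a quick sanity check is to import both packages and see whether a CUDA-capable GPU is visible to PyTorch. This is only a sketch and assumes a recent release that exposes ``__version__``:

.. code-block:: python

    >>> import torch
    >>> import pomegranate
    >>> print(pomegranate.__version__)
    >>> print(torch.cuda.is_available())   # True only if a CUDA-enabled PyTorch build is installed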
23 | -------------------------------------------------------------------------------- /docs/logo/pomegranate-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jmschrei/pomegranate/e9162731f4f109b7b17ecffde768734cacdb839b/docs/logo/pomegranate-logo.png -------------------------------------------------------------------------------- /docs/logo/pomegranate_comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jmschrei/pomegranate/e9162731f4f109b7b17ecffde768734cacdb839b/docs/logo/pomegranate_comparison.png -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy >= 1.22.2 2 | scipy >= 1.6.2 3 | scikit-learn >= 1.0.2 4 | torch >= 1.9.0 5 | apricot-select >= 0.6.1 6 | networkx >= 2.8.4 7 | pomegranate >= 1.0.0 8 | sphinx-rtd-theme 9 | pandoc 10 | nbsphinx 11 | jinja2==3.1.4 12 | -------------------------------------------------------------------------------- /docs/tutorials/C_Feature_Tutorial_3_Out_Of_Core_Learning.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "692646c3", 6 | "metadata": {}, 7 | "source": [ 8 | "## Out-of-Core Learning" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "79d62d75", 14 | "metadata": {}, 15 | "source": [ 16 | "author: Jacob Schreiber
\n", 17 | "contact: jmschreiber91@gmail.com\n", 18 | "\n", 19 | "Out-of-core learning refers to the process of training a model on an amount of data that cannot fit in memory. There are several approaches that can be described as out-of-core, but here we refer to the ability to derive exact updates to a model from a massive data set, despite not being able to fit the entire thing in memory.\n", 20 | "\n", 21 | "This out-of-core learning approach is implemented for all of pomegranate's models using two methods. The first is a summarize method that will take in a batch of data and reduce it down to additive sufficient statistics. Because these summaries are additive, after the first call, these summaries are added to the previously stored summaries. Once the entire data set has been seen, the stored sufficient statistics will be identical to those that would have been derived if the entire data set had been seen at once. The second method is the from_summaries method, which uses the stored sufficient statistics to derive parameter updates for the model.\n", 22 | "\n", 23 | "A common solution to having too much data is to randomly select an amount of data that does fit in memory to use in the place of the full data set. While simple to implement, this approach is likely to yield lower performance models because it is exposed to less data. However, by using out-of-core learning, on can train their models on a massive amount of data without being limited by the amount of memory their computer has." 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 1, 29 | "id": "732d90aa", 30 | "metadata": {}, 31 | "outputs": [ 32 | { 33 | "name": "stdout", 34 | "output_type": "stream", 35 | "text": [ 36 | "Populating the interactive namespace from numpy and matplotlib\n", 37 | "torch : 1.13.0\n", 38 | "pomegranate: 1.0.0\n", 39 | "\n", 40 | "Compiler : GCC 11.2.0\n", 41 | "OS : Linux\n", 42 | "Release : 4.15.0-208-generic\n", 43 | "Machine : x86_64\n", 44 | "Processor : x86_64\n", 45 | "CPU cores : 8\n", 46 | "Architecture: 64bit\n", 47 | "\n" 48 | ] 49 | } 50 | ], 51 | "source": [ 52 | "%pylab inline\n", 53 | "import torch\n", 54 | "\n", 55 | "numpy.random.seed(0)\n", 56 | "numpy.set_printoptions(suppress=True)\n", 57 | "\n", 58 | "%load_ext watermark\n", 59 | "%watermark -m -n -p torch,pomegranate" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "id": "e77be408", 65 | "metadata": {}, 66 | "source": [ 67 | "### `summarize ` and `from_summaries`\n", 68 | "\n", 69 | "Let's start off simple with training a normal distribution in an out-of-core manner. First, we'll generate some random data." 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 2, 75 | "id": "40c81d88", 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [ 79 | "X = torch.randn(1000, 5)" 80 | ] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "id": "68782e46", 85 | "metadata": {}, 86 | "source": [ 87 | "Then, we can initialize a distribution." 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 3, 93 | "id": "fec969dc", 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": [ 97 | "from pomegranate.distributions import Normal\n", 98 | "\n", 99 | "dist = Normal()" 100 | ] 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "id": "e18b3d50", 105 | "metadata": {}, 106 | "source": [ 107 | "Now let's summarize through a few batches of data using the `summarize` method." 
108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 4, 113 | "id": "8d181be6", 114 | "metadata": {}, 115 | "outputs": [], 116 | "source": [ 117 | "dist.summarize(X[:200])\n", 118 | "dist.summarize(X[200:])" 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "id": "6df91e38", 124 | "metadata": {}, 125 | "source": [ 126 | "Importantly, summarizing data doesn't update parameters by itself. Rather, it extracts additive sufficient statistics from the data. Each time `summarize` is called, these statistics are added to the previously aggregated statistics.\n", 127 | "\n", 128 | "In order to update the parameters of the model, you need to call the `from_summaries` method. This method updates the parameters of the model given the stored sufficient statistics." 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 5, 134 | "id": "9cbbe4dc", 135 | "metadata": {}, 136 | "outputs": [ 137 | { 138 | "data": { 139 | "text/plain": [ 140 | "(Parameter containing:\n", 141 | " tensor([ 0.0175, 0.0096, 0.0228, 0.0592, -0.0089]),\n", 142 | " Parameter containing:\n", 143 | " tensor([[ 0.9786, -0.0106, 0.0344, 0.0571, 0.0330],\n", 144 | " [-0.0106, 0.9970, 0.0165, -0.0330, 0.0021],\n", 145 | " [ 0.0344, 0.0165, 0.9405, -0.0075, -0.0374],\n", 146 | " [ 0.0571, -0.0330, -0.0075, 1.0399, 0.0333],\n", 147 | " [ 0.0330, 0.0021, -0.0374, 0.0333, 0.9978]]))" 148 | ] 149 | }, 150 | "execution_count": 5, 151 | "metadata": {}, 152 | "output_type": "execute_result" 153 | } 154 | ], 155 | "source": [ 156 | "dist.from_summaries()\n", 157 | "dist.means, dist.covs" 158 | ] 159 | }, 160 | { 161 | "cell_type": "markdown", 162 | "id": "2ae90d8d", 163 | "metadata": {}, 164 | "source": [ 165 | "This update is exactly the same as one would get if they had trained on the entire data set." 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": 6, 171 | "id": "c33e1a42", 172 | "metadata": {}, 173 | "outputs": [ 174 | { 175 | "data": { 176 | "text/plain": [ 177 | "(Parameter containing:\n", 178 | " tensor([ 0.0175, 0.0096, 0.0228, 0.0592, -0.0089]),\n", 179 | " Parameter containing:\n", 180 | " tensor([[ 0.9786, -0.0106, 0.0344, 0.0571, 0.0330],\n", 181 | " [-0.0106, 0.9970, 0.0165, -0.0330, 0.0021],\n", 182 | " [ 0.0344, 0.0165, 0.9405, -0.0075, -0.0374],\n", 183 | " [ 0.0571, -0.0330, -0.0075, 1.0399, 0.0333],\n", 184 | " [ 0.0330, 0.0021, -0.0374, 0.0333, 0.9978]]))" 185 | ] 186 | }, 187 | "execution_count": 6, 188 | "metadata": {}, 189 | "output_type": "execute_result" 190 | } 191 | ], 192 | "source": [ 193 | "dist = Normal()\n", 194 | "dist.summarize(X)\n", 195 | "dist.from_summaries()\n", 196 | "dist.means, dist.covs" 197 | ] 198 | }, 199 | { 200 | "cell_type": "markdown", 201 | "id": "9b217107", 202 | "metadata": {}, 203 | "source": [ 204 | "### Batched Training\n", 205 | "\n", 206 | "Sometimes your data is so large that it cannot fit in memory (either CPU or GPU). In these cases, we can use the out-of-core API to train on batches at a time. This is similar to how neural networks are trained except that, rather than updating after each batch (or aggregating gradients over a small number of batches), we can summarize over a much larger number of batches -- potentially even the entire data set to get an exact update. Let's see an example of how that might work." 
207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": 7, 212 | "id": "a6232d3c", 213 | "metadata": {}, 214 | "outputs": [], 215 | "source": [ 216 | "dist = Normal()\n", 217 | "\n", 218 | "for i in range(10):\n", 219 | " X_batch = torch.randn(1000, 20) # This is meant to mimic loading a batch of data\n", 220 | " dist.summarize(X_batch)\n", 221 | " del X_batch # Now we can discard the batch \n", 222 | " \n", 223 | "dist.from_summaries()" 224 | ] 225 | }, 226 | { 227 | "cell_type": "markdown", 228 | "id": "7c30c9f4", 229 | "metadata": {}, 230 | "source": [ 231 | "Batched training is easy to implement for simple probability distributions but it can also be done with more complicated models if you want to code your own expectation-maximization. For instance, let's try training a mixture model using a modified version of the training code." 232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": 8, 237 | "id": "14012265", 238 | "metadata": {}, 239 | "outputs": [ 240 | { 241 | "name": "stdout", 242 | "output_type": "stream", 243 | "text": [ 244 | "[1] Improvement: 1945.53125, Time: 0.01443s\n", 245 | "[2] Improvement: 99.875, Time: 0.01562s\n", 246 | "[3] Improvement: 34.1875, Time: 0.01019s\n", 247 | "[4] Improvement: 17.65625, Time: 0.00994s\n" 248 | ] 249 | } 250 | ], 251 | "source": [ 252 | "from pomegranate.gmm import GeneralMixtureModel\n", 253 | "\n", 254 | "X = torch.randn(10000, 20)\n", 255 | "\n", 256 | "model = GeneralMixtureModel([Normal(), Normal()])\n", 257 | "\n", 258 | "logp = None\n", 259 | "for i in range(5):\n", 260 | " start_time = time.time()\n", 261 | "\n", 262 | " last_logp = logp\n", 263 | " \n", 264 | " logp = 0\n", 265 | " for j in range(0, X.shape[0], 1000): # Train on batches of size 1000\n", 266 | " logp += model.summarize(X[j:j+1000])\n", 267 | "\n", 268 | " if i > 0:\n", 269 | " improvement = logp - last_logp\n", 270 | " duration = time.time() - start_time\n", 271 | " print(\"[{}] Improvement: {}, Time: {:4.4}s\".format(i, improvement, duration))\n", 272 | "\n", 273 | " model.from_summaries()" 274 | ] 275 | } 276 | ], 277 | "metadata": { 278 | "kernelspec": { 279 | "display_name": "Python 3 (ipykernel)", 280 | "language": "python", 281 | "name": "python3" 282 | }, 283 | "language_info": { 284 | "codemirror_mode": { 285 | "name": "ipython", 286 | "version": 3 287 | }, 288 | "file_extension": ".py", 289 | "mimetype": "text/x-python", 290 | "name": "python", 291 | "nbconvert_exporter": "python", 292 | "pygments_lexer": "ipython3", 293 | "version": "3.9.13" 294 | } 295 | }, 296 | "nbformat": 4, 297 | "nbformat_minor": 5 298 | } 299 | -------------------------------------------------------------------------------- /examples/Bayesian_Network_Monty_Hall.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "473b0cb3", 6 | "metadata": {}, 7 | "source": [ 8 | "## The Monty Hall Problem\n", 9 | "\n", 10 | "The Monty Hall problem arose from the gameshow Let's Make a Deal, where a guest had to choose which one of three doors had a prize behind it. The twist was that after the guest chose, the host, originally Monty Hall, would then open one of the doors the guest did not pick that also did not have the prize behind it. Afterwards, Monty would ask if the guest wanted to switch which door they had picked. 
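The batched mixture-model loop shown above runs for a fixed number of iterations; a small variation, sketched here with an arbitrary tolerance, stops once the log-likelihood improvement becomes negligible.

```python
import torch
from pomegranate.distributions import Normal
from pomegranate.gmm import GeneralMixtureModel

X = torch.randn(10000, 20)
model = GeneralMixtureModel([Normal(), Normal()])

tol, last_logp = 0.1, None
while True:
    logp = 0
    for j in range(0, X.shape[0], 1000):       # summarize one batch at a time
        logp += model.summarize(X[j:j+1000])
    model.from_summaries()

    if last_logp is not None and logp - last_logp < tol:
        break
    last_logp = logp
```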
Initial inspection may lead you to believe that if there are only two doors left there is a 50-50 chance of you picking the right one, and so there is no advantage one way or the other. However, it has been proven both through simulations and analytically that there is in fact a 66% chance of getting the prize if the guest switches their door after Monty opens one, regardless of the door they initially went with." 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 1, 16 | "id": "76bedfe3", 17 | "metadata": {}, 18 | "outputs": [ 19 | { 20 | "name": "stdout", 21 | "output_type": "stream", 22 | "text": [ 23 | "Populating the interactive namespace from numpy and matplotlib\n", 24 | "torch : 1.13.0\n", 25 | "torchegranate: 0.4.0\n", 26 | "\n", 27 | "Compiler : GCC 11.2.0\n", 28 | "OS : Linux\n", 29 | "Release : 4.15.0-206-generic\n", 30 | "Machine : x86_64\n", 31 | "Processor : x86_64\n", 32 | "CPU cores : 8\n", 33 | "Architecture: 64bit\n", 34 | "\n" 35 | ] 36 | } 37 | ], 38 | "source": [ 39 | "%pylab inline\n", 40 | "import seaborn; seaborn.set_style('whitegrid')\n", 41 | "import torch\n", 42 | "\n", 43 | "%load_ext watermark\n", 44 | "%watermark -m -n -p torch,torchegranate" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "id": "8dccea1d", 50 | "metadata": {}, 51 | "source": [ 52 | "We can reproduce this result in pomegranate using Bayesian networks with three nodes, one for the guest, one for the prize, and one for the door Monty chooses to open. The door the guest initially chooses and the door the prize is behind are completely random processes across the three doors, but the door which Monty opens is dependent on both the door the guest chooses (it cannot be the door the guest chooses), and the door the prize is behind (it cannot be the door with the prize behind it).\n", 53 | "\n", 54 | "To create the Bayesian network in pomegranate, we first create the distributions which live in each node in the graph. For a categorical bayesian network we use Categorical distributions for the root nodes and ConditionalCategorical distributions for the inner and leaf nodes. \n", 55 | "\n", 56 | "First, we can create our \"prize\" and \"guest\" distributions. These are each Categorical distributions because they do not depend on anything, and they are uniform distributions because they are equally likely to be any of the three doors." 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 2, 62 | "id": "ce8a68c4", 63 | "metadata": {}, 64 | "outputs": [], 65 | "source": [ 66 | "from torchegranate.distributions import Categorical\n", 67 | "\n", 68 | "guest = Categorical([[1./3, 1./3, 1./3]])\n", 69 | "prize = Categorical([[1./3, 1./3, 1./3]])" 70 | ] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "id": "369339f5", 75 | "metadata": {}, 76 | "source": [ 77 | "You may notice that there is an additional dimension added to the probabilities. This is because all distributions in pomegranate have the potential to be multivariate even when being applied to univariate problems.\n", 78 | "\n", 79 | "Next, we need to create the conditional distribution describing the door that Monty will open. Because Monty can only open a door that is not selected by the contestant and also does not have the prize, sometimes this leaves Monty with only one door that can be opened. Overall, the distribution is a 3x3x3 tensor, with three possibilities from the guest, three independent possibilities from the prize, and three possible doors to open." 
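A quick Monte Carlo check of the two-thirds claim above, independent of the Bayesian network (plain numpy, nothing model-specific):

```python
import numpy

rng = numpy.random.default_rng(0)
n = 100_000

prize = rng.integers(0, 3, size=n)
guest = rng.integers(0, 3, size=n)

# Staying wins only if the first guess was already right; switching wins
# exactly when the first guess was wrong, because Monty removes the other
# losing door.
print("stay  :", (guest == prize).mean())    # roughly 1/3
print("switch:", (guest != prize).mean())    # roughly 2/3
```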
80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 3, 85 | "id": "2b303ce6", 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [ 89 | "from torchegranate.distributions import ConditionalCategorical\n", 90 | "\n", 91 | "probs = numpy.array([[\n", 92 | " [[0.0, 0.5, 0.5], [0.0, 0.0, 1.0], [0.0, 1.0, 0.0]], \n", 93 | " [[0.0, 0.0, 1.0], [0.5, 0.0, 0.5], [1.0, 0.0, 0.0]],\n", 94 | " [[0.0, 1.0, 0.0], [1.0, 0.0, 0.0], [0.5, 0.5, 0.0]]\n", 95 | "]])\n", 96 | "\n", 97 | "monty = ConditionalCategorical(probs) " 98 | ] 99 | }, 100 | { 101 | "cell_type": "markdown", 102 | "id": "f693f22a", 103 | "metadata": {}, 104 | "source": [ 105 | "Next, we can create the Bayesian network object in just one line by passing in the distribution objects and edges in the form of (parent, child) tuples. Previous versions of pomegranate required that you create State or Node objects and add them in using `add_edge` and `add_node` methods. State and Node objects no longer exist, and while those methods still exist if you would prefer to use them you no longer need to. The `bake` method has also been removed and is no longer required." 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": 6, 111 | "id": "cdd24ba0", 112 | "metadata": {}, 113 | "outputs": [], 114 | "source": [ 115 | "from torchegranate.bayesian_network import BayesianNetwork\n", 116 | "\n", 117 | "model = BayesianNetwork([guest, prize, monty], [(guest, monty), (prize, monty)])" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": 9, 123 | "id": "ec59baca", 124 | "metadata": {}, 125 | "outputs": [ 126 | { 127 | "data": { 128 | "text/plain": [ 129 | "tensor([[0, 1, 2],\n", 130 | " [0, 2, 1],\n", 131 | " [2, 1, 0]])" 132 | ] 133 | }, 134 | "execution_count": 9, 135 | "metadata": {}, 136 | "output_type": "execute_result" 137 | } 138 | ], 139 | "source": [ 140 | "X = torch.tensor([[0, 1, -1],\n", 141 | " [0, 2, -1],\n", 142 | " [2, 1, -1]])\n", 143 | "\n", 144 | "X_masked = torch.masked.MaskedTensor(X, mask=X >= 0)\n", 145 | "\n", 146 | "\n", 147 | "model.predict(X_masked)" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 16, 153 | "id": "dac5d471", 154 | "metadata": {}, 155 | "outputs": [ 156 | { 157 | "data": { 158 | "text/plain": [ 159 | "Parameter containing:\n", 160 | "tensor([1.6111])" 161 | ] 162 | }, 163 | "execution_count": 16, 164 | "metadata": {}, 165 | "output_type": "execute_result" 166 | } 167 | ], 168 | "source": [ 169 | "from torchegranate.distributions import Exponential\n", 170 | "\n", 171 | "X = torch.exp(torch.randn(100, 1))\n", 172 | "mask = torch.ones(100, 1, dtype=bool)\n", 173 | "mask[75:] = False\n", 174 | "X_masked = torch.masked.MaskedTensor(X, mask=mask)\n", 175 | "\n", 176 | "Exponential().fit(X[:75]).scales" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": 17, 182 | "id": "6169a52f", 183 | "metadata": {}, 184 | "outputs": [ 185 | { 186 | "data": { 187 | "text/plain": [ 188 | "Parameter containing:\n", 189 | "tensor([1.6111])" 190 | ] 191 | }, 192 | "execution_count": 17, 193 | "metadata": {}, 194 | "output_type": "execute_result" 195 | } 196 | ], 197 | "source": [ 198 | "Exponential().fit(X_masked).scales" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": null, 204 | "id": "1ea963c8", 205 | "metadata": {}, 206 | "outputs": [], 207 | "source": [] 208 | } 209 | ], 210 | "metadata": { 211 | "kernelspec": { 212 | "display_name": "Python 3 (ipykernel)", 213 | "language": "python", 
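Beyond the hard assignments returned by `predict`, the posterior itself shows the two-thirds advantage. The sketch below rebuilds the same network using the current `pomegranate` module paths (the cells above still import the older `torchegranate` name) and assumes that `predict_proba` accepts the same masked input and returns one posterior tensor per node.

```python
import numpy
import torch

from pomegranate.distributions import Categorical, ConditionalCategorical
from pomegranate.bayesian_network import BayesianNetwork

guest = Categorical([[1./3, 1./3, 1./3]])
prize = Categorical([[1./3, 1./3, 1./3]])
monty = ConditionalCategorical(numpy.array([[
    [[0.0, 0.5, 0.5], [0.0, 0.0, 1.0], [0.0, 1.0, 0.0]],
    [[0.0, 0.0, 1.0], [0.5, 0.0, 0.5], [1.0, 0.0, 0.0]],
    [[0.0, 1.0, 0.0], [1.0, 0.0, 0.0], [0.5, 0.5, 0.0]]
]]))

model = BayesianNetwork([guest, prize, monty], [(guest, monty), (prize, monty)])

# Guest picked door 0 and Monty opened door 2; the prize location is unobserved.
X = torch.tensor([[0, -1, 2]])
X_masked = torch.masked.MaskedTensor(X, mask=X >= 0)

print(model.predict_proba(X_masked)[1])   # posterior over the prize: ~[1/3, 2/3, 0]
```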
214 | "name": "python3" 215 | }, 216 | "language_info": { 217 | "codemirror_mode": { 218 | "name": "ipython", 219 | "version": 3 220 | }, 221 | "file_extension": ".py", 222 | "mimetype": "text/x-python", 223 | "name": "python", 224 | "nbconvert_exporter": "python", 225 | "pygments_lexer": "ipython3", 226 | "version": "3.9.13" 227 | } 228 | }, 229 | "nbformat": 4, 230 | "nbformat_minor": 5 231 | } 232 | -------------------------------------------------------------------------------- /pomegranate/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "1.1.2" 2 | -------------------------------------------------------------------------------- /pomegranate/_bayes.py: -------------------------------------------------------------------------------- 1 | # _bayes.py 2 | # Author: Jacob Schreiber 3 | 4 | 5 | import torch 6 | 7 | from ._utils import _cast_as_tensor 8 | from ._utils import _update_parameter 9 | from ._utils import _check_parameter 10 | from ._utils import _reshape_weights 11 | 12 | from .distributions._distribution import Distribution 13 | 14 | 15 | class BayesMixin(torch.nn.Module): 16 | def _reset_cache(self): 17 | """Reset the internally stored statistics. 18 | 19 | This method is meant to only be called internally. It resets the 20 | stored statistics used to update the model parameters as well as 21 | recalculates the cached values meant to speed up log probability 22 | calculations. 23 | """ 24 | 25 | if self._initialized == False: 26 | return 27 | 28 | self.register_buffer("_w_sum", torch.zeros(self.k, device=self.device)) 29 | self.register_buffer("_log_priors", torch.log(self.priors)) 30 | 31 | def _emission_matrix(self, X, priors=None): 32 | """Return the emission/responsibility matrix. 33 | 34 | This method returns the log probability of each example under each 35 | distribution contained in the model with the log prior probability 36 | of each component added. 37 | 38 | 39 | Parameters 40 | ---------- 41 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 42 | A set of examples to evaluate. 43 | 44 | priors: list, numpy.ndarray, torch.Tensor, shape=(-1, self.k) 45 | Prior probabilities of assigning each symbol to each node. If not 46 | provided, do not include in the calculations (conceptually 47 | equivalent to a uniform probability, but without scaling the 48 | probabilities). This can be used to assign labels to observatons 49 | by setting one of the probabilities for an observation to 1.0. 50 | Note that this can be used to assign hard labels, but does not 51 | have the same semantics for soft labels, in that it only 52 | influences the initial estimate of an observation being generated 53 | by a component, not gives a target. Default is None. 54 | 55 | 56 | Returns 57 | ------- 58 | e: torch.Tensor, shape=(-1, self.k) 59 | A set of log probabilities for each example under each distribution. 
60 | """ 61 | 62 | X = _check_parameter(_cast_as_tensor(X), "X", ndim=2, 63 | shape=(-1, self.d), check_parameter=self.check_data) 64 | 65 | priors = _check_parameter(_cast_as_tensor(priors), "priors", 66 | ndim=2, shape=(X.shape[0], self.k), min_value=0.0, max_value=1.0, 67 | value_sum=1.0, value_sum_dim=-1, check_parameter=self.check_data) 68 | 69 | d = X.shape[0] 70 | e = torch.empty(d, self.k, device=self.device, dtype=self.dtype) 71 | for i, d in enumerate(self.distributions): 72 | e[:, i] = d.log_probability(X) 73 | 74 | if priors is not None: 75 | e += torch.log(priors) 76 | 77 | return e + self._log_priors 78 | 79 | def probability(self, X, priors=None): 80 | """Calculate the probability of each example. 81 | 82 | This method calculates the probability of each example given the 83 | parameters of the distribution. The examples must be given in a 2D 84 | format. 85 | 86 | Note: This differs from some other probability calculation 87 | functions, like those in torch.distributions, because it is not 88 | returning the probability of each feature independently, but rather 89 | the total probability of the entire example. 90 | 91 | 92 | Parameters 93 | ---------- 94 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 95 | A set of examples to evaluate. 96 | 97 | priors: list, numpy.ndarray, torch.Tensor, shape=(-1, self.k) 98 | Prior probabilities of assigning each symbol to each node. If not 99 | provided, do not include in the calculations (conceptually 100 | equivalent to a uniform probability, but without scaling the 101 | probabilities). This can be used to assign labels to observatons 102 | by setting one of the probabilities for an observation to 1.0. 103 | Note that this can be used to assign hard labels, but does not 104 | have the same semantics for soft labels, in that it only 105 | influences the initial estimate of an observation being generated 106 | by a component, not gives a target. Default is None. 107 | 108 | 109 | Returns 110 | ------- 111 | prob: torch.Tensor, shape=(-1,) 112 | The probability of each example. 113 | """ 114 | 115 | return torch.exp(self.log_probability(X, priors=priors)) 116 | 117 | def log_probability(self, X, priors=None): 118 | """Calculate the log probability of each example. 119 | 120 | This method calculates the log probability of each example given the 121 | parameters of the distribution. The examples must be given in a 2D 122 | format. For a Bernoulli distribution, each entry in the data must 123 | be either 0 or 1. 124 | 125 | Note: This differs from some other log probability calculation 126 | functions, like those in torch.distributions, because it is not 127 | returning the log probability of each feature independently, but rather 128 | the total log probability of the entire example. 129 | 130 | 131 | Parameters 132 | ---------- 133 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 134 | A set of examples to evaluate. 135 | 136 | priors: list, numpy.ndarray, torch.Tensor, shape=(-1, self.k) 137 | Prior probabilities of assigning each symbol to each node. If not 138 | provided, do not include in the calculations (conceptually 139 | equivalent to a uniform probability, but without scaling the 140 | probabilities). This can be used to assign labels to observatons 141 | by setting one of the probabilities for an observation to 1.0. 
142 | Note that this can be used to assign hard labels, but does not 143 | have the same semantics for soft labels, in that it only 144 | influences the initial estimate of an observation being generated 145 | by a component, not gives a target. Default is None. 146 | 147 | 148 | Returns 149 | ------- 150 | logp: torch.Tensor, shape=(-1,) 151 | The log probability of each example. 152 | """ 153 | 154 | e = self._emission_matrix(X, priors=priors) 155 | return torch.logsumexp(e, dim=1) 156 | 157 | def predict(self, X, priors=None): 158 | """Calculate the label assignment for each example. 159 | 160 | This method calculates the label for each example as the most likely 161 | component after factoring in the prior probability. 162 | 163 | 164 | Parameters 165 | ---------- 166 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 167 | A set of examples to summarize. 168 | 169 | priors: list, numpy.ndarray, torch.Tensor, shape=(-1, self.k) 170 | Prior probabilities of assigning each symbol to each node. If not 171 | provided, do not include in the calculations (conceptually 172 | equivalent to a uniform probability, but without scaling the 173 | probabilities). This can be used to assign labels to observatons 174 | by setting one of the probabilities for an observation to 1.0. 175 | Note that this can be used to assign hard labels, but does not 176 | have the same semantics for soft labels, in that it only 177 | influences the initial estimate of an observation being generated 178 | by a component, not gives a target. Default is None. 179 | 180 | 181 | Returns 182 | ------- 183 | y: torch.Tensor, shape=(-1,) 184 | The predicted label for each example. 185 | """ 186 | 187 | e = self._emission_matrix(X, priors=priors) 188 | return torch.argmax(e, dim=1) 189 | 190 | def predict_proba(self, X, priors=None): 191 | """Calculate the posterior probabilities for each example. 192 | 193 | This method calculates the posterior probabilities for each example 194 | under each component of the model after factoring in the prior 195 | probability and normalizing across all the components. 196 | 197 | 198 | Parameters 199 | ---------- 200 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 201 | A set of examples to summarize. 202 | 203 | priors: list, numpy.ndarray, torch.Tensor, shape=(-1, self.k) 204 | Prior probabilities of assigning each symbol to each node. If not 205 | provided, do not include in the calculations (conceptually 206 | equivalent to a uniform probability, but without scaling the 207 | probabilities). This can be used to assign labels to observatons 208 | by setting one of the probabilities for an observation to 1.0. 209 | Note that this can be used to assign hard labels, but does not 210 | have the same semantics for soft labels, in that it only 211 | influences the initial estimate of an observation being generated 212 | by a component, not gives a target. Default is None. 213 | 214 | 215 | Returns 216 | ------- 217 | y: torch.Tensor, shape=(-1, self.k) 218 | The posterior probabilities for each example under each component. 219 | """ 220 | 221 | e = self._emission_matrix(X, priors=priors) 222 | return torch.exp(e - torch.logsumexp(e, dim=1, keepdims=True)) 223 | 224 | def predict_log_proba(self, X, priors=None): 225 | """Calculate the log posterior probabilities for each example. 
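A sketch showing that the prediction helpers above are all views of the same emission matrix, so they agree with one another (illustrative data only).

```python
import torch
from pomegranate.distributions import Normal
from pomegranate.bayes_classifier import BayesClassifier

X = torch.randn(200, 2)
y = (X.sum(dim=1) > 0).long()

model = BayesClassifier([Normal(), Normal()]).fit(X, y)
proba = model.predict_proba(X)

assert torch.allclose(proba.sum(dim=1), torch.ones(len(X)))        # rows are normalized
assert torch.all(model.predict(X) == proba.argmax(dim=1))          # predict is the argmax
assert torch.allclose(torch.exp(model.predict_log_proba(X)), proba)
```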
226 | 227 | This method calculates the log posterior probabilities for each example 228 | under each component of the model after factoring in the prior 229 | probability and normalizing across all the components. 230 | 231 | 232 | Parameters 233 | ---------- 234 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 235 | A set of examples to summarize. 236 | 237 | priors: list, numpy.ndarray, torch.Tensor, shape=(-1, self.k) 238 | Prior probabilities of assigning each symbol to each node. If not 239 | provided, do not include in the calculations (conceptually 240 | equivalent to a uniform probability, but without scaling the 241 | probabilities). This can be used to assign labels to observatons 242 | by setting one of the probabilities for an observation to 1.0. 243 | Note that this can be used to assign hard labels, but does not 244 | have the same semantics for soft labels, in that it only 245 | influences the initial estimate of an observation being generated 246 | by a component, not gives a target. Default is None. 247 | 248 | 249 | Returns 250 | ------- 251 | y: torch.Tensor, shape=(-1, self.k) 252 | The log posterior probabilities for each example under each 253 | component. 254 | """ 255 | 256 | e = self._emission_matrix(X, priors=priors) 257 | return e - torch.logsumexp(e, dim=1, keepdims=True) 258 | 259 | def from_summaries(self): 260 | """Update the model parameters given the extracted statistics. 261 | 262 | This method uses calculated statistics from calls to the `summarize` 263 | method to update the distribution parameters. Hyperparameters for the 264 | update are passed in at initialization time. 265 | 266 | Note: Internally, a call to `fit` is just a successive call to the 267 | `summarize` method followed by the `from_summaries` method. 268 | """ 269 | 270 | for d in self.distributions: 271 | d.from_summaries() 272 | 273 | if self.frozen == True: 274 | return 275 | 276 | priors = self._w_sum / torch.sum(self._w_sum) 277 | 278 | _update_parameter(self.priors, priors, self.inertia) 279 | self._reset_cache() 280 | -------------------------------------------------------------------------------- /pomegranate/bayes_classifier.py: -------------------------------------------------------------------------------- 1 | # BayesClassifier.py 2 | # Author: Jacob Schreiber 3 | 4 | import numpy 5 | import torch 6 | 7 | from ._utils import _cast_as_tensor 8 | from ._utils import _cast_as_parameter 9 | from ._utils import _update_parameter 10 | from ._utils import _check_parameter 11 | from ._utils import _reshape_weights 12 | 13 | from ._bayes import BayesMixin 14 | 15 | from .distributions._distribution import Distribution 16 | 17 | 18 | class BayesClassifier(BayesMixin, Distribution): 19 | """A Bayes classifier object. 20 | 21 | A simple way to produce a classifier using probabilistic models is to plug 22 | them into Bayes' rule. Basically, inference is the same as the 'E' step in 23 | EM for mixture models. However, fitting can be significantly faster because 24 | instead of having to iteratively infer labels and learn parameters, you can 25 | just learn the parameters given the known labels. Because the learning step 26 | for most models are simple MLE estimates, this can be done extremely 27 | quickly. 28 | 29 | Although the most common distribution to use is a Gaussian with a diagonal 30 | covariance matrix, termed the Gaussian naive Bayes model, any probability 31 | distribution can be used. 
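A sketch of the `from_summaries` update above applied to the class priors: with unit sample weights they reduce to the observed class frequencies. The Exponential components and the 800/200 split are arbitrary choices for illustration.

```python
import torch
from pomegranate.distributions import Exponential
from pomegranate.bayes_classifier import BayesClassifier

X = torch.exp(torch.randn(1000, 1))                        # positive-valued data
y = torch.cat([torch.zeros(800), torch.ones(200)]).long()

model = BayesClassifier([Exponential(), Exponential()]).fit(X, y)
print(model.priors)                                        # -> [0.8, 0.2]
```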
Here, you can just drop any distributions or 32 | probabilistic model in as long as it has the `log_probability`, `summarize`, 33 | and `from_samples` methods implemented. 34 | 35 | Further, the probabilistic models do not even need to be simple 36 | distributions. The distributions can be mixture models or hidden Markov 37 | models or Bayesian networks. 38 | 39 | 40 | Parameters 41 | ---------- 42 | distributions: tuple or list 43 | A set of distribution objects. These objects do not need to be 44 | initialized, i.e., can be "Normal()". 45 | 46 | priors: tuple, numpy.ndarray, torch.Tensor, or None. shape=(k,), optional 47 | The prior probabilities over the given distributions. Default is None. 48 | 49 | inertia: float, [0, 1], optional 50 | Indicates the proportion of the update to apply to the parameters 51 | during training. When the inertia is 0.0, the update is applied in 52 | its entirety and the previous parameters are ignored. When the 53 | inertia is 1.0, the update is entirely ignored and the previous 54 | parameters are kept, equivalently to if the parameters were frozen. 55 | 56 | frozen: bool, optional 57 | Whether all the parameters associated with this distribution are frozen. 58 | If you want to freeze individual pameters, or individual values in those 59 | parameters, you must modify the `frozen` attribute of the tensor or 60 | parameter directly. Default is False. 61 | 62 | check_data: bool, optional 63 | Whether to check properties of the data and potentially recast it to 64 | torch.tensors. This does not prevent checking of parameters but can 65 | slightly speed up computation when you know that your inputs are valid. 66 | Setting this to False is also necessary for compiling. Default is True. 67 | """ 68 | 69 | def __init__(self, distributions, priors=None, inertia=0.0, frozen=False, 70 | check_data=True): 71 | super().__init__(inertia=inertia, frozen=frozen, check_data=check_data) 72 | self.name = "BayesClassifier" 73 | 74 | _check_parameter(distributions, "distributions", dtypes=(list, tuple, 75 | numpy.array, torch.nn.ModuleList)) 76 | self.distributions = torch.nn.ModuleList(distributions) 77 | 78 | self.priors = _check_parameter(_cast_as_parameter(priors), "priors", 79 | min_value=0, max_value=1, ndim=1, value_sum=1.0, 80 | shape=(len(distributions),)) 81 | 82 | self.k = len(distributions) 83 | 84 | if all(d._initialized for d in distributions): 85 | self._initialized = True 86 | self.d = distributions[0].d 87 | if self.priors is None: 88 | self.priors = _cast_as_parameter(torch.ones(self.k) / self.k) 89 | 90 | else: 91 | self._initialized = False 92 | self.d = None 93 | 94 | self._reset_cache() 95 | 96 | def _initialize(self, d): 97 | """Initialize the probability distribution. 98 | 99 | This method is meant to only be called internally. It initializes the 100 | parameters of the distribution and stores its dimensionality. For more 101 | complex methods, this function will do more. 102 | 103 | 104 | Parameters 105 | ---------- 106 | d: int 107 | The dimensionality the distribution is being initialized to. 108 | """ 109 | 110 | self.priors = _cast_as_parameter(torch.ones(self.k, dtype=self.dtype, 111 | device=self.device) / self.k) 112 | 113 | self._initialized = True 114 | super()._initialize(d) 115 | 116 | def fit(self, X, y, sample_weight=None): 117 | """Fit the model to optionally weighted examples. 118 | 119 | This method implements the core of the learning process. 
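As the docstring notes, the per-class models do not have to be simple distributions. A sketch where one class is modeled by a two-component Gaussian mixture; this should work because each component only needs `summarize`, `from_summaries`, and `log_probability`, though note that a single `fit` call applies only one EM pass to the mixture.

```python
import torch
from pomegranate.distributions import Normal
from pomegranate.gmm import GeneralMixtureModel
from pomegranate.bayes_classifier import BayesClassifier

X0 = torch.randn(500, 2)                                            # class 0: one blob
X1 = torch.cat([torch.randn(250, 2) - 3, torch.randn(250, 2) + 3])  # class 1: two blobs

X = torch.cat([X0, X1])
y = torch.cat([torch.zeros(500), torch.ones(500)]).long()

model = BayesClassifier([Normal(), GeneralMixtureModel([Normal(), Normal()])])
model.fit(X, y)

print((model.predict(X) == y).float().mean())                       # training accuracy
```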
For a 120 | general Bayes model, this involves fitting each component of the model 121 | using the labels that are provided. 122 | 123 | This method is largely a wrapper around the `summarize` and 124 | `from_summaries` methods. Its primary contribution is providing a 125 | single call that performs both steps on the labeled data. 126 | 127 | 128 | Parameters 129 | ---------- 130 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 131 | A set of examples to evaluate. 132 | 133 | y: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1,) 134 | A set of labels, one per example. 135 | 136 | sample_weight: list, tuple, numpy.ndarray, torch.Tensor, optional 137 | A set of weights for the examples. This can be either of shape 138 | (-1, self.d) or a vector of shape (-1,). Default is ones. 139 | 140 | 141 | Returns 142 | ------- 143 | self 144 | """ 145 | 146 | self.summarize(X, y, sample_weight=sample_weight) 147 | self.from_summaries() 148 | return self 149 | 150 | def summarize(self, X, y, sample_weight=None): 151 | """Extract the sufficient statistics from a batch of data. 152 | 153 | This method calculates the sufficient statistics from optionally 154 | weighted data and adds them to the stored cache. The examples must be 155 | given in a 2D format. Sample weights can either be provided as one 156 | value per example or as a 2D matrix of weights for each feature in 157 | each example. 158 | 159 | For a Bayes' classifier, this step involves partitioning the data 160 | according to the labels and then training each component using MLE 161 | estimates. 162 | 163 | 164 | Parameters 165 | ---------- 166 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 167 | A set of examples to summarize. 168 | 169 | y: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1,) 170 | A set of labels, one per example. 171 | 172 | sample_weight: list, tuple, numpy.ndarray, torch.Tensor, optional 173 | A set of weights for the examples. This can be either of shape 174 | (-1, self.d) or a vector of shape (-1,). Default is ones.
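A sketch of the partitioning described above: fitting the classifier is the same as fitting each component on the subset of rows with the matching label.

```python
import torch
from pomegranate.distributions import Normal
from pomegranate.bayes_classifier import BayesClassifier

X = torch.randn(300, 4)
y = (torch.rand(300) > 0.5).long()

clf = BayesClassifier([Normal(), Normal()]).fit(X, y)
by_hand = [Normal().fit(X[y == 0]), Normal().fit(X[y == 1])]

assert torch.allclose(clf.distributions[0].means, by_hand[0].means)
assert torch.allclose(clf.distributions[1].covs, by_hand[1].covs)
```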
175 | """ 176 | 177 | X, sample_weight = super().summarize(X, sample_weight=sample_weight) 178 | y = _check_parameter(_cast_as_tensor(y), "y", min_value=0, 179 | max_value=self.k-1, ndim=1, shape=(len(X),), 180 | check_parameter=self.check_data) 181 | sample_weight = _check_parameter(sample_weight, "sample_weight", 182 | min_value=0, shape=(-1, self.d), check_parameter=self.check_data) 183 | 184 | for j, d in enumerate(self.distributions): 185 | idx = y == j 186 | d.summarize(X[idx], sample_weight[idx]) 187 | 188 | if self.frozen == False: 189 | self._w_sum[j] = self._w_sum[j] + sample_weight[idx].mean( 190 | dim=-1).sum() 191 | -------------------------------------------------------------------------------- /pomegranate/distributions/__init__.py: -------------------------------------------------------------------------------- 1 | from .bernoulli import Bernoulli 2 | from .categorical import Categorical 3 | from .conditional_categorical import ConditionalCategorical 4 | from .dirac_delta import DiracDelta 5 | from .exponential import Exponential 6 | from .gamma import Gamma 7 | from .independent_components import IndependentComponents 8 | from .joint_categorical import JointCategorical 9 | from .normal import Normal 10 | from .poisson import Poisson 11 | from .student_t import StudentT 12 | from .uniform import Uniform 13 | from .zero_inflated import ZeroInflated 14 | from .lognormal import LogNormal 15 | from .halfnormal import HalfNormal -------------------------------------------------------------------------------- /pomegranate/distributions/_distribution.py: -------------------------------------------------------------------------------- 1 | # _distribution.py 2 | # Jacob Schreiber 3 | 4 | import torch 5 | 6 | from .._utils import _cast_as_tensor 7 | from .._utils import _update_parameter 8 | from .._utils import _cast_as_parameter 9 | from .._utils import _check_parameter 10 | from .._utils import _reshape_weights 11 | 12 | 13 | class Distribution(torch.nn.Module): 14 | """A base distribution object. 15 | 16 | This distribution is inherited by all the other distributions. 
17 | """ 18 | 19 | def __init__(self, inertia, frozen, check_data): 20 | super(Distribution, self).__init__() 21 | self._device = _cast_as_parameter([0.0]) 22 | 23 | _check_parameter(inertia, "inertia", min_value=0, max_value=1, ndim=0) 24 | _check_parameter(frozen, "frozen", value_set=[True, False], ndim=0) 25 | _check_parameter(check_data, "check_data", value_set=[True, False], 26 | ndim=0) 27 | 28 | self.register_buffer("inertia", _cast_as_tensor(inertia)) 29 | self.register_buffer("frozen", _cast_as_tensor(frozen)) 30 | self.register_buffer("check_data", _cast_as_tensor(check_data)) 31 | 32 | self._initialized = False 33 | 34 | @property 35 | def device(self): 36 | try: 37 | return next(self.parameters()).device 38 | except: 39 | return 'cpu' 40 | 41 | @property 42 | def dtype(self): 43 | return next(self.parameters()).dtype 44 | 45 | def freeze(self): 46 | self.register_buffer("frozen", _cast_as_tensor(True)) 47 | return self 48 | 49 | def unfreeze(self): 50 | self.register_buffer("frozen", _cast_as_tensor(False)) 51 | return self 52 | 53 | def forward(self, X): 54 | self.summarize(X) 55 | return self.log_probability(X) 56 | 57 | def backward(self, X): 58 | self.from_summaries() 59 | return X 60 | 61 | def _initialize(self, d): 62 | self.d = d 63 | self._reset_cache() 64 | 65 | def _reset_cache(self): 66 | raise NotImplementedError 67 | 68 | def probability(self, X): 69 | return torch.exp(self.log_probability(X)) 70 | 71 | def log_probability(self, X): 72 | raise NotImplementedError 73 | 74 | def fit(self, X, sample_weight=None): 75 | self.summarize(X, sample_weight=sample_weight) 76 | self.from_summaries() 77 | return self 78 | 79 | def summarize(self, X, sample_weight=None): 80 | if not self._initialized: 81 | self._initialize(len(X[0])) 82 | 83 | X = _cast_as_tensor(X) 84 | _check_parameter(X, "X", ndim=2, shape=(-1, self.d), 85 | check_parameter=self.check_data) 86 | 87 | sample_weight = _reshape_weights(X, _cast_as_tensor(sample_weight), 88 | device=self.device) 89 | 90 | return X, sample_weight 91 | 92 | def from_summaries(self): 93 | raise NotImplementedError 94 | 95 | 96 | class ConditionalDistribution(Distribution): 97 | def __init__(self, inertia, frozen, check_data): 98 | super().__init__(inertia=inertia, frozen=frozen, check_data=check_data) 99 | 100 | def marginal(self, dim): 101 | raise NotImplementedError -------------------------------------------------------------------------------- /pomegranate/distributions/bernoulli.py: -------------------------------------------------------------------------------- 1 | # bernoulli.py 2 | # Contact: Jacob Schreiber 3 | 4 | import torch 5 | 6 | from .._utils import _cast_as_tensor 7 | from .._utils import _cast_as_parameter 8 | from .._utils import _update_parameter 9 | from .._utils import _check_parameter 10 | from .._utils import eps 11 | 12 | from ._distribution import Distribution 13 | 14 | 15 | class Bernoulli(Distribution): 16 | """A Bernoulli distribution object. 17 | 18 | A Bernoulli distribution models the probability of a binary variable 19 | occurring. rates of discrete events, and has a probability parameter 20 | describing this value. This distribution assumes that each feature is 21 | independent of the others. 22 | 23 | There are two ways to initialize this object. The first is to pass in 24 | the tensor of probability parameters, at which point they can immediately be 25 | used. 
The second is to not pass in the probability parameters and then call 26 | either `fit` or `summarize` + `from_summaries`, at which point the probability 27 | parameter will be learned from data. 28 | 29 | 30 | Parameters 31 | ---------- 32 | probs: list, numpy.ndarray, torch.Tensor or None, shape=(d,), optional 33 | The probability parameters for each feature. Default is None. 34 | 35 | inertia: float, [0, 1], optional 36 | Indicates the proportion of the update to apply to the parameters 37 | during training. When the inertia is 0.0, the update is applied in 38 | its entirety and the previous parameters are ignored. When the 39 | inertia is 1.0, the update is entirely ignored and the previous 40 | parameters are kept, equivalently to if the parameters were frozen. 41 | 42 | frozen: bool, optional 43 | Whether all the parameters associated with this distribution are frozen. 44 | If you want to freeze individual parameters, or individual values in those 45 | parameters, you must modify the `frozen` attribute of the tensor or 46 | parameter directly. Default is False. 47 | 48 | check_data: bool, optional 49 | Whether to check properties of the data and potentially recast it to 50 | torch.tensors. This does not prevent checking of parameters but can 51 | slightly speed up computation when you know that your inputs are valid. 52 | Setting this to False is also necessary for compiling. 53 | """ 54 | 55 | def __init__(self, probs=None, inertia=0.0, frozen=False, check_data=True): 56 | super().__init__(inertia=inertia, frozen=frozen, check_data=check_data) 57 | self.name = "Bernoulli" 58 | 59 | self.probs = _check_parameter(_cast_as_parameter(probs), "probs", 60 | min_value=eps, max_value=1-eps, ndim=1) 61 | 62 | self._initialized = self.probs is not None 63 | self.d = self.probs.shape[-1] if self._initialized else None 64 | self._reset_cache() 65 | 66 | def _initialize(self, d): 67 | """Initialize the probability distribution. 68 | 69 | This method is meant to only be called internally. It initializes the 70 | parameters of the distribution and stores its dimensionality. For more 71 | complex methods, this function will do more. 72 | 73 | 74 | Parameters 75 | ---------- 76 | d: int 77 | The dimensionality the distribution is being initialized to. 78 | """ 79 | 80 | self.probs = _cast_as_parameter(torch.zeros(d, dtype=self.dtype, 81 | device=self.device)) 82 | 83 | self._initialized = True 84 | super()._initialize(d) 85 | 86 | def _reset_cache(self): 87 | """Reset the internally stored statistics. 88 | 89 | This method is meant to only be called internally. It resets the 90 | stored statistics used to update the model parameters as well as 91 | recalculates the cached values meant to speed up log probability 92 | calculations. 93 | """ 94 | 95 | if self._initialized == False: 96 | return 97 | 98 | self.register_buffer("_w_sum", torch.zeros(self.d, device=self.device)) 99 | self.register_buffer("_xw_sum", torch.zeros(self.d, device=self.device)) 100 | 101 | self.register_buffer("_log_probs", torch.log(self.probs)) 102 | self.register_buffer("_log_inv_probs", torch.log(-(self.probs-1))) 103 | 104 | def sample(self, n): 105 | """Sample from the probability distribution. 106 | 107 | This method will return `n` samples generated from the underlying 108 | probability distribution. 109 | 110 | 111 | Parameters 112 | ---------- 113 | n: int 114 | The number of samples to generate. 115 | 116 | 117 | Returns 118 | ------- 119 | X: torch.tensor, shape=(n, self.d) 120 | Randomly generated samples.
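A minimal sketch of the two initialization routes described in the docstring: passing the probabilities up front versus learning them from data.

```python
import torch
from pomegranate.distributions import Bernoulli

# 1) Parameters given directly.
d1 = Bernoulli([0.2, 0.5, 0.9])
print(d1.probs)

# 2) Parameters learned from binary data with fit (= summarize + from_summaries).
X = (torch.rand(1000, 3) < torch.tensor([0.2, 0.5, 0.9])).float()
d2 = Bernoulli().fit(X)
print(d2.probs)        # close to [0.2, 0.5, 0.9]
```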
121 | """ 122 | 123 | return torch.distributions.Bernoulli(self.probs).sample([n]) 124 | 125 | def log_probability(self, X): 126 | """Calculate the log probability of each example. 127 | 128 | This method calculates the log probability of each example given the 129 | parameters of the distribution. The examples must be given in a 2D 130 | format. For a Bernoulli distribution, each entry in the data must 131 | be either 0 or 1. 132 | 133 | Note: This differs from some other log probability calculation 134 | functions, like those in torch.distributions, because it is not 135 | returning the log probability of each feature independently, but rather 136 | the total log probability of the entire example. 137 | 138 | 139 | Parameters 140 | ---------- 141 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 142 | A set of examples to evaluate. 143 | 144 | 145 | Returns 146 | ------- 147 | logp: torch.Tensor, shape=(-1,) 148 | The log probability of each example. 149 | """ 150 | 151 | X = _check_parameter(_cast_as_tensor(X, dtype=self.probs.dtype), "X", 152 | value_set=(0, 1), ndim=2, shape=(-1, self.d), 153 | check_parameter=self.check_data) 154 | 155 | return X.matmul(self._log_probs) + (1-X).matmul(self._log_inv_probs) 156 | 157 | def summarize(self, X, sample_weight=None): 158 | """Extract the sufficient statistics from a batch of data. 159 | 160 | This method calculates the sufficient statistics from optionally 161 | weighted data and adds them to the stored cache. The examples must be 162 | given in a 2D format. Sample weights can either be provided as one 163 | value per example or as a 2D matrix of weights for each feature in 164 | each example. 165 | 166 | 167 | Parameters 168 | ---------- 169 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 170 | A set of examples to summarize. 171 | 172 | sample_weight: list, tuple, numpy.ndarray, torch.Tensor, optional 173 | A set of weights for the examples. This can be either of shape 174 | (-1, self.d) or a vector of shape (-1,). Default is ones. 175 | """ 176 | 177 | if self.frozen == True: 178 | return 179 | 180 | X, sample_weight = super().summarize(X, sample_weight=sample_weight) 181 | _check_parameter(X, "X", value_set=(0, 1), 182 | check_parameter=self.check_data) 183 | 184 | self._w_sum += torch.sum(sample_weight, dim=0) 185 | self._xw_sum += torch.sum(X * sample_weight, dim=0) 186 | 187 | def from_summaries(self): 188 | """Update the model parameters given the extracted statistics. 189 | 190 | This method uses calculated statistics from calls to the `summarize` 191 | method to update the distribution parameters. Hyperparameters for the 192 | update are passed in at initialization time. 193 | 194 | Note: Internally, a call to `fit` is just a successive call to the 195 | `summarize` method followed by the `from_summaries` method. 
196 | """ 197 | 198 | if self.frozen == True: 199 | return 200 | 201 | probs = self._xw_sum / self._w_sum 202 | _update_parameter(self.probs, probs, self.inertia) 203 | self._reset_cache() 204 | -------------------------------------------------------------------------------- /pomegranate/distributions/categorical.py: -------------------------------------------------------------------------------- 1 | # categorical.py 2 | # Contact: Jacob Schreiber 3 | 4 | import torch 5 | 6 | from .._utils import _inplace_add 7 | from .._utils import _cast_as_tensor 8 | from .._utils import _cast_as_parameter 9 | from .._utils import _update_parameter 10 | from .._utils import _check_parameter 11 | from .._utils import _reshape_weights 12 | 13 | from ._distribution import Distribution 14 | 15 | 16 | class Categorical(Distribution): 17 | """A categorical distribution object. 18 | 19 | A categorical distribution models the probability of a set of distinct 20 | values happening. It is an extension of the Bernoulli distribution to 21 | multiple values. Sometimes it is referred to as a discrete distribution, 22 | but this distribution does not enforce that the numeric values used for the 23 | keys have any relationship based on their identity. Permuting the keys will 24 | have no effect on the calculation. This distribution assumes that the 25 | features are independent from each other. 26 | 27 | The keys must be contiguous non-negative integers that begin at zero. 28 | Because the probabilities are represented as a single tensor, each feature 29 | must have values for all keys up to the maximum key of any one distribution. 30 | Specifically, if one feature has 10 keys and a second feature has only 4, 31 | the tensor must go out to 10 for each feature but encode probabilities of 32 | zero for the second feature. 33 | 34 | 35 | Parameters 36 | ---------- 37 | probs: list, numpy.ndarray, torch.tensor or None, shape=(k, d), optional 38 | Probabilities for each key for each feature, where k is the largest 39 | number of keys across all features. Default is None 40 | 41 | n_categories: list, numpy.ndarray, torch.tensor or None, optional 42 | The number of categories for each feature in the data. Only needs to 43 | be provided when the parameters will be learned directly from data and 44 | you want to make sure that right number of keys are included in each 45 | dimension. Default is None. 46 | 47 | pseudocount: float, optional 48 | A value to add to the observed counts of each feature when training. 49 | Setting this to a positive value ensures that no probabilities are 50 | truly zero. Default is 0. 51 | 52 | inertia: float, (0, 1), optional 53 | Indicates the proportion of the update to apply to the parameters 54 | during training. When the inertia is 0.0, the update is applied in 55 | its entirety and the previous parameters are ignored. When the 56 | inertia is 1.0, the update is entirely ignored and the previous 57 | parameters are kept, equivalently to if the parameters were frozen. 58 | 59 | frozen: bool, optional 60 | Whether all the parameters associated with this distribution are frozen. 61 | If you want to freeze individual pameters, or individual values in those 62 | parameters, you must modify the `frozen` attribute of the tensor or 63 | parameter directly. Default is False. 64 | 65 | check_data: bool, optional 66 | Whether to check properties of the data and potentially recast it to 67 | torch.tensors. 
This does not prevent checking of parameters but can 68 | slightly speed up computation when you know that your inputs are valid. 69 | Setting this to False is also necessary for compiling. 70 | """ 71 | 72 | def __init__(self, probs=None, n_categories=None, pseudocount=0.0, 73 | inertia=0.0, frozen=False, check_data=True): 74 | super().__init__(inertia=inertia, frozen=frozen, check_data=check_data) 75 | self.name = "Categorical" 76 | 77 | self.probs = _check_parameter(_cast_as_parameter(probs), "probs", 78 | min_value=0, max_value=1, ndim=2) 79 | 80 | self.pseudocount = pseudocount 81 | 82 | self._initialized = probs is not None 83 | self.d = self.probs.shape[-2] if self._initialized else None 84 | 85 | if n_categories is not None: 86 | self.n_keys = n_categories 87 | else: 88 | self.n_keys = self.probs.shape[-1] if self._initialized else None 89 | 90 | self._reset_cache() 91 | 92 | def _initialize(self, d, n_keys): 93 | """Initialize the probability distribution. 94 | 95 | This method is meant to only be called internally. It initializes the 96 | parameters of the distribution and stores its dimensionality. For more 97 | complex methods, this function will do more. 98 | 99 | 100 | Parameters 101 | ---------- 102 | d: int 103 | The dimensionality the distribution is being initialized to. 104 | 105 | n_keys: int 106 | The number of keys the distribution is being initialized with. 107 | """ 108 | 109 | self.probs = _cast_as_parameter(torch.zeros(d, n_keys, 110 | dtype=self.dtype, device=self.device)) 111 | 112 | self.n_keys = n_keys 113 | self._initialized = True 114 | super()._initialize(d) 115 | 116 | def _reset_cache(self): 117 | """Reset the internally stored statistics. 118 | 119 | This method is meant to only be called internally. It resets the 120 | stored statistics used to update the model parameters as well as 121 | recalculates the cached values meant to speed up log probability 122 | calculations. 123 | """ 124 | 125 | if self._initialized == False: 126 | return 127 | 128 | self.register_buffer("_w_sum", torch.zeros(self.d, device=self.device)) 129 | self.register_buffer("_xw_sum", torch.zeros(self.d, self.n_keys, 130 | device=self.device)) 131 | 132 | self.register_buffer("_log_probs", torch.log(self.probs)) 133 | 134 | def sample(self, n): 135 | """Sample from the probability distribution. 136 | 137 | This method will return `n` samples generated from the underlying 138 | probability distribution. 139 | 140 | 141 | Parameters 142 | ---------- 143 | n: int 144 | The number of samples to generate. 145 | 146 | 147 | Returns 148 | ------- 149 | X: torch.tensor, shape=(n, self.d) 150 | Randomly generated samples. 151 | """ 152 | 153 | return torch.distributions.Categorical(self.probs).sample([n]) 154 | 155 | def log_probability(self, X): 156 | """Calculate the log probability of each example. 157 | 158 | This method calculates the log probability of each example given the 159 | parameters of the distribution. The examples must be given in a 2D 160 | format. For a categorical distribution, each entry in the data must 161 | be an integer in the range [0, n_keys). 162 | 163 | Note: This differs from some other log probability calculation 164 | functions, like those in torch.distributions, because it is not 165 | returning the log probability of each feature independently, but rather 166 | the total log probability of the entire example. 
167 | 168 | 169 | Parameters 170 | ---------- 171 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 172 | A set of examples to evaluate. 173 | 174 | 175 | Returns 176 | ------- 177 | logp: torch.Tensor, shape=(-1,) 178 | The log probability of each example. 179 | """ 180 | 181 | X = _check_parameter(_cast_as_tensor(X), "X", min_value=0.0, 182 | max_value=self.n_keys-1, ndim=2, shape=(-1, self.d), 183 | check_parameter=self.check_data) 184 | 185 | logps = torch.zeros(X.shape[0], dtype=self.probs.dtype, 186 | device=self.device) 187 | 188 | for i in range(self.d): 189 | if isinstance(X, torch.masked.MaskedTensor): 190 | logp_ = self._log_probs[i][X[:, i]._masked_data] 191 | logp_[~X[:, i]._masked_mask] = 0 192 | logps += logp_ 193 | else: 194 | logps += self._log_probs[i][X[:, i]] 195 | 196 | return logps 197 | 198 | def summarize(self, X, sample_weight=None): 199 | """Extract the sufficient statistics from a batch of data. 200 | 201 | This method calculates the sufficient statistics from optionally 202 | weighted data and adds them to the stored cache. The examples must be 203 | given in a 2D format. Sample weights can either be provided as one 204 | value per example or as a 2D matrix of weights for each feature in 205 | each example. 206 | 207 | 208 | Parameters 209 | ---------- 210 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 211 | A set of examples to summarize. 212 | 213 | sample_weight: list, tuple, numpy.ndarray, torch.Tensor, optional 214 | A set of weights for the examples. This can be either of shape 215 | (-1, self.d) or a vector of shape (-1,). Default is ones. 216 | """ 217 | 218 | if self.frozen == True: 219 | return 220 | 221 | X = _cast_as_tensor(X) 222 | if not self._initialized: 223 | if self.n_keys is not None: 224 | n_keys = self.n_keys 225 | elif isinstance(X, torch.masked.MaskedTensor): 226 | n_keys = int(torch.max(X._masked_data)) + 1 227 | else: 228 | n_keys = int(torch.max(X)) + 1 229 | 230 | self._initialize(X.shape[1], n_keys) 231 | 232 | X = _check_parameter(X, "X", min_value=0, max_value=self.n_keys-1, 233 | ndim=2, shape=(-1, self.d), check_parameter=self.check_data) 234 | sample_weight = _reshape_weights(X, _cast_as_tensor(sample_weight)) 235 | 236 | _inplace_add(self._w_sum, torch.sum(sample_weight, dim=0)) 237 | for i in range(self.n_keys): 238 | _inplace_add(self._xw_sum[:, i], torch.sum((X == i) * sample_weight, 239 | dim=0)) 240 | 241 | def from_summaries(self): 242 | """Update the model parameters given the extracted statistics. 243 | 244 | This method uses calculated statistics from calls to the `summarize` 245 | method to update the distribution parameters. Hyperparameters for the 246 | update are passed in at initialization time. 247 | 248 | Note: Internally, a call to `fit` is just a successive call to the 249 | `summarize` method followed by the `from_summaries` method. 
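A sketch of the masked branch in the loop above: features hidden by the mask contribute zero to the log probability, so partially observed rows can still be scored (here the fill value under the mask stays within the valid key range).

```python
import torch
from pomegranate.distributions import Categorical

d = Categorical([[0.5, 0.5],
                 [0.1, 0.9]])

X = torch.tensor([[1, 0]])
mask = torch.tensor([[True, False]])        # second feature is unobserved
X_masked = torch.masked.MaskedTensor(X, mask=mask)

print(d.log_probability(X_masked))          # log(0.5), from the first feature only
```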
250 | """ 251 | 252 | if self.frozen == True: 253 | return 254 | 255 | probs = (self._xw_sum + self.pseudocount) / (self._w_sum + 256 | self.pseudocount * self.n_keys).unsqueeze(1) 257 | 258 | _update_parameter(self.probs, probs, self.inertia) 259 | self._reset_cache() 260 | -------------------------------------------------------------------------------- /pomegranate/distributions/conditional_categorical.py: -------------------------------------------------------------------------------- 1 | # conditional_categorical.py 2 | # Contact: Jacob Schreiber 3 | 4 | import numpy 5 | import torch 6 | import itertools 7 | 8 | from .._utils import _cast_as_tensor 9 | from .._utils import _cast_as_parameter 10 | from .._utils import _update_parameter 11 | from .._utils import _check_parameter 12 | from .._utils import _reshape_weights 13 | 14 | from .._utils import BufferList 15 | 16 | from ._distribution import ConditionalDistribution 17 | from .categorical import Categorical 18 | 19 | 20 | class ConditionalCategorical(ConditionalDistribution): 21 | """A conditional categorical distribution. 22 | 23 | This is a categorical distribution that is conditioned on previous 24 | emissions, meaning that the probability of each character depends on the 25 | observed character earlier in the sequence. Each feature is conditioned 26 | independently of the others like a `Categorical` distribution. 27 | 28 | This conditioning makes the shape of the distribution a bit more 29 | complicated than the `JointCategorical` distribution. Specifically, a 30 | `JointCategorical` distribution is multivariate by definition but a 31 | `ConditionalCategorical` does not have to be. Although both may appear 32 | similar in that they both take in a vector of characters and return 33 | probabilities, the vector fed into the JointCategorical are all observed 34 | together without some notion of time, whereas the ConditionalCategorical 35 | explicitly requires a notion of timing, where the probability of later 36 | characters depend on the composition of characters seen before. 37 | 38 | 39 | Parameters 40 | ---------- 41 | probs: list of numpy.ndarray, torch.tensor or None, shape=(k, k), optional 42 | A list of conditional probabilities with one tensor for each feature 43 | in the data being modeled. Each tensor should have `k+1` dimensions 44 | where `k` is the number of timesteps to condition on. Each dimension 45 | should span the number of keys in that dimension. For example, if 46 | specifying a univariate conditional categorical distribution where 47 | k=2, a valid tensor shape would be [(2, 3, 4)]. Default is None. 48 | 49 | n_categories: list, numpy.ndarray, torch.tensor or None, optional 50 | The number of categories for each feature in the data. Only needs to 51 | be provided when the parameters will be learned directly from data and 52 | you want to make sure that right number of keys are included in each 53 | dimension. Unlike the `Categorical` distribution, this needs to be 54 | a list of shapes with one shape for each feature and the shape matches 55 | that specified in `probs`. Default is None. 56 | 57 | pseudocount: float, optional 58 | A value to add to the observed counts of each feature when training. 59 | Setting this to a positive value ensures that no probabilities are 60 | truly zero. Default is 0. 61 | 62 | inertia: float, (0, 1), optional 63 | Indicates the proportion of the update to apply to the parameters 64 | during training. 
When the inertia is 0.0, the update is applied in 65 | its entirety and the previous parameters are ignored. When the 66 | inertia is 1.0, the update is entirely ignored and the previous 67 | parameters are kept, equivalently to if the parameters were frozen. 68 | 69 | frozen: bool, optional 70 | Whether all the parameters associated with this distribution are frozen. 71 | If you want to freeze individual pameters, or individual values in those 72 | parameters, you must modify the `frozen` attribute of the tensor or 73 | parameter directly. Default is False. 74 | 75 | check_data: bool, optional 76 | Whether to check properties of the data and potentially recast it to 77 | torch.tensors. This does not prevent checking of parameters but can 78 | slightly speed up computation when you know that your inputs are valid. 79 | Setting this to False is also necessary for compiling. 80 | """ 81 | 82 | def __init__(self, probs=None, n_categories=None, pseudocount=0, 83 | inertia=0.0, frozen=False, check_data=True): 84 | super().__init__(inertia=inertia, frozen=frozen, check_data=check_data) 85 | self.name = "ConditionalCategorical" 86 | 87 | if probs is not None: 88 | self.n_categories = [] 89 | self.probs = torch.nn.ParameterList([]) 90 | 91 | for prob in probs: 92 | prob = _check_parameter(_cast_as_parameter(prob), "probs", 93 | min_value=0, max_value=1) 94 | 95 | self.probs.append(prob) 96 | self.n_categories.append(tuple(prob.shape)) 97 | 98 | else: 99 | self.probs = None 100 | self.n_categories = n_categories 101 | 102 | self.pseudocount = _check_parameter(pseudocount, "pseudocount") 103 | 104 | self._initialized = probs is not None 105 | self.d = len(self.probs) if self._initialized else None 106 | self.n_parents = len(self.probs[0].shape) if self._initialized else None 107 | self._reset_cache() 108 | 109 | def _initialize(self, d, n_categories): 110 | """Initialize the probability distribution. 111 | 112 | This method is meant to only be called internally. It initializes the 113 | parameters of the distribution and stores its dimensionality. For more 114 | complex methods, this function will do more. 115 | 116 | 117 | Parameters 118 | ---------- 119 | d: int 120 | The dimensionality the distribution is being initialized to. 121 | 122 | n_categories: list of tuples 123 | The shape of each conditional distribution, one per feature. 124 | """ 125 | 126 | self.n_categories = [] 127 | for n_cat in n_categories: 128 | if isinstance(n_cat, (list, tuple)): 129 | self.n_categories.append(tuple(n_cat)) 130 | elif isinstance(n_cat, (numpy.ndarray, torch.Tensor)): 131 | self.n_categories.append(tuple(n_cat.tolist())) 132 | 133 | self.n_parents = len(self.n_categories[0]) 134 | self.probs = torch.nn.ParameterList([_cast_as_parameter(torch.zeros( 135 | *cats, dtype=self.dtype, device=self.device, requires_grad=False)) 136 | for cats in self.n_categories]) 137 | 138 | self._initialized = True 139 | super()._initialize(d) 140 | 141 | def _reset_cache(self): 142 | """Reset the internally stored statistics. 143 | 144 | This method is meant to only be called internally. It resets the 145 | stored statistics used to update the model parameters as well as 146 | recalculates the cached values meant to speed up log probability 147 | calculations. 
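A minimal sketch matching the shape example in the docstring above: a single feature conditioned on two earlier observations with 2 and 3 keys, emitting one of 4 keys. The uniform table and the example values are arbitrary.

```python
import torch
from pomegranate.distributions import ConditionalCategorical

probs = torch.full((2, 3, 4), 0.25)       # every conditional row is uniform over 4 keys
d = ConditionalCategorical([probs])       # one feature, so a list with one tensor

# log_probability expects shape (n, n_parents, d); the last entry along the
# middle axis is the emitted value itself.
X = torch.tensor([[[1], [2], [3]]])
print(d.log_probability(X))               # log(0.25)
```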
148 | """ 149 | 150 | if self._initialized == False: 151 | return 152 | 153 | _w_sum = [] 154 | _xw_sum = [] 155 | 156 | for n_categories in self.n_categories: 157 | _w_sum.append(torch.zeros(*n_categories[:-1], 158 | dtype=self.probs[0].dtype, device=self.device)) 159 | _xw_sum.append(torch.zeros(*n_categories, 160 | dtype=self.probs[0].dtype, device=self.device)) 161 | 162 | self._w_sum = BufferList(_w_sum) 163 | self._xw_sum = BufferList(_xw_sum) 164 | 165 | self._log_probs = BufferList([torch.log(prob) for prob in self.probs]) 166 | 167 | def sample(self, n, X): 168 | """Sample from the probability distribution. 169 | 170 | This method will return `n` samples generated from the underlying 171 | probability distribution. For a mixture model, this involves first 172 | sampling the component using the prior probabilities, and then sampling 173 | from the chosen distribution. 174 | 175 | 176 | Parameters 177 | ---------- 178 | n: int 179 | The number of samples to generate. 180 | 181 | X: list, numpy.ndarray, torch.tensor, shape=(n, d, *self.probs.shape-1) 182 | The values to be conditioned on when generating the samples. 183 | 184 | Returns 185 | ------- 186 | X: torch.tensor, shape=(n, self.d) 187 | Randomly generated samples. 188 | """ 189 | 190 | X = _check_parameter(_cast_as_tensor(X), "X", ndim=3, 191 | shape=(-1, self.n_parents-1, self.d)) 192 | 193 | y = [] 194 | for i in range(n): 195 | y.append([]) 196 | 197 | for j in range(self.d): 198 | idx = tuple(X[i, :, j]) 199 | if len(idx) == 1: 200 | idx = idx[0].item() 201 | 202 | probs = self.probs[j][idx] 203 | 204 | y_ = torch.multinomial(probs, 1).item() 205 | y[-1].append(y_) 206 | 207 | return torch.tensor(y) 208 | 209 | def log_probability(self, X): 210 | X = _check_parameter(_cast_as_tensor(X), "X", ndim=3, 211 | shape=(-1, self.n_parents, self.d), check_parameter=self.check_data) 212 | 213 | logps = torch.zeros(len(X), dtype=self.probs[0].dtype, device=X.device, 214 | requires_grad=False) 215 | 216 | for i in range(len(X)): 217 | for j in range(self.d): 218 | logps[i] += self._log_probs[j][tuple(X[i, :, j])] 219 | 220 | return logps 221 | 222 | def summarize(self, X, sample_weight=None): 223 | if self.frozen == True: 224 | return 225 | 226 | X = _check_parameter(_cast_as_tensor(X), "X", ndim=3, 227 | dtypes=(torch.int32, torch.int64), check_parameter=self.check_data) 228 | 229 | if not self._initialized: 230 | self._initialize(len(X[0][0]), torch.max(X, dim=0)[0].T+1) 231 | 232 | X = _check_parameter(X, "X", shape=(-1, self.n_parents, self.d), 233 | check_parameter=self.check_data) 234 | sample_weight = _check_parameter(_cast_as_tensor(sample_weight, 235 | dtype=torch.float32), "sample_weight", min_value=0, ndim=(1, 2)) 236 | 237 | if sample_weight is None: 238 | sample_weight = torch.ones(X[:, 0].shape[0], X[:, 0].shape[-1], 239 | dtype=self.probs[0].dtype) 240 | elif len(sample_weight.shape) == 1: 241 | sample_weight = sample_weight.reshape(-1, 1).expand(-1, X.shape[2]) 242 | elif sample_weight.shape[1] == 1 and self.d > 1: 243 | sample_weight = sample_weight.expand(-1, X.shape[2]) 244 | 245 | _check_parameter(sample_weight, "sample_weight", 246 | min_value=0, ndim=2, shape=(X.shape[0], X.shape[2])) 247 | 248 | for j in range(self.d): 249 | strides = torch.tensor(self._xw_sum[j].stride(), device=X.device) 250 | X_ = torch.sum(X[:, :, j] * strides, dim=-1) 251 | 252 | self._xw_sum[j].view(-1).scatter_add_(0, X_, sample_weight[:,j]) 253 | self._w_sum[j][:] = self._xw_sum[j].sum(dim=-1) 254 | 255 | def from_summaries(self): 256 
| if self.frozen == True: 257 | return 258 | 259 | for i in range(self.d): 260 | probs = self._xw_sum[i] / self._w_sum[i].unsqueeze(-1) 261 | probs = torch.nan_to_num(probs, 1. / probs.shape[-1]) 262 | 263 | _update_parameter(self.probs[i], probs, self.inertia) 264 | 265 | self._reset_cache() 266 | 267 | -------------------------------------------------------------------------------- /pomegranate/distributions/dirac_delta.py: -------------------------------------------------------------------------------- 1 | # diracdelta.py 2 | # Contact: Jacob Schreiber 3 | 4 | import torch 5 | 6 | from .._utils import _cast_as_tensor 7 | from .._utils import _cast_as_parameter 8 | from .._utils import _update_parameter 9 | from .._utils import _check_parameter 10 | 11 | from ._distribution import Distribution 12 | 13 | 14 | class DiracDelta(Distribution): 15 | """A dirac delta distribution object. 16 | 17 | A dirac delta distribution is a probability distribution that has its entire 18 | density at zero. This distribution assumes that each feature is independent 19 | of the others. This means that, in practice, it will assign a zero 20 | probability if any value in an example is non-zero. 21 | 22 | There are two ways to initialize this object. The first is to pass in 23 | the tensor of alpha values representing the probability to return given a 24 | zero value, at which point they can immediately be 25 | used. The second is to not pass in the rate parameters and then call 26 | either `fit` or `summary` + `from_summaries`, at which point the probability 27 | parameter will be learned from data. 28 | 29 | 30 | Parameters 31 | ---------- 32 | alphas: list, numpy.ndarray, torch.Tensor or None, shape=(d,), optional 33 | The probability parameters for each feature. Default is None. 34 | 35 | inertia: float, [0, 1], optional 36 | Indicates the proportion of the update to apply to the parameters 37 | during training. When the inertia is 0.0, the update is applied in 38 | its entirety and the previous parameters are ignored. When the 39 | inertia is 1.0, the update is entirely ignored and the previous 40 | parameters are kept, equivalently to if the parameters were frozen. 41 | 42 | frozen: bool, optional 43 | Whether all the parameters associated with this distribution are frozen. 44 | If you want to freeze individual pameters, or individual values in those 45 | parameters, you must modify the `frozen` attribute of the tensor or 46 | parameter directly. Default is False. 47 | 48 | check_data: bool, optional 49 | Whether to check properties of the data and potentially recast it to 50 | torch.tensors. This does not prevent checking of parameters but can 51 | slightly speed up computation when you know that your inputs are valid. 52 | Setting this to False is also necessary for compiling. 53 | """ 54 | 55 | def __init__(self, alphas=None, inertia=0.0, frozen=False, check_data=True): 56 | super().__init__(inertia=inertia, frozen=frozen, check_data=check_data) 57 | self.name = "DiracDelta" 58 | 59 | self.alphas = _check_parameter(_cast_as_parameter(alphas), "alphas", 60 | min_value=0.0, ndim=1) 61 | 62 | self._initialized = alphas is not None 63 | self.d = len(self.alphas) if self._initialized else None 64 | self._reset_cache() 65 | 66 | def _initialize(self, d): 67 | """Initialize the probability distribution. 68 | 69 | This method is meant to only be called internally. It initializes the 70 | parameters of the distribution and stores its dimensionality. For more 71 | complex methods, this function will do more. 
72 | 73 | 74 | Parameters 75 | ---------- 76 | d: int 77 | The dimensionality the distribution is being initialized to. 78 | """ 79 | 80 | self.alphas = _cast_as_parameter(torch.ones(d, device=self.device)) 81 | 82 | self._initialized = True 83 | super()._initialize(d) 84 | 85 | def _reset_cache(self): 86 | """Reset the internally stored statistics. 87 | 88 | This method is meant to only be called internally. It resets the 89 | stored statistics used to update the model parameters as well as 90 | recalculates the cached values meant to speed up log probability 91 | calculations. 92 | """ 93 | 94 | if self._initialized == False: 95 | return 96 | 97 | self.register_buffer("_log_alphas", torch.log(self.alphas)) 98 | 99 | def log_probability(self, X): 100 | """Calculate the log probability of each example. 101 | 102 | This method calculates the log probability of each example given the 103 | parameters of the distribution. The examples must be given in a 2D 104 | format. 105 | 106 | Note: This differs from some other log probability calculation 107 | functions, like those in torch.distributions, because it is not 108 | returning the log probability of each feature independently, but rather 109 | the total log probability of the entire example. 110 | 111 | 112 | Parameters 113 | ---------- 114 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 115 | A set of examples to evaluate. 116 | 117 | 118 | Returns 119 | ------- 120 | logp: torch.Tensor, shape=(-1,) 121 | The log probability of each example. 122 | """ 123 | 124 | X = _check_parameter(_cast_as_tensor(X), "X", ndim=2, 125 | shape=(-1, self.d), check_parameter=self.check_data) 126 | 127 | return torch.sum(torch.where(X == 0.0, self._log_alphas, float("-inf")), 128 | dim=-1) 129 | 130 | def summarize(self, X, sample_weight=None): 131 | """Extract the sufficient statistics from a batch of data. 132 | 133 | This method calculates the sufficient statistics from optionally 134 | weighted data and adds them to the stored cache. The examples must be 135 | given in a 2D format. Sample weights can either be provided as one 136 | value per example or as a 2D matrix of weights for each feature in 137 | each example. 138 | 139 | For a dirac delta distribution, there are no statistics to extract. 140 | 141 | 142 | Parameters 143 | ---------- 144 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 145 | A set of examples to summarize. 146 | 147 | sample_weight: list, tuple, numpy.ndarray, torch.Tensor, optional 148 | A set of weights for the examples. This can be either of shape 149 | (-1, self.d) or a vector of shape (-1,). Default is ones. 150 | """ 151 | 152 | if self.frozen == True: 153 | return 154 | 155 | X, sample_weight = super().summarize(X, sample_weight=sample_weight) 156 | 157 | def from_summaries(self): 158 | """Update the model parameters given the extracted statistics. 159 | 160 | This method uses calculated statistics from calls to the `summarize` 161 | method to update the distribution parameters. Hyperparameters for the 162 | update are passed in at initialization time. 163 | 164 | For a dirac delta distribution, there are no updates. 165 | 166 | Note: Internally, a call to `fit` is just a successive call to the 167 | `summarize` method followed by the `from_summaries` method. 
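To make the zero-density behaviour described above concrete, here is a small illustrative sketch (not part of the repository) showing that any non-zero entry drives an example's log probability to negative infinity; it assumes DiracDelta is importable from pomegranate.distributions.

import torch
from pomegranate.distributions import DiracDelta

d = DiracDelta(alphas=[1.0, 0.5])

X = torch.tensor([[0.0, 0.0],
                  [0.0, 1.0]])
print(d.log_probability(X))    # [log(1.0) + log(0.5), -inf]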
168 | """ 169 | 170 | return 171 | -------------------------------------------------------------------------------- /pomegranate/distributions/exponential.py: -------------------------------------------------------------------------------- 1 | # exponential.py 2 | # Contact: Jacob Schreiber 3 | 4 | import torch 5 | from torch.distributions import Exponential as tExponential 6 | 7 | from .._utils import _cast_as_tensor 8 | from .._utils import _cast_as_parameter 9 | from .._utils import _update_parameter 10 | from .._utils import _check_parameter 11 | 12 | from ._distribution import Distribution 13 | 14 | 15 | class Exponential(Distribution): 16 | """An exponential distribution object. 17 | 18 | An exponential distribution models scales of discrete events, and has a 19 | rate parameter describing the average time between event occurrences. 20 | This distribution assumes that each feature is independent of the others. 21 | Although the object is meant to operate on discrete counts, it can be used 22 | on any non-negative continuous data. 23 | 24 | There are two ways to initialize this object. The first is to pass in 25 | the tensor of rate parameters, at which point they can immediately be 26 | used. The second is to not pass in the rate parameters and then call 27 | either `fit` or `summary` + `from_summaries`, at which point the rate 28 | parameter will be learned from data. 29 | 30 | 31 | Parameters 32 | ---------- 33 | scales: list, numpy.ndarray, torch.Tensor or None, shape=(d,), optional 34 | The rate parameters for each feature. Default is None. 35 | 36 | inertia: float, (0, 1), optional 37 | Indicates the proportion of the update to apply to the parameters 38 | during training. When the inertia is 0.0, the update is applied in 39 | its entirety and the previous parameters are ignored. When the 40 | inertia is 1.0, the update is entirely ignored and the previous 41 | parameters are kept, equivalently to if the parameters were frozen. 42 | 43 | frozen: bool, optional 44 | Whether all the parameters associated with this distribution are 45 | frozen. If you want to freeze individual pameters, or individual values 46 | in those parameters, you must modify the `frozen` attribute of the 47 | tensor or parameter directly. Default is False. 48 | 49 | check_data: bool, optional 50 | Whether to check properties of the data and potentially recast it to 51 | torch.tensors. This does not prevent checking of parameters but can 52 | slightly speed up computation when you know that your inputs are valid. 53 | Setting this to False is also necessary for compiling. 54 | """ 55 | 56 | def __init__(self, scales=None, inertia=0.0, frozen=False, check_data=True): 57 | super().__init__(inertia=inertia, frozen=frozen, check_data=check_data) 58 | self.name = "Exponential" 59 | 60 | self.scales = _check_parameter(_cast_as_parameter(scales), "scales", 61 | min_value=0, ndim=1) 62 | 63 | self._initialized = scales is not None 64 | self.d = self.scales.shape[-1] if self._initialized else None 65 | self._reset_cache() 66 | 67 | def _initialize(self, d): 68 | """Initialize the probability distribution. 69 | 70 | This method is meant to only be called internally. It initializes the 71 | parameters of the distribution and stores its dimensionality. For more 72 | complex methods, this function will do more. 73 | 74 | 75 | Parameters 76 | ---------- 77 | d: int 78 | The dimensionality the distribution is being initialized to. 
79 | """ 80 | 81 | self.scales = _cast_as_parameter(torch.zeros(d, dtype=self.dtype, 82 | device=self.device)) 83 | 84 | self._initialized = True 85 | super()._initialize(d) 86 | 87 | def _reset_cache(self): 88 | """Reset the internally stored statistics. 89 | 90 | This method is meant to only be called internally. It resets the 91 | stored statistics used to update the model parameters as well as 92 | recalculates the cached values meant to speed up log probability 93 | calculations. 94 | """ 95 | 96 | if self._initialized == False: 97 | return 98 | 99 | self.register_buffer("_w_sum", torch.zeros(self.d, device=self.device)) 100 | self.register_buffer("_xw_sum", torch.zeros(self.d, device=self.device)) 101 | 102 | self.register_buffer("_log_scales", torch.log(self.scales)) 103 | 104 | def sample(self, n): 105 | """Sample from the probability distribution. 106 | 107 | This method will return `n` samples generated from the underlying 108 | probability distribution. 109 | 110 | 111 | Parameters 112 | ---------- 113 | n: int 114 | The number of samples to generate. 115 | 116 | 117 | Returns 118 | ------- 119 | X: torch.tensor, shape=(n, self.d) 120 | Randomly generated samples. 121 | """ 122 | 123 | return tExponential(1. / self.scales).sample([n]) 124 | 125 | def log_probability(self, X): 126 | """Calculate the log probability of each example. 127 | 128 | This method calculates the log probability of each example given the 129 | parameters of the distribution. The examples must be given in a 2D 130 | format. For an exponential distribution, the data must be non-negative. 131 | 132 | Note: This differs from some other log probability calculation 133 | functions, like those in torch.distributions, because it is not 134 | returning the log probability of each feature independently, but rather 135 | the total log probability of the entire example. 136 | 137 | 138 | Parameters 139 | ---------- 140 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 141 | A set of examples to evaluate. 142 | 143 | 144 | Returns 145 | ------- 146 | logp: torch.Tensor, shape=(-1,) 147 | The log probability of each example. 148 | """ 149 | 150 | X = _check_parameter(_cast_as_tensor(X), "X", min_value=0.0, 151 | ndim=2, shape=(-1, self.d), check_parameter=self.check_data) 152 | 153 | return torch.sum(-self._log_scales - (1. / self.scales) * X, dim=1) 154 | 155 | def summarize(self, X, sample_weight=None): 156 | """Extract the sufficient statistics from a batch of data. 157 | 158 | This method calculates the sufficient statistics from optionally 159 | weighted data and adds them to the stored cache. The examples must be 160 | given in a 2D format. Sample weights can either be provided as one 161 | value per example or as a 2D matrix of weights for each feature in 162 | each example. 163 | 164 | 165 | Parameters 166 | ---------- 167 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 168 | A set of examples to summarize. 169 | 170 | sample_weight: list, tuple, numpy.ndarray, torch.Tensor, optional 171 | A set of weights for the examples. This can be either of shape 172 | (-1, self.d) or a vector of shape (-1,). Default is ones. 
173 | """ 174 | 175 | if self.frozen == True: 176 | return 177 | 178 | X, sample_weight = super().summarize(X, sample_weight=sample_weight) 179 | _check_parameter(X, "X", min_value=0, check_parameter=self.check_data) 180 | 181 | self._w_sum[:] = self._w_sum + torch.sum(sample_weight, dim=0) 182 | self._xw_sum[:] = self._xw_sum + torch.sum(X * sample_weight, dim=0) 183 | 184 | def from_summaries(self): 185 | """Update the model parameters given the extracted statistics. 186 | 187 | This method uses calculated statistics from calls to the `summarize` 188 | method to update the distribution parameters. Hyperparameters for the 189 | update are passed in at initialization time. 190 | 191 | Note: Internally, a call to `fit` is just a successive call to the 192 | `summarize` method followed by the `from_summaries` method. 193 | """ 194 | 195 | if self.frozen == True: 196 | return 197 | 198 | scales = self._xw_sum / self._w_sum 199 | _update_parameter(self.scales, scales, self.inertia) 200 | self._reset_cache() 201 | -------------------------------------------------------------------------------- /pomegranate/distributions/gamma.py: -------------------------------------------------------------------------------- 1 | # gamma.py 2 | # Contact: Jacob Schreiber 3 | 4 | import torch 5 | 6 | from .._utils import _cast_as_tensor 7 | from .._utils import _cast_as_parameter 8 | from .._utils import _update_parameter 9 | from .._utils import _check_parameter 10 | from .._utils import _check_shapes 11 | 12 | from ._distribution import Distribution 13 | 14 | 15 | class Gamma(Distribution): 16 | """A gamma distribution object. 17 | 18 | A gamma distribution is the sum of exponential distributions, and has shape 19 | and rate parameters. This distribution assumes that each feature is 20 | independent of the others. 21 | 22 | There are two ways to initialize this objecct. The first is to pass in 23 | the tensor of rate and shae parameters, at which point they can immediately 24 | be used. The second is to not pass in the rate parameters and then call 25 | either `fit` or `summary` + `from_summaries`, at which point the rate 26 | and shape parameters will be learned from data. 27 | 28 | 29 | Parameters 30 | ---------- 31 | shapes: torch.tensor or None, shape=(d,), optional 32 | The shape parameter for each feature. Default is None 33 | 34 | rates: torch.tensor or None, shape=(d,), optional 35 | The rate parameters for each feature. Default is None. 36 | 37 | inertia: float, (0, 1), optional 38 | Indicates the proportion of the update to apply to the parameters 39 | during training. When the inertia is 0.0, the update is applied in 40 | its entirety and the previous parameters are ignored. When the 41 | inertia is 1.0, the update is entirely ignored and the previous 42 | parameters are kept, equivalently to if the parameters were frozen. 43 | 44 | tol: float, [0, inf), optional 45 | The threshold at which to stop fitting the parameters of the 46 | distribution. Default is 1e-4. 47 | 48 | max_iter: int, [0, inf), optional 49 | The maximum number of iterations to run EM when fitting the parameters 50 | of the distribution. Default is 20. 51 | 52 | frozen: bool, optional 53 | Whether all the parameters associated with this distribution are frozen. 54 | If you want to freeze individual pameters, or individual values in those 55 | parameters, you must modify the `frozen` attribute of the tensor or 56 | parameter directly. Default is False. 
57 | 58 | check_data: bool, optional 59 | Whether to check properties of the data and potentially recast it to 60 | torch.tensors. This does not prevent checking of parameters but can 61 | slightly speed up computation when you know that your inputs are valid. 62 | Setting this to False is also necessary for compiling. 63 | """ 64 | 65 | def __init__(self, shapes=None, rates=None, inertia=0.0, tol=1e-4, 66 | max_iter=20, frozen=False, check_data=True): 67 | super().__init__(inertia=inertia, frozen=frozen, check_data=check_data) 68 | self.name = "Gamma" 69 | 70 | self.shapes = _check_parameter(_cast_as_parameter(shapes), "shapes", 71 | min_value=0, ndim=1) 72 | self.rates = _check_parameter(_cast_as_parameter(rates), "rates", 73 | min_value=0, ndim=1) 74 | 75 | _check_shapes([self.shapes, self.rates], ["shapes", "rates"]) 76 | 77 | self.tol = _check_parameter(tol, "tol", min_value=0, ndim=0) 78 | self.max_iter = _check_parameter(max_iter, "max_iter", min_value=1, 79 | ndim=0) 80 | 81 | self._initialized = (shapes is not None) and (rates is not None) 82 | self.d = self.shapes.shape[-1] if self._initialized else None 83 | self._reset_cache() 84 | 85 | def _initialize(self, d): 86 | """Initialize the probability distribution. 87 | 88 | This method is meant to only be called internally. It initializes the 89 | parameters of the distribution and stores its dimensionality. For more 90 | complex methods, this function will do more. 91 | 92 | 93 | Parameters 94 | ---------- 95 | d: int 96 | The dimensionality the distribution is being initialized to. 97 | """ 98 | 99 | self.shapes = _cast_as_parameter(torch.zeros(d, dtype=self.dtype, 100 | device=self.device)) 101 | self.rates = _cast_as_parameter(torch.zeros(d, dtype=self.dtype, 102 | device=self.device)) 103 | 104 | self._initialized = True 105 | super()._initialize(d) 106 | 107 | def _reset_cache(self): 108 | """Reset the internally stored statistics. 109 | 110 | This method is meant to only be called internally. It resets the 111 | stored statistics used to update the model parameters as well as 112 | recalculates the cached values meant to speed up log probability 113 | calculations. 114 | """ 115 | 116 | if self._initialized == False: 117 | return 118 | 119 | self.register_buffer("_w_sum", torch.zeros(self.d, device=self.device)) 120 | self.register_buffer("_xw_sum", torch.zeros(self.d, device=self.device)) 121 | self.register_buffer("_logx_w_sum", torch.zeros(self.d, 122 | device=self.device)) 123 | 124 | self.register_buffer("_log_rates", torch.log(self.rates)) 125 | self.register_buffer("_lgamma_shapes", torch.lgamma(self.shapes)) 126 | self.register_buffer("_thetas", self._log_rates * self.shapes - 127 | self._lgamma_shapes) 128 | 129 | def sample(self, n): 130 | """Sample from the probability distribution. 131 | 132 | This method will return `n` samples generated from the underlying 133 | probability distribution. 134 | 135 | 136 | Parameters 137 | ---------- 138 | n: int 139 | The number of samples to generate. 140 | 141 | 142 | Returns 143 | ------- 144 | X: torch.tensor, shape=(n, self.d) 145 | Randomly generated samples. 146 | """ 147 | 148 | return torch.distributions.Gamma(self.shapes, self.rates).sample([n]) 149 | 150 | def log_probability(self, X): 151 | """Calculate the log probability of each example. 152 | 153 | This method calculates the log probability of each example given the 154 | parameters of the distribution. The examples must be given in a 2D 155 | format. For a gamma distribution, the data must be non-negative. 
156 | 157 | Note: This differs from some other log probability calculation 158 | functions, like those in torch.distributions, because it is not 159 | returning the log probability of each feature independently, but rather 160 | the total log probability of the entire example. 161 | 162 | 163 | Parameters 164 | ---------- 165 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 166 | A set of examples to evaluate. 167 | 168 | 169 | Returns 170 | ------- 171 | logp: torch.Tensor, shape=(-1,) 172 | The log probability of each example. 173 | """ 174 | 175 | X = _check_parameter(_cast_as_tensor(X), "X", min_value=0.0, 176 | ndim=2, shape=(-1, self.d), check_parameter=self.check_data) 177 | 178 | return torch.sum(self._thetas + torch.log(X) * (self.shapes - 1) - 179 | self.rates * X, dim=-1) 180 | 181 | def summarize(self, X, sample_weight=None): 182 | """Extract the sufficient statistics from a batch of data. 183 | 184 | This method calculates the sufficient statistics from optionally 185 | weighted data and adds them to the stored cache. The examples must be 186 | given in a 2D format. Sample weights can either be provided as one 187 | value per example or as a 2D matrix of weights for each feature in 188 | each example. 189 | 190 | 191 | Parameters 192 | ---------- 193 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 194 | A set of examples to summarize. 195 | 196 | sample_weight: list, tuple, numpy.ndarray, torch.Tensor, optional 197 | A set of weights for the examples. This can be either of shape 198 | (-1, self.d) or a vector of shape (-1,). Default is ones. 199 | """ 200 | 201 | if self.frozen == True: 202 | return 203 | 204 | X, sample_weight = super().summarize(X, sample_weight=sample_weight) 205 | _check_parameter(X, "X", min_value=0, check_parameter=self.check_data) 206 | 207 | self._w_sum[:] = self._w_sum + torch.sum(sample_weight, dim=0) 208 | self._xw_sum[:] = self._xw_sum + torch.sum(X * sample_weight, dim=0) 209 | self._logx_w_sum[:] = self._logx_w_sum + torch.sum(torch.log(X) * 210 | sample_weight, dim=0) 211 | 212 | def from_summaries(self): 213 | """Update the model parameters given the extracted statistics. 214 | 215 | This method uses calculated statistics from calls to the `summarize` 216 | method to update the distribution parameters. Hyperparameters for the 217 | update are passed in at initialization time. 218 | 219 | Note: Internally, a call to `fit` is just a successive call to the 220 | `summarize` method followed by the `from_summaries` method. 
221 | """ 222 | 223 | if self.frozen == True: 224 | return 225 | 226 | thetas = torch.log(self._xw_sum / self._w_sum) - \ 227 | self._logx_w_sum / self._w_sum 228 | 229 | numerator = (3 - thetas + torch.sqrt((thetas - 3) ** 2 + 24 * thetas)) 230 | denominator = (12 * thetas) 231 | 232 | new_shapes = numerator / denominator 233 | shapes = new_shapes + self.tol 234 | 235 | for iteration in range(self.max_iter): 236 | mask = torch.abs(shapes - new_shapes) < self.tol 237 | if torch.all(mask): 238 | break 239 | 240 | shapes = new_shapes 241 | new_shapes = (shapes - (torch.log(shapes) - torch.polygamma(0, 242 | shapes) - thetas) / (1.0 / shapes - torch.polygamma(1, shapes))) 243 | 244 | shapes = new_shapes 245 | rates = 1.0 / (1.0 / (shapes * self._w_sum) * self._xw_sum) 246 | 247 | _update_parameter(self.shapes, shapes, self.inertia) 248 | _update_parameter(self.rates, rates, self.inertia) 249 | self._reset_cache() 250 | -------------------------------------------------------------------------------- /pomegranate/distributions/halfnormal.py: -------------------------------------------------------------------------------- 1 | # normal.py 2 | # Contact: Jacob Schreiber 3 | 4 | import torch 5 | 6 | from .._utils import _cast_as_tensor 7 | from .._utils import _cast_as_parameter 8 | from .._utils import _update_parameter 9 | from .._utils import _check_parameter 10 | from .._utils import _check_shapes 11 | 12 | from ._distribution import Distribution 13 | from .normal import Normal 14 | 15 | 16 | # Define some useful constants 17 | LOG_2 = 0.6931471805599453 18 | 19 | 20 | class HalfNormal(Normal): 21 | """A half-normal distribution object. 22 | 23 | A half-normal distribution is a distribution over positive real numbers that 24 | is zero for negative numbers. It is defined by a single parameter, sigma, 25 | which is the standard deviation of the distribution. The mean of the 26 | distribution is sqrt(2/pi) * sigma, and the variance is (1 - 2/pi) * sigma^2. 27 | 28 | This distribution can assume that features are independent of the others if 29 | the covariance type is 'diag' or 'sphere', but if the type is 'full' then 30 | the features are not independent. 31 | 32 | There are two ways to initialize this object. The first is to pass in 33 | the tensor of probablity parameters, at which point they can immediately be 34 | used. The second is to not pass in the rate parameters and then call 35 | either `fit` or `summarize` + `from_summaries`, at which point the probability 36 | parameter will be learned from data. 37 | 38 | 39 | Parameters 40 | ---------- 41 | covs: list, numpy.ndarray, torch.Tensor, or None, optional 42 | The variances and covariances of the distribution. If covariance_type 43 | is 'full', the shape should be (self.d, self.d); if 'diag', the shape 44 | should be (self.d,); if 'sphere', it should be (1,). Note that this is 45 | the variances or covariances in all settings, and not the standard 46 | deviation, as may be more common for diagonal covariance matrices. 47 | Default is None. 48 | 49 | covariance_type: str, optional 50 | The type of covariance matrix. Must be one of 'full', 'diag', or 51 | 'sphere'. Default is 'full'. 52 | 53 | min_cov: float or None, optional 54 | The minimum variance or covariance. 55 | 56 | inertia: float, [0, 1], optional 57 | Indicates the proportion of the update to apply to the parameters 58 | during training. When the inertia is 0.0, the update is applied in 59 | its entirety and the previous parameters are ignored. 
When the 60 | inertia is 1.0, the update is entirely ignored and the previous 61 | parameters are kept, equivalently to if the parameters were frozen. 62 | 63 | frozen: bool, optional 64 | Whether all the parameters associated with this distribution are frozen. 65 | If you want to freeze individual pameters, or individual values in those 66 | parameters, you must modify the `frozen` attribute of the tensor or 67 | parameter directly. Default is False. 68 | """ 69 | 70 | def __init__( 71 | self, 72 | covs=None, 73 | covariance_type="full", 74 | min_cov=None, 75 | inertia=0.0, 76 | frozen=False, 77 | check_data=True, 78 | ): 79 | self.name = "HalfNormal" 80 | super().__init__( 81 | means=None, 82 | covs=covs, 83 | min_cov=min_cov, 84 | covariance_type=covariance_type, 85 | inertia=inertia, 86 | frozen=frozen, 87 | check_data=check_data, 88 | ) 89 | 90 | def _initialize(self, d): 91 | """Initialize the probability distribution. 92 | 93 | This method is meant to only be called internally. It initializes the 94 | parameters of the distribution and stores its dimensionality. For more 95 | complex methods, this function will do more. 96 | 97 | 98 | Parameters 99 | ---------- 100 | d: int 101 | The dimensionality the distribution is being initialized to. 102 | """ 103 | super()._initialize(d) 104 | 105 | def _reset_cache(self): 106 | """Reset the internally stored statistics. 107 | 108 | This method is meant to only be called internally. It resets the 109 | stored statistics used to update the model parameters as well as 110 | recalculates the cached values meant to speed up log probability 111 | calculations. 112 | """ 113 | super()._reset_cache() 114 | 115 | def sample(self, n): 116 | """Sample from the probability distribution. 117 | 118 | This method will return `n` samples generated from the underlying 119 | probability distribution. 120 | 121 | 122 | Parameters 123 | ---------- 124 | n: int 125 | The number of samples to generate. 126 | 127 | 128 | Returns 129 | ------- 130 | X: torch.tensor, shape=(n, self.d) 131 | Randomly generated samples. 132 | """ 133 | if self.covariance_type in ["diag", "full"]: 134 | return torch.distributions.HalfNormal(self.covs).sample([n]) 135 | 136 | def log_probability(self, X): 137 | """Calculate the log probability of each example. 138 | 139 | This method calculates the log probability of each example given the 140 | parameters of the distribution. The examples must be given in a 2D 141 | format. 142 | 143 | Note: This differs from some other log probability calculation 144 | functions, like those in torch.distributions, because it is not 145 | returning the log probability of each feature independently, but rather 146 | the total log probability of the entire example. 147 | 148 | 149 | Parameters 150 | ---------- 151 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 152 | A set of examples to evaluate. 153 | 154 | 155 | Returns 156 | ------- 157 | logp: torch.Tensor, shape=(-1,) 158 | The log probability of each example. 159 | """ 160 | 161 | X = _check_parameter( 162 | _cast_as_tensor(X, dtype=self.covs.dtype), 163 | "X", 164 | ndim=2, 165 | shape=(-1, self.d), 166 | check_parameter=self.check_data, 167 | ) 168 | return super().log_probability(X) + LOG_2 169 | 170 | def summarize(self, X, sample_weight=None): 171 | """Extract the sufficient statistics from a batch of data. 172 | 173 | This method calculates the sufficient statistics from optionally 174 | weighted data and adds them to the stored cache. 
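Since the half-normal density is the zero-mean normal density folded onto the non-negative half-line, its log-density is the normal log-density plus log 2, which is what the `+ LOG_2` term above implements. A minimal, illustrative fitting sketch follows; it assumes HalfNormal is importable from pomegranate.distributions and that fitting an uninitialized distribution works as for the other distributions, and the data are synthetic.

import torch
from pomegranate.distributions import HalfNormal

torch.manual_seed(0)
X = torch.randn(5000, 2).abs()        # non-negative, half-normal-shaped data

d = HalfNormal(covariance_type='diag')
d.fit(X)                              # means stay pinned at zero; only covs update
print(d.covs)
print(d.log_probability(X[:3]))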
The examples must be 175 | given in a 2D format. Sample weights can either be provided as one 176 | value per example or as a 2D matrix of weights for each feature in 177 | each example. 178 | 179 | 180 | Parameters 181 | ---------- 182 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 183 | A set of examples to summarize. 184 | 185 | sample_weight: list, tuple, numpy.ndarray, torch.Tensor, optional 186 | A set of weights for the examples. This can be either of shape 187 | (-1, self.d) or a vector of shape (-1,). Default is ones. 188 | """ 189 | 190 | super().summarize(X, sample_weight=sample_weight) 191 | 192 | def from_summaries(self): 193 | """Update the model parameters given the extracted statistics. 194 | 195 | This method uses calculated statistics from calls to the `summarize` 196 | method to update the distribution parameters. Hyperparameters for the 197 | update are passed in at initialization time. 198 | 199 | Note: Internally, a call to `fit` is just a successive call to the 200 | `summarize` method followed by the `from_summaries` method. 201 | """ 202 | 203 | if self.frozen == True: 204 | return 205 | 206 | # the means are always zero for a half normal distribution 207 | means = torch.zeros(self.d, dtype=self.covs.dtype) 208 | 209 | if self.covariance_type == "full": 210 | v = self._xw_sum.unsqueeze(0) * self._xw_sum.unsqueeze(1) 211 | covs = self._xxw_sum / self._w_sum - v / self._w_sum**2.0 212 | 213 | elif self.covariance_type in ["diag", "sphere"]: 214 | covs = self._xxw_sum / self._w_sum - self._xw_sum**2.0 / self._w_sum**2.0 215 | if self.covariance_type == "sphere": 216 | covs = covs.mean(dim=-1) 217 | 218 | _update_parameter(self.covs, covs, self.inertia) 219 | _update_parameter(self.means, means, self.inertia) 220 | self._reset_cache() 221 | -------------------------------------------------------------------------------- /pomegranate/distributions/independent_components.py: -------------------------------------------------------------------------------- 1 | # independent_components.py 2 | # Contact: Jacob Schreiber 3 | 4 | import torch 5 | 6 | from .._utils import _cast_as_tensor 7 | from .._utils import _cast_as_parameter 8 | from .._utils import _update_parameter 9 | from .._utils import _check_parameter 10 | from .._utils import _reshape_weights 11 | 12 | from ._distribution import Distribution 13 | 14 | 15 | class IndependentComponents(Distribution): 16 | """An independent components distribution object. 17 | 18 | A distribution made up of independent, univariate, distributions that each 19 | model a single feature in the data. This means that instead of using a 20 | single type of distribution to model all of the features in your data, you 21 | use one distribution per feature. Note that this will likely be slower 22 | than using a single distribution because the amount of batching possible 23 | will go down significantly. 24 | 25 | There are two ways to initialize this object. The first is to pass in a 26 | set of distributions that are all initialized with parameters, at which 27 | point this distribution can be immediately used for inference. The second 28 | is to pass in a set of distributions that are not initialized with 29 | parameters, and then call either `fit` or `summary` + `from_summaries` to 30 | learn the parameters of all the distributions. 
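For instance, a two-column data set could pair a Normal with an Exponential, one distribution per feature. The sketch below is illustrative only and assumes both component classes are importable from pomegranate.distributions.

import torch
from pomegranate.distributions import IndependentComponents, Normal, Exponential

torch.manual_seed(0)
X = torch.randn(500, 2).abs() + 0.1   # non-negative so the Exponential column is valid

d = IndependentComponents([Normal(), Exponential()])
d.fit(X)                              # each component is fit to its own column
print(d.log_probability(X[:5]))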
31 | 32 | 33 | Parameters 34 | ---------- 35 | distributions: list, tuple, numpy.ndarray, torch.Tensor, shape=(d,) 36 | An ordered iterable containing all of the distributions, one per 37 | feature, that will be used. 38 | 39 | check_data: bool, optional 40 | Whether to check properties of the data and potentially recast it to 41 | torch.tensors. This does not prevent checking of parameters but can 42 | slightly speed up computation when you know that your inputs are valid. 43 | Setting this to False is also necessary for compiling. 44 | """ 45 | 46 | def __init__(self, distributions, check_data=False): 47 | super().__init__(inertia=0.0, frozen=False, check_data=check_data) 48 | self.name = "IndependentComponents" 49 | 50 | if len(distributions) <= 1: 51 | raise ValueError("Must pass in at least 2 distributions.") 52 | for distribution in distributions: 53 | if not isinstance(distribution, Distribution): 54 | raise ValueError("All passed in distributions must " + 55 | "inherit from the Distribution object.") 56 | 57 | self.distributions = distributions 58 | self._initialized = all(d._initialized for d in distributions) 59 | self.d = len(distributions) 60 | self._reset_cache() 61 | 62 | 63 | def _initialize(self, d): 64 | """Initialize the probability distribution. 65 | 66 | This method is meant to only be called internally. It initializes the 67 | parameters of the distribution and stores its dimensionality. For more 68 | complex methods, this function will do more. 69 | 70 | 71 | Parameters 72 | ---------- 73 | d: int 74 | The dimensionality the distribution is being initialized to. 75 | """ 76 | 77 | for distribution in self.distributions: 78 | distribution._initialize(d) 79 | 80 | self._initialized = True 81 | 82 | 83 | def _reset_cache(self): 84 | """Reset the internally stored statistics. 85 | 86 | This method is meant to only be called internally. It resets the 87 | stored statistics used to update the model parameters as well as 88 | recalculates the cached values meant to speed up log probability 89 | calculations. 90 | """ 91 | 92 | if self._initialized == False: 93 | return 94 | 95 | for distribution in self.distributions: 96 | distribution._reset_cache() 97 | 98 | 99 | def sample(self, n): 100 | """Sample from the probability distribution. 101 | 102 | This method will return `n` samples generated from the underlying 103 | probability distribution. 104 | 105 | 106 | Parameters 107 | ---------- 108 | n: int 109 | The number of samples to generate. 110 | 111 | 112 | Returns 113 | ------- 114 | X: torch.tensor, shape=(n, self.d) 115 | Randomly generated samples. 116 | """ 117 | 118 | return torch.hstack([d.sample(n) for d in self.distributions]) 119 | 120 | 121 | def log_probability(self, X): 122 | """Calculate the log probability of each example. 123 | 124 | This method calculates the log probability of each example given the 125 | parameters of the distribution. The examples must be given in a 2D 126 | format. 127 | 128 | Note: This differs from some other log probability calculation 129 | functions, like those in torch.distributions, because it is not 130 | returning the log probability of each feature independently, but rather 131 | the total log probability of the entire example. 132 | 133 | 134 | Parameters 135 | ---------- 136 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 137 | A set of examples to evaluate. 138 | 139 | 140 | Returns 141 | ------- 142 | logp: torch.Tensor, shape=(-1,) 143 | The log probability of each example. 
144 | """ 145 | 146 | X = _check_parameter(_cast_as_tensor(X), "X", ndim=2, 147 | shape=(-1, self.d)) 148 | 149 | logp = torch.zeros(X.shape[0]) 150 | for i, d in enumerate(self.distributions): 151 | if isinstance(X, torch.masked.MaskedTensor): 152 | logp.add_(d.log_probability(X[:, i:i+1])._masked_data) 153 | else: 154 | logp.add_(d.log_probability(X[:, i:i+1])) 155 | 156 | return logp 157 | 158 | 159 | def summarize(self, X, sample_weight=None): 160 | """Extract the sufficient statistics from a batch of data. 161 | 162 | This method calculates the sufficient statistics from optionally 163 | weighted data and adds them to the stored cache. The examples must be 164 | given in a 2D format. Sample weights can either be provided as one 165 | value per example or as a 2D matrix of weights for each feature in 166 | each example. 167 | 168 | 169 | Parameters 170 | ---------- 171 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 172 | A set of examples to summarize. 173 | 174 | sample_weight: list, tuple, numpy.ndarray, torch.Tensor, optional 175 | A set of weights for the examples. This can be either of shape 176 | (-1, self.d) or a vector of shape (-1,). Default is ones. 177 | """ 178 | 179 | if self.frozen == True: 180 | return 181 | 182 | X = _check_parameter(_cast_as_tensor(X), "X", ndim=2, 183 | shape=(-1, self.d)) 184 | 185 | sample_weight = _reshape_weights(X, _cast_as_tensor(sample_weight, 186 | dtype=torch.float32), device=self.device) 187 | 188 | for i, d in enumerate(self.distributions): 189 | d.summarize(X[:, i:i+1], sample_weight=sample_weight[:, i:i+1]) 190 | 191 | 192 | def from_summaries(self): 193 | """Update the model parameters given the extracted statistics. 194 | 195 | This method uses calculated statistics from calls to the `summarize` 196 | method to update the distribution parameters. Hyperparameters for the 197 | update are passed in at initialization time. 198 | 199 | Note: Internally, a call to `fit` is just a successive call to the 200 | `summarize` method followed by the `from_summaries` method. 201 | """ 202 | 203 | if self.frozen == True: 204 | return 205 | 206 | for distribution in self.distributions: 207 | distribution.from_summaries() 208 | -------------------------------------------------------------------------------- /pomegranate/distributions/joint_categorical.py: -------------------------------------------------------------------------------- 1 | # joint_categorical.py 2 | # Contact: Jacob Schreiber 3 | 4 | import numpy 5 | import torch 6 | 7 | from .._utils import _cast_as_tensor 8 | from .._utils import _cast_as_parameter 9 | from .._utils import _update_parameter 10 | from .._utils import _check_parameter 11 | from .._utils import _reshape_weights 12 | 13 | from ._distribution import Distribution 14 | from .categorical import Categorical 15 | 16 | 17 | class JointCategorical(Distribution): 18 | """A joint categorical distribution. 19 | 20 | A joint categorical distribution models the probability of a vector of 21 | categorical values occurring without assuming that the dimensions are 22 | independent from each other. Essentially, it is a Categorical distribution 23 | without the assumption that the dimensions are independent of each other. 24 | 25 | There are two ways to initialize this object. The first is to pass in 26 | the tensor of probability parameters, at which point they can immediately be 27 | used. 
The second is to not pass in the rate parameters and then call 28 | either `fit` or `summary` + `from_summaries`, at which point the 29 | probability parameters will be learned from data. 30 | 31 | 32 | Parameters 33 | ---------- 34 | probs: list, numpy.ndarray, torch.tensor, or None, shape=*n_categories 35 | A tensor where each dimension corresponds to one column in the data 36 | set being modeled and the size of each dimension is the number of 37 | categories in that column, e.g., if the data being modeled is binary 38 | and has shape (5, 4), this will be a tensor with shape (2, 2, 2, 2). 39 | Default is None. 40 | 41 | n_categories: list, numpy.ndarray, torch.tensor, or None, shape=(d,) 42 | A vector with the maximum number of categories that each column 43 | can have. If not given, this will be inferred from the data. Default 44 | is None. 45 | 46 | inertia: float, [0, 1], optional 47 | Indicates the proportion of the update to apply to the parameters 48 | during training. When the inertia is 0.0, the update is applied in 49 | its entirety and the previous parameters are ignored. When the 50 | inertia is 1.0, the update is entirely ignored and the previous 51 | parameters are kept, equivalently to if the parameters were frozen. 52 | 53 | pseudocount: float, optional 54 | A number of observations to add to each entry in the probability 55 | distribution during training. A higher value will smooth the 56 | distributions more. Default is 0. 57 | 58 | inertia: float, [0, 1], optional 59 | Indicates the proportion of the update to apply to the parameters 60 | during training. When the inertia is 0.0, the update is applied in 61 | its entirety and the previous parameters are ignored. When the 62 | inertia is 1.0, the update is entirely ignored and the previous 63 | parameters are kept, equivalently to if the parameters were frozen. 64 | 65 | frozen: bool, optional 66 | Whether all the parameters associated with this distribution are frozen. 67 | If you want to freeze individual pameters, or individual values in those 68 | parameters, you must modify the `frozen` attribute of the tensor or 69 | parameter directly. Default is False. 70 | 71 | check_data: bool, optional 72 | Whether to check properties of the data and potentially recast it to 73 | torch.tensors. This does not prevent checking of parameters but can 74 | slightly speed up computation when you know that your inputs are valid. 75 | Setting this to False is also necessary for compiling. 
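As a concrete illustration of the `probs` layout (one tensor axis per column of the data), here is a hedged sketch assuming JointCategorical is importable from pomegranate.distributions; the probabilities are arbitrary but sum to one.

import torch
from pomegranate.distributions import JointCategorical

# Two binary features modeled jointly: probs[i, j] = P(X0 = i, X1 = j).
probs = torch.tensor([[0.125, 0.125],
                      [0.250, 0.500]])
d = JointCategorical(probs=probs)

X = torch.tensor([[0, 1],
                  [1, 1]])
print(d.log_probability(X))   # log(0.125), log(0.5)
print(d.sample(5))            # five rows drawn from the joint table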
76 | """ 77 | 78 | def __init__(self, probs=None, n_categories=None, pseudocount=0, 79 | inertia=0.0, frozen=False, check_data=True): 80 | super().__init__(inertia=inertia, frozen=frozen, check_data=check_data) 81 | self.name = "JointCategorical" 82 | 83 | self.probs = _check_parameter(_cast_as_parameter(probs), "probs", 84 | min_value=0, max_value=1, value_sum=1) 85 | 86 | self.n_categories = _check_parameter(n_categories, "n_categories", 87 | min_value=2) 88 | self.pseudocount = _check_parameter(pseudocount, "pseudocount") 89 | 90 | self._initialized = probs is not None 91 | self.d = len(self.probs.shape) if self._initialized else None 92 | 93 | if self._initialized: 94 | if n_categories is None: 95 | self.n_categories = tuple(self.probs.shape) 96 | elif isinstance(n_categories, int): 97 | self.n_categories = (n_categories for i in range(n_categories)) 98 | else: 99 | self.n_categories = tuple(n_categories) 100 | else: 101 | self.n_categories = None 102 | 103 | self._reset_cache() 104 | 105 | def _initialize(self, d, n_categories): 106 | """Initialize the probability distribution. 107 | 108 | This method is meant to only be called internally. It initializes the 109 | parameters of the distribution and stores its dimensionality. For more 110 | complex methods, this function will do more. 111 | 112 | 113 | Parameters 114 | ---------- 115 | d: int 116 | The dimensionality the distribution is being initialized to. 117 | 118 | n_categories: list, numpy.ndarray, torch.tensor, or None, shape=(d,) 119 | A vector with the maximum number of categories that each column 120 | can have. If not given, this will be inferred from the data. 121 | Default is None. 122 | """ 123 | 124 | self.probs = _cast_as_parameter(torch.zeros(*n_categories, 125 | dtype=self.dtype, device=self.device)) 126 | 127 | self.n_categories = n_categories 128 | self._initialized = True 129 | super()._initialize(d) 130 | 131 | def _reset_cache(self): 132 | """Reset the internally stored statistics. 133 | 134 | This method is meant to only be called internally. It resets the 135 | stored statistics used to update the model parameters as well as 136 | recalculates the cached values meant to speed up log probability 137 | calculations. 138 | """ 139 | 140 | if self._initialized == False: 141 | return 142 | 143 | self._w_sum = torch.zeros(self.d, dtype=self.probs.dtype) 144 | self._xw_sum = torch.zeros(*self.n_categories, dtype=self.probs.dtype) 145 | 146 | self._log_probs = torch.log(self.probs) 147 | 148 | def sample(self, n): 149 | """Sample from the probability distribution. 150 | 151 | This method will return `n` samples generated from the underlying 152 | probability distribution. For a mixture model, this involves first 153 | sampling the component using the prior probabilities, and then sampling 154 | from the chosen distribution. 155 | 156 | 157 | Parameters 158 | ---------- 159 | n: int 160 | The number of samples to generate. 161 | 162 | 163 | Returns 164 | ------- 165 | X: torch.tensor, shape=(n, self.d) 166 | Randomly generated samples. 167 | """ 168 | 169 | idxs = torch.multinomial(self.probs.flatten(), num_samples=n, 170 | replacement=True) 171 | 172 | X = numpy.unravel_index(idxs.numpy(), self.n_categories) 173 | X = numpy.stack(X).T 174 | return torch.from_numpy(X) 175 | 176 | def log_probability(self, X): 177 | """Calculate the log probability of each example. 178 | 179 | This method calculates the log probability of each example given the 180 | parameters of the distribution. 
The examples must be given in a 2D 181 | format. For a joint categorical distribution, each value must be an 182 | integer category that is smaller than the maximum number of categories 183 | for each feature. 184 | 185 | Note: This differs from some other log probability calculation 186 | functions, like those in torch.distributions, because it is not 187 | returning the log probability of each feature independently, but rather 188 | the total log probability of the entire example. 189 | 190 | 191 | Parameters 192 | ---------- 193 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 194 | A set of examples to evaluate. 195 | 196 | 197 | Returns 198 | ------- 199 | logp: torch.Tensor, shape=(-1,) 200 | The log probability of each example. 201 | """ 202 | 203 | X = _check_parameter(_cast_as_tensor(X), "X", 204 | value_set=tuple(range(max(self.n_categories)+1)), ndim=2, 205 | shape=(-1, self.d), check_parameter=self.check_data) 206 | 207 | logps = torch.zeros(len(X), dtype=self.probs.dtype) 208 | for i in range(len(X)): 209 | logps[i] = self._log_probs[tuple(X[i])] 210 | 211 | return logps 212 | 213 | 214 | def summarize(self, X, sample_weight=None): 215 | """Extract the sufficient statistics from a batch of data. 216 | 217 | This method calculates the sufficient statistics from optionally 218 | weighted data and adds them to the stored cache. The examples must be 219 | given in a 2D format. Sample weights can either be provided as one 220 | value per example or as a 2D matrix of weights for each feature in 221 | each example. 222 | 223 | 224 | Parameters 225 | ---------- 226 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 227 | A set of examples to summarize. 228 | 229 | sample_weight: list, tuple, numpy.ndarray, torch.Tensor, optional 230 | A set of weights for the examples. This can be either of shape 231 | (-1, self.d) or a vector of shape (-1,). Default is ones. 232 | """ 233 | 234 | if self.frozen == True: 235 | return 236 | 237 | X = _check_parameter(_cast_as_tensor(X), "X", ndim=2, 238 | dtypes=(torch.int32, torch.int64), check_parameter=self.check_data) 239 | 240 | if not self._initialized: 241 | self._initialize(len(X[0]), torch.max(X, dim=0)[0]+1) 242 | 243 | X = _check_parameter(X, "X", shape=(-1, self.d), 244 | value_set=tuple(range(max(self.n_categories)+1)), 245 | check_parameter=self.check_data) 246 | 247 | sample_weight = _reshape_weights(X, _cast_as_tensor(sample_weight, 248 | dtype=torch.float32))[:,0] 249 | 250 | self._w_sum += torch.sum(sample_weight, dim=0) 251 | for i in range(len(X)): 252 | self._xw_sum[tuple(X[i])] += sample_weight[i] 253 | 254 | def from_summaries(self): 255 | """Update the model parameters given the extracted statistics. 256 | 257 | This method uses calculated statistics from calls to the `summarize` 258 | method to update the distribution parameters. Hyperparameters for the 259 | update are passed in at initialization time. 260 | 261 | Note: Internally, a call to `fit` is just a successive call to the 262 | `summarize` method followed by the `from_summaries` method. 
263 | """ 264 | 265 | if self.frozen == True: 266 | return 267 | 268 | probs = self._xw_sum / self._w_sum[0] 269 | 270 | _update_parameter(self.probs, probs, self.inertia) 271 | self._reset_cache() 272 | -------------------------------------------------------------------------------- /pomegranate/distributions/lognormal.py: -------------------------------------------------------------------------------- 1 | # normal.py 2 | # Contact: Jacob Schreiber 3 | 4 | import torch 5 | 6 | from .._utils import _cast_as_tensor 7 | from .._utils import _cast_as_parameter 8 | from .._utils import _update_parameter 9 | from .._utils import _check_parameter 10 | from .._utils import _check_shapes 11 | 12 | from .normal import Normal 13 | 14 | 15 | class LogNormal(Normal): 16 | """A lognormal object. 17 | 18 | The parameters are the mu and sigma of the normal distribution, which 19 | is the the exponential of the log normal distribution. This 20 | distribution can assume that features are independent of the others if 21 | the covariance type is 'diag' or 'sphere', but if the type is 'full' then 22 | the features are not independent. 23 | 24 | There are two ways to initialize this object. The first is to pass in 25 | the tensor of probablity parameters, at which point they can immediately be 26 | used. The second is to not pass in the rate parameters and then call 27 | either `fit` or `summarize` + `from_summaries`, at which point the probability 28 | parameter will be learned from data. 29 | 30 | 31 | Parameters 32 | ---------- 33 | means: list, numpy.ndarray, torch.Tensor or None, shape=(d,), optional 34 | The mean values of the normal distributions. Default is None. 35 | 36 | covs: list, numpy.ndarray, torch.Tensor, or None, optional 37 | The variances and covariances of the distribution. If covariance_type 38 | is 'full', the shape should be (self.d, self.d); if 'diag', the shape 39 | should be (self.d,); if 'sphere', it should be (1,). Note that this is 40 | the variances or covariances in all settings, and not the standard 41 | deviation, as may be more common for diagonal covariance matrices. 42 | Default is None. 43 | 44 | covariance_type: str, optional 45 | The type of covariance matrix. Must be one of 'full', 'diag', or 46 | 'sphere'. Default is 'full'. 47 | 48 | min_cov: float or None, optional 49 | The minimum variance or covariance. 50 | 51 | inertia: float, [0, 1], optional 52 | Indicates the proportion of the update to apply to the parameters 53 | during training. When the inertia is 0.0, the update is applied in 54 | its entirety and the previous parameters are ignored. When the 55 | inertia is 1.0, the update is entirely ignored and the previous 56 | parameters are kept, equivalently to if the parameters were frozen. 57 | 58 | frozen: bool, optional 59 | Whether all the parameters associated with this distribution are frozen. 60 | If you want to freeze individual pameters, or individual values in those 61 | parameters, you must modify the `frozen` attribute of the tensor or 62 | parameter directly. Default is False. 
63 | """ 64 | 65 | def __init__( 66 | self, 67 | means=None, 68 | covs=None, 69 | covariance_type="full", 70 | min_cov=None, 71 | inertia=0.0, 72 | frozen=False, 73 | check_data=True, 74 | ): 75 | self.name = "LogNormal" 76 | super().__init__( 77 | means=means, 78 | covs=covs, 79 | covariance_type=covariance_type, 80 | min_cov=min_cov, 81 | inertia=inertia, 82 | frozen=frozen, 83 | check_data=check_data, 84 | ) 85 | 86 | def sample(self, n): 87 | """Sample from the probability distribution. 88 | 89 | This method will return `n` samples generated from the underlying 90 | probability distribution. 91 | 92 | 93 | Parameters 94 | ---------- 95 | n: int 96 | The number of samples to generate. 97 | 98 | 99 | Returns 100 | ------- 101 | X: torch.tensor, shape=(n, self.d) 102 | Randomly generated samples. 103 | """ 104 | 105 | if self.covariance_type == "diag": 106 | return torch.distributions.Normal(self.means, self.covs).sample([n]).exp() 107 | elif self.covariance_type == "full": 108 | return ( 109 | torch.distributions.MultivariateNormal(self.means, self.covs) 110 | .sample([n]) 111 | .exp() 112 | ) 113 | 114 | def log_probability(self, X): 115 | """Calculate the log probability of each example. 116 | 117 | This method calculates the log probability of each example given the 118 | parameters of the distribution. The examples must be given in a 2D 119 | format. 120 | 121 | Note: This differs from some other log probability calculation 122 | functions, like those in torch.distributions, because it is not 123 | returning the log probability of each feature independently, but rather 124 | the total log probability of the entire example. 125 | 126 | 127 | Parameters 128 | ---------- 129 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 130 | A set of examples to evaluate. 131 | 132 | 133 | Returns 134 | ------- 135 | logp: torch.Tensor, shape=(-1,) 136 | The log probability of each example. 137 | """ 138 | 139 | X = _check_parameter( 140 | _cast_as_tensor(X, dtype=self.means.dtype), 141 | "X", 142 | ndim=2, 143 | shape=(-1, self.d), 144 | check_parameter=self.check_data, 145 | ) 146 | 147 | # take the log of X 148 | x_log = X.log() 149 | 150 | return super().log_probability(x_log) 151 | 152 | def summarize(self, X, sample_weight=None): 153 | """Extract the sufficient statistics from a batch of data. 154 | 155 | This method calculates the sufficient statistics from optionally 156 | weighted data and adds them to the stored cache. The examples must be 157 | given in a 2D format. Sample weights can either be provided as one 158 | value per example or as a 2D matrix of weights for each feature in 159 | each example. 160 | 161 | 162 | Parameters 163 | ---------- 164 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 165 | A set of examples to summarize. 166 | 167 | sample_weight: list, tuple, numpy.ndarray, torch.Tensor, optional 168 | A set of weights for the examples. This can be either of shape 169 | (-1, self.d) or a vector of shape (-1,). Default is ones. 
170 | """ 171 | 172 | if self.frozen is True: 173 | return 174 | X = _cast_as_tensor(X, dtype=self.means.dtype) 175 | super().summarize(X.log(), sample_weight=sample_weight) 176 | -------------------------------------------------------------------------------- /pomegranate/distributions/poisson.py: -------------------------------------------------------------------------------- 1 | # poisson.py 2 | # Contact: Jacob Schreiber 3 | 4 | import torch 5 | 6 | from .._utils import _cast_as_tensor 7 | from .._utils import _cast_as_parameter 8 | from .._utils import _update_parameter 9 | from .._utils import _check_parameter 10 | 11 | from ._distribution import Distribution 12 | 13 | 14 | class Poisson(Distribution): 15 | """An poisson distribution object. 16 | 17 | A poisson distribution models the number of occurrences of events that 18 | happen in a fixed time span, assuming that the occurrence of each event 19 | is independent. This distribution also assumes that each feature is 20 | independent of the others. 21 | 22 | There are two ways to initialize this objecct. The first is to pass in 23 | the tensor of lambda parameters, at which point they can immediately be 24 | used. The second is to not pass in the lambda parameters and then call 25 | either `fit` or `summary` + `from_summaries`, at which point the lambda 26 | parameter will be learned from data. 27 | 28 | 29 | Parameters 30 | ---------- 31 | lambdas: list, numpy.ndarray, torch.Tensor or None, shape=(d,), optional 32 | The lambda parameters for each feature. Default is None. 33 | 34 | inertia: float, (0, 1), optional 35 | Indicates the proportion of the update to apply to the parameters 36 | during training. When the inertia is 0.0, the update is applied in 37 | its entirety and the previous parameters are ignored. When the 38 | inertia is 1.0, the update is entirely ignored and the previous 39 | parameters are kept, equivalently to if the parameters were frozen. 40 | 41 | frozen: bool, optional 42 | Whether all the parameters associated with this distribution are frozen. 43 | If you want to freeze individual pameters, or individual values in those 44 | parameters, you must modify the `frozen` attribute of the tensor or 45 | parameter directly. Default is False. 46 | 47 | check_data: bool, optional 48 | Whether to check properties of the data and potentially recast it to 49 | torch.tensors. This does not prevent checking of parameters but can 50 | slightly speed up computation when you know that your inputs are valid. 51 | Setting this to False is also necessary for compiling. 52 | """ 53 | 54 | 55 | def __init__(self, lambdas=None, inertia=0.0, frozen=False, 56 | check_data=True): 57 | super().__init__(inertia=inertia, frozen=frozen, check_data=check_data) 58 | self.name = "Poisson" 59 | 60 | self.lambdas = _check_parameter(_cast_as_parameter(lambdas), "lambdas", 61 | min_value=0, ndim=1) 62 | 63 | self._initialized = lambdas is not None 64 | self.d = self.lambdas.shape[-1] if self._initialized else None 65 | self._reset_cache() 66 | 67 | def _initialize(self, d): 68 | """Initialize the probability distribution. 69 | 70 | This method is meant to only be called internally. It initializes the 71 | parameters of the distribution and stores its dimensionality. For more 72 | complex methods, this function will do more. 73 | 74 | 75 | Parameters 76 | ---------- 77 | d: int 78 | The dimensionality the distribution is being initialized to. 
79 | """ 80 | 81 | self.lambdas = _cast_as_parameter(torch.zeros(d, dtype=self.dtype, 82 | device=self.device)) 83 | 84 | self._initialized = True 85 | super()._initialize(d) 86 | 87 | def _reset_cache(self): 88 | """Reset the internally stored statistics. 89 | 90 | This method is meant to only be called internally. It resets the 91 | stored statistics used to update the model parameters as well as 92 | recalculates the cached values meant to speed up log probability 93 | calculations. 94 | """ 95 | 96 | if self._initialized == False: 97 | return 98 | 99 | self.register_buffer("_w_sum", torch.zeros(self.d, device=self.device)) 100 | self.register_buffer("_xw_sum", torch.zeros(self.d, device=self.device)) 101 | 102 | self.register_buffer("_log_lambdas", torch.log(self.lambdas)) 103 | 104 | def sample(self, n): 105 | """Sample from the probability distribution. 106 | 107 | This method will return `n` samples generated from the underlying 108 | probability distribution. 109 | 110 | 111 | Parameters 112 | ---------- 113 | n: int 114 | The number of samples to generate. 115 | 116 | 117 | Returns 118 | ------- 119 | X: torch.tensor, shape=(n, self.d) 120 | Randomly generated samples. 121 | """ 122 | 123 | return torch.distributions.Poisson(self.lambdas).sample([n]) 124 | 125 | def log_probability(self, X): 126 | """Calculate the log probability of each example. 127 | 128 | This method calculates the log probability of each example given the 129 | parameters of the distribution. The examples must be given in a 2D 130 | format. For a Poisson distribution, each entry in the data must 131 | be non-negative. 132 | 133 | Note: This differs from some other log probability calculation 134 | functions, like those in torch.distributions, because it is not 135 | returning the log probability of each feature independently, but rather 136 | the total log probability of the entire example. 137 | 138 | 139 | Parameters 140 | ---------- 141 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 142 | A set of examples to evaluate. 143 | 144 | 145 | Returns 146 | ------- 147 | logp: torch.Tensor, shape=(-1,) 148 | The log probability of each example. 149 | """ 150 | 151 | X = _check_parameter(_cast_as_tensor(X), "X", min_value=0.0, 152 | ndim=2, shape=(-1, self.d), check_parameter=self.check_data) 153 | 154 | return torch.sum(X * self._log_lambdas - self.lambdas - 155 | torch.lgamma(X+1), dim=-1) 156 | 157 | def summarize(self, X, sample_weight=None): 158 | """Extract the sufficient statistics from a batch of data. 159 | 160 | This method calculates the sufficient statistics from optionally 161 | weighted data and adds them to the stored cache. The examples must be 162 | given in a 2D format. Sample weights can either be provided as one 163 | value per example or as a 2D matrix of weights for each feature in 164 | each example. 165 | 166 | 167 | Parameters 168 | ---------- 169 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 170 | A set of examples to summarize. 171 | 172 | sample_weight: list, tuple, numpy.ndarray, torch.Tensor, optional 173 | A set of weights for the examples. This can be either of shape 174 | (-1, self.d) or a vector of shape (-1,). Default is ones. 
175 | """ 176 | 177 | if self.frozen == True: 178 | return 179 | 180 | X, sample_weight = super().summarize(X, sample_weight=sample_weight) 181 | _check_parameter(X, "X", min_value=0, check_parameter=self.check_data) 182 | 183 | self._w_sum[:] = self._w_sum + torch.sum(sample_weight, dim=0) 184 | self._xw_sum[:] = self._xw_sum + torch.sum(X * sample_weight, dim=0) 185 | 186 | def from_summaries(self): 187 | """Update the model parameters given the extracted statistics. 188 | 189 | This method uses calculated statistics from calls to the `summarize` 190 | method to update the distribution parameters. Hyperparameters for the 191 | update are passed in at initialization time. 192 | 193 | Note: Internally, a call to `fit` is just a successive call to the 194 | `summarize` method followed by the `from_summaries` method. 195 | """ 196 | 197 | if self.frozen == True: 198 | return 199 | 200 | lambdas = self._xw_sum / self._w_sum 201 | _update_parameter(self.lambdas, lambdas, self.inertia) 202 | self._reset_cache() 203 | -------------------------------------------------------------------------------- /pomegranate/distributions/student_t.py: -------------------------------------------------------------------------------- 1 | # student_t.py 2 | # Contact: Jacob Schreiber 3 | 4 | import math 5 | import torch 6 | 7 | from .._utils import _cast_as_tensor 8 | from .._utils import _cast_as_parameter 9 | from .._utils import _update_parameter 10 | from .._utils import _check_parameter 11 | 12 | from .normal import Normal 13 | 14 | 15 | class StudentT(Normal): 16 | """A Student T distribution. 17 | 18 | A Student T distribution models the probability of a variable occurring under 19 | a bell-shaped curve with heavy tails. Basically, this is a version of the 20 | normal distribution that is less resistant to outliers. It is described by 21 | a vector of mean values and a vector of variance values. This 22 | distribution can assume that features are independent of the others if 23 | the covariance type is 'diag' or 'sphere', but if the type is 'full' then 24 | the features are not independent. 25 | 26 | There are two ways to initialize this object. The first is to pass in 27 | the tensor of probability parameters, at which point they can immediately be 28 | used. The second is to not pass in the rate parameters and then call 29 | either `fit` or `summary` + `from_summaries`, at which point the probability 30 | parameter will be learned from data. 31 | 32 | 33 | Parameters 34 | ---------- 35 | means: list, numpy.ndarray, torch.Tensor or None, shape=(d,), optional 36 | The mean values of the distributions. Default is None. 37 | 38 | covs: list, numpy.ndarray, torch.Tensor, or None, optional 39 | The variances and covariances of the distribution. If covariance_type 40 | is 'full', the shape should be (self.d, self.d); if 'diag', the shape 41 | should be (self.d,); if 'sphere', it should be (1,). Note that this is 42 | the variances or covariances in all settings, and not the standard 43 | deviation, as may be more common for diagonal covariance matrices. 44 | Default is None. 45 | 46 | covariance_type: str, optional 47 | The type of covariance matrix. Must be one of 'full', 'diag', or 48 | 'sphere'. Default is 'full'. 49 | 50 | min_cov: float or None, optional 51 | The minimum variance or covariance. 52 | 53 | inertia: float, [0, 1], optional 54 | Indicates the proportion of the update to apply to the parameters 55 | during training. 
When the inertia is 0.0, the update is applied in 56 | its entirety and the previous parameters are ignored. When the 57 | inertia is 1.0, the update is entirely ignored and the previous 58 | parameters are kept, equivalently to if the parameters were frozen. 59 | 60 | frozen: bool, optional 61 | Whether all the parameters associated with this distribution are frozen. 62 | If you want to freeze individual pameters, or individual values in those 63 | parameters, you must modify the `frozen` attribute of the tensor or 64 | parameter directly. Default is False. 65 | 66 | check_data: bool, optional 67 | Whether to check properties of the data and potentially recast it to 68 | torch.tensors. This does not prevent checking of parameters but can 69 | slightly speed up computation when you know that your inputs are valid. 70 | Setting this to False is also necessary for compiling. 71 | """ 72 | 73 | def __init__(self, dofs, means=None, covs=None, covariance_type='diag', 74 | min_cov=None, inertia=0.0, frozen=False, check_data=True): 75 | dofs = _check_parameter(_cast_as_tensor(dofs), "dofs", min_value=1, 76 | ndim=0, dtypes=(torch.int32, torch.int64)) 77 | self.dofs = dofs 78 | 79 | super().__init__(means=means, covs=covs, min_cov=min_cov, 80 | covariance_type=covariance_type, inertia=inertia, frozen=frozen, 81 | check_data=check_data) 82 | 83 | self.name = "StudentT" 84 | 85 | del self.dofs 86 | 87 | self.register_buffer("dofs", _cast_as_tensor(dofs)) 88 | self.register_buffer("_lgamma_dofsp1", torch.lgamma((dofs + 1) / 2.0)) 89 | self.register_buffer("_lgamma_dofs", torch.lgamma(dofs / 2.0)) 90 | 91 | def _reset_cache(self): 92 | """Reset the internally stored statistics. 93 | 94 | This method is meant to only be called internally. It resets the 95 | stored statistics used to update the model parameters as well as 96 | recalculates the cached values meant to speed up log probability 97 | calculations. 98 | """ 99 | 100 | super()._reset_cache() 101 | if self._initialized == False: 102 | return 103 | 104 | self.register_buffer("_log_sqrt_dofs_pi_cov", torch.log(torch.sqrt( 105 | self.dofs * math.pi * self.covs))) 106 | 107 | def sample(self, n): 108 | """Sample from the probability distribution. 109 | 110 | This method will return `n` samples generated from the underlying 111 | probability distribution. 112 | 113 | 114 | Parameters 115 | ---------- 116 | n: int 117 | The number of samples to generate. 118 | 119 | 120 | Returns 121 | ------- 122 | X: torch.tensor, shape=(n, self.d) 123 | Randomly generated samples. 124 | """ 125 | 126 | return torch.distributions.StudentT(self.means, self.covs).sample([n]) 127 | 128 | def log_probability(self, X): 129 | """Calculate the log probability of each example. 130 | 131 | This method calculates the log probability of each example given the 132 | parameters of the distribution. The examples must be given in a 2D 133 | format. 134 | 135 | Note: This differs from some other log probability calculation 136 | functions, like those in torch.distributions, because it is not 137 | returning the log probability of each feature independently, but rather 138 | the total log probability of the entire example. 139 | 140 | 141 | Parameters 142 | ---------- 143 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 144 | A set of examples to evaluate. 145 | 146 | 147 | Returns 148 | ------- 149 | logp: torch.Tensor, shape=(-1,) 150 | The log probability of each example. 
151 | """ 152 | 153 | X = _check_parameter(_cast_as_tensor(X), "X", ndim=2, 154 | shape=(-1, self.d), check_parameter=self.check_data) 155 | 156 | t = (X - self.means) ** 2 / self.covs 157 | return torch.sum(self._lgamma_dofsp1 - self._lgamma_dofs - \ 158 | self._log_sqrt_dofs_pi_cov -((self.dofs + 1) / 2.0) * 159 | torch.log(1 + t / self.dofs), dim=-1) 160 | 161 | -------------------------------------------------------------------------------- /pomegranate/distributions/uniform.py: -------------------------------------------------------------------------------- 1 | # uniform.py 2 | # Contact: Jacob Schreiber 3 | 4 | import torch 5 | 6 | from .._utils import _cast_as_tensor 7 | from .._utils import _cast_as_parameter 8 | from .._utils import _update_parameter 9 | from .._utils import _check_parameter 10 | from .._utils import _check_shapes 11 | 12 | from ._distribution import Distribution 13 | 14 | inf = float("inf") 15 | 16 | 17 | class Uniform(Distribution): 18 | """A uniform distribution. 19 | 20 | A uniform distribution models the probability of a variable occurring given 21 | a range that has the same probability within it and no probability outside 22 | it. It is described by a vector of minimum and maximum values for this 23 | range. This distribution assumes that the features are independent of 24 | each other. 25 | 26 | There are two ways to initialize this object. The first is to pass in 27 | the tensor of probability parameters, at which point they can immediately be 28 | used. The second is to not pass in the rate parameters and then call 29 | either `fit` or `summary` + `from_summaries`, at which point the probability 30 | parameter will be learned from data. 31 | 32 | 33 | Parameters 34 | ---------- 35 | mins: list, numpy.ndarray, torch.Tensor or None, shape=(d,), optional 36 | The minimum values of the range. 37 | 38 | maxs: list, numpy.ndarray, torch.Tensor, or None, optional 39 | The maximum values of the range. 40 | 41 | inertia: float, [0, 1], optional 42 | Indicates the proportion of the update to apply to the parameters 43 | during training. When the inertia is 0.0, the update is applied in 44 | its entirety and the previous parameters are ignored. When the 45 | inertia is 1.0, the update is entirely ignored and the previous 46 | parameters are kept, equivalently to if the parameters were frozen. 47 | 48 | frozen: bool, optional 49 | Whether all the parameters associated with this distribution are frozen. 50 | If you want to freeze individual pameters, or individual values in those 51 | parameters, you must modify the `frozen` attribute of the tensor or 52 | parameter directly. Default is False. 53 | 54 | check_data: bool, optional 55 | Whether to check properties of the data and potentially recast it to 56 | torch.tensors. This does not prevent checking of parameters but can 57 | slightly speed up computation when you know that your inputs are valid. 58 | Setting this to False is also necessary for compiling. 
59 | """ 60 | 61 | def __init__(self, mins=None, maxs=None, inertia=0.0, frozen=False, 62 | check_data=True): 63 | super().__init__(inertia=inertia, frozen=frozen, check_data=check_data) 64 | self.name = "Uniform" 65 | 66 | self.mins = _check_parameter(_cast_as_parameter(mins), "mins", ndim=1) 67 | self.maxs = _check_parameter(_cast_as_parameter(maxs), "maxs", ndim=1) 68 | 69 | _check_shapes([self.mins, self.maxs], ["mins", "maxs"]) 70 | 71 | self._initialized = (mins is not None) and (maxs is not None) 72 | self.d = self.mins.shape[-1] if self._initialized else None 73 | self._reset_cache() 74 | 75 | def _initialize(self, d): 76 | """Initialize the probability distribution. 77 | 78 | This method is meant to only be called internally. It initializes the 79 | parameters of the distribution and stores its dimensionality. For more 80 | complex methods, this function will do more. 81 | 82 | 83 | Parameters 84 | ---------- 85 | d: int 86 | The dimensionality the distribution is being initialized to. 87 | """ 88 | 89 | self.mins = _cast_as_parameter(torch.zeros(d, dtype=self.dtype, 90 | device=self.device)) 91 | self.maxs = _cast_as_parameter(torch.zeros(d, dtype=self.dtype, 92 | device=self.device)) 93 | 94 | self._initialized = True 95 | super()._initialize(d) 96 | 97 | def _reset_cache(self): 98 | """Reset the internally stored statistics. 99 | 100 | This method is meant to only be called internally. It resets the 101 | stored statistics used to update the model parameters as well as 102 | recalculates the cached values meant to speed up log probability 103 | calculations. 104 | """ 105 | 106 | if self._initialized == False: 107 | return 108 | 109 | self.register_buffer("_x_mins", torch.full((self.d,), inf, 110 | device=self.device)) 111 | self.register_buffer("_x_maxs", torch.full((self.d,), -inf, 112 | device=self.device)) 113 | self.register_buffer("_logps", -torch.log(self.maxs - self.mins)) 114 | 115 | def sample(self, n): 116 | """Sample from the probability distribution. 117 | 118 | This method will return `n` samples generated from the underlying 119 | probability distribution. 120 | 121 | 122 | Parameters 123 | ---------- 124 | n: int 125 | The number of samples to generate. 126 | 127 | 128 | Returns 129 | ------- 130 | X: torch.tensor, shape=(n, self.d) 131 | Randomly generated samples. 132 | """ 133 | 134 | return torch.distributions.Uniform(self.mins, self.maxs).sample([n]) 135 | 136 | def log_probability(self, X): 137 | """Calculate the log probability of each example. 138 | 139 | This method calculates the log probability of each example given the 140 | parameters of the distribution. The examples must be given in a 2D 141 | format. For a Bernoulli distribution, each entry in the data must 142 | be either 0 or 1. 143 | 144 | Note: This differs from some other log probability calculation 145 | functions, like those in torch.distributions, because it is not 146 | returning the log probability of each feature independently, but rather 147 | the total log probability of the entire example. 148 | 149 | 150 | Parameters 151 | ---------- 152 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 153 | A set of examples to evaluate. 154 | 155 | 156 | Returns 157 | ------- 158 | logp: torch.Tensor, shape=(-1,) 159 | The log probability of each example. 
160 | """ 161 | 162 | X = _check_parameter(_cast_as_tensor(X), "X", ndim=2, 163 | shape=(-1, self.d), check_parameter=self.check_data) 164 | 165 | return torch.where((X >= self.mins) & (X <= self.maxs), self._logps, 166 | float("-inf")).sum(dim=1) 167 | 168 | def summarize(self, X, sample_weight=None): 169 | """Extract the sufficient statistics from a batch of data. 170 | 171 | This method calculates the sufficient statistics from optionally 172 | weighted data and adds them to the stored cache. The examples must be 173 | given in a 2D format. Sample weights can either be provided as one 174 | value per example or as a 2D matrix of weights for each feature in 175 | each example. 176 | 177 | 178 | Parameters 179 | ---------- 180 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 181 | A set of examples to summarize. 182 | 183 | sample_weight: list, tuple, numpy.ndarray, torch.Tensor, optional 184 | A set of weights for the examples. This can be either of shape 185 | (-1, self.d) or a vector of shape (-1,). Default is ones. 186 | """ 187 | 188 | if self.frozen == True: 189 | return 190 | 191 | X, sample_weight = super().summarize(X, sample_weight=sample_weight) 192 | 193 | self._x_mins = torch.minimum(self._x_mins, X.min(dim=0).values) 194 | self._x_maxs = torch.maximum(self._x_maxs, X.max(dim=0).values) 195 | 196 | def from_summaries(self): 197 | """Update the model parameters given the extracted statistics. 198 | 199 | This method uses calculated statistics from calls to the `summarize` 200 | method to update the distribution parameters. Hyperparameters for the 201 | update are passed in at initialization time. 202 | 203 | Note: Internally, a call to `fit` is just a successive call to the 204 | `summarize` method followed by the `from_summaries` method. 205 | """ 206 | 207 | if self.frozen == True: 208 | return 209 | 210 | _update_parameter(self.mins, self._x_mins, self.inertia) 211 | _update_parameter(self.maxs, self._x_maxs, self.inertia) 212 | self._reset_cache() 213 | -------------------------------------------------------------------------------- /pomegranate/distributions/zero_inflated.py: -------------------------------------------------------------------------------- 1 | # zero_inflated.py 2 | # Contact: Jacob Schreiber 3 | 4 | import time 5 | import torch 6 | 7 | from .._utils import _cast_as_tensor 8 | from .._utils import _cast_as_parameter 9 | from .._utils import _update_parameter 10 | from .._utils import _check_parameter 11 | from .._utils import _reshape_weights 12 | 13 | from ._distribution import Distribution 14 | 15 | 16 | class ZeroInflated(Distribution): 17 | """A wrapper for a zero-inflated distribution. 18 | 19 | Some discrete distributions, e.g. Poisson or negative binomial, are used 20 | to model data that has many more zeroes in it than one would expect from 21 | the true signal itself. Potentially, this is because data collection devices 22 | fail or other gaps exist in the data. A zero-inflated distribution is 23 | essentially a mixture of these zero values and the real underlying 24 | distribution. 25 | 26 | Accordingly, this class serves as a wrapper that can be dropped in for 27 | other probability distributions and makes them "zero-inflated". It is 28 | similar to a mixture model between the distribution passed in and a dirac 29 | delta distribution, except that the mixture happens independently for each 30 | distribution as well as for each example. 
31 | 32 | 33 | Parameters 34 | ---------- 35 | distribution: pomegranate.distributions.Distribution 36 | A pomegranate distribution object. It should probably be a discrete 37 | distribution, but does not technically have to be. 38 | 39 | priors: tuple, numpy.ndarray, torch.Tensor, or None. shape=(2,), optional 40 | The prior probabilities over the given distribution and the dirac 41 | delta component. Default is None. 42 | 43 | max_iter: int, optional 44 | The number of iterations to do in the EM step of fitting the 45 | distribution. Default is 10. 46 | 47 | tol: float, optional 48 | The threshold at which to stop during fitting when the improvement 49 | goes under. Default is 0.1. 50 | 51 | inertia: float, [0, 1], optional 52 | Indicates the proportion of the update to apply to the parameters 53 | during training. When the inertia is 0.0, the update is applied in 54 | its entirety and the previous parameters are ignored. When the 55 | inertia is 1.0, the update is entirely ignored and the previous 56 | parameters are kept, equivalently to if the parameters were frozen. 57 | 58 | frozen: bool, optional 59 | Whether all the parameters associated with this distribution are frozen. 60 | If you want to freeze individual pameters, or individual values in those 61 | parameters, you must modify the `frozen` attribute of the tensor or 62 | parameter directly. Default is False. 63 | 64 | verbose: bool, optional 65 | Whether to print the improvement and timings during training. 66 | """ 67 | 68 | def __init__(self, distribution, priors=None, max_iter=10, 69 | tol=0.1, inertia=0.0, frozen=False, check_data=False, verbose=False): 70 | super().__init__(inertia=inertia, frozen=frozen, check_data=check_data) 71 | self.name = "ZeroInflated" 72 | 73 | self.distribution = distribution 74 | self.priors = _check_parameter(_cast_as_parameter(priors), "priors", 75 | min_value=0, max_value=1, ndim=1, value_sum=1.0) 76 | 77 | self.verbose = verbose 78 | self._initialized = distribution._initialized is True 79 | self.d = distribution.d if self._initialized else None 80 | 81 | self.max_iter = max_iter 82 | self.tol = tol 83 | 84 | if self.priors is None and self.d is not None: 85 | self.priors = _cast_as_parameter(torch.ones(self.d, 86 | device=self.device) / 2) 87 | 88 | self._reset_cache() 89 | 90 | def _initialize(self, X): 91 | """Initialize the probability distribution. 92 | 93 | This method is meant to only be called internally. It initializes the 94 | parameters of the distribution and stores its dimensionality. For more 95 | complex methods, this function will do more. 96 | 97 | 98 | Parameters 99 | ---------- 100 | X: list, numpy.ndarray, torch.Tensor, shape=(1, self.d) 101 | The data to use to initialize the model. 102 | """ 103 | 104 | self.distribution._initialize(X.shape[1]) 105 | self.distribution.fit(X) 106 | 107 | self.priors = _cast_as_parameter(torch.ones(X.shape[1], 108 | device=self.device) / 2) 109 | self._initialized = True 110 | super()._initialize(X.shape[1]) 111 | 112 | def _reset_cache(self): 113 | """Reset the internally stored statistics. 114 | 115 | This method is meant to only be called internally. It resets the 116 | stored statistics used to update the model parameters as well as 117 | recalculates the cached values meant to speed up log probability 118 | calculations. 
119 | """ 120 | 121 | if self._initialized == False: 122 | return 123 | 124 | self.register_buffer("_w_sum", torch.zeros(self.d, 2, 125 | device=self.device)) 126 | self.register_buffer("_log_priors", torch.log(self.priors)) 127 | 128 | def _emission_matrix(self, X): 129 | """Return the emission/responsibility matrix. 130 | 131 | This method returns the log probability of each example under each 132 | distribution contained in the model with the log prior probability 133 | of each component added. 134 | 135 | 136 | Parameters 137 | ---------- 138 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 139 | A set of examples to evaluate. 140 | 141 | 142 | Returns 143 | ------- 144 | e: torch.Tensor, shape=(-1, self.k) 145 | A set of log probabilities for each example under each distribution. 146 | """ 147 | 148 | X = _check_parameter(_cast_as_tensor(X), "X", ndim=2, 149 | shape=(-1, self.d)) 150 | 151 | e = torch.empty(X.shape[0], self.d, 2, device=self.device) 152 | e[:, :, 0] = self._log_priors.unsqueeze(0) 153 | e[:, :, 0] += self.distribution.log_probability(X).unsqueeze(1) 154 | 155 | e[:, :, 1] = torch.log(1 - self.priors).unsqueeze(0) 156 | e[:, :, 1] += torch.where(X == 0, 0, float("-inf")) 157 | return e 158 | 159 | def fit(self, X, sample_weight=None): 160 | """Fit the model to optionally weighted examples. 161 | 162 | This method implements the core of the learning process. For a 163 | zero-inflated distribution, this involves performing EM until the 164 | distribution being fit converges. 165 | 166 | This method is largely a wrapper around the `summarize` and 167 | `from_summaries` methods. It's primary contribution is serving as a 168 | loop around these functions and to monitor convergence. 169 | 170 | 171 | Parameters 172 | ---------- 173 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 174 | A set of examples to evaluate. 175 | 176 | sample_weight: list, tuple, numpy.ndarray, torch.Tensor, optional 177 | A set of weights for the examples. This can be either of shape 178 | (-1, self.d) or a vector of shape (-1,). Default is ones. 179 | 180 | 181 | Returns 182 | ------- 183 | self 184 | """ 185 | 186 | logp = None 187 | for i in range(self.max_iter): 188 | start_time = time.time() 189 | 190 | last_logp = logp 191 | logp = self.summarize(X, sample_weight=sample_weight) 192 | 193 | if i > 0: 194 | improvement = logp - last_logp 195 | duration = time.time() - start_time 196 | 197 | if self.verbose: 198 | print("[{}] Improvement: {}, Time: {:4.4}s".format(i, 199 | improvement, duration)) 200 | 201 | if improvement < self.tol: 202 | break 203 | 204 | self.from_summaries() 205 | 206 | self._reset_cache() 207 | return self 208 | 209 | def summarize(self, X, sample_weight=None): 210 | """Extract the sufficient statistics from a batch of data. 211 | 212 | This method calculates the sufficient statistics from optionally 213 | weighted data and adds them to the stored cache. The examples must be 214 | given in a 2D format. Sample weights can either be provided as one 215 | value per example or as a 2D matrix of weights for each feature in 216 | each example. 217 | 218 | 219 | Parameters 220 | ---------- 221 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 222 | A set of examples to summarize. 223 | 224 | sample_weight: list, tuple, numpy.ndarray, torch.Tensor, optional 225 | A set of weights for the examples. This can be either of shape 226 | (-1, self.d) or a vector of shape (-1,). Default is ones. 
227 | """ 228 | 229 | X = _cast_as_tensor(X) 230 | if not self._initialized: 231 | self._initialize(X) 232 | 233 | _check_parameter(X, "X", ndim=2, shape=(-1, self.d)) 234 | sample_weight = _reshape_weights(X, _cast_as_tensor(sample_weight, 235 | dtype=torch.float32), device=self.device) 236 | 237 | e = self._emission_matrix(X) 238 | logp = torch.logsumexp(e, dim=2, keepdims=True) 239 | y = torch.exp(e - logp) 240 | 241 | self.distribution.summarize(X, y[:, :, 0] * sample_weight) 242 | 243 | if not self.frozen: 244 | self._w_sum += torch.sum(y * sample_weight.unsqueeze(-1), dim=(0, 1)) 245 | 246 | return torch.sum(logp) 247 | 248 | def from_summaries(self): 249 | """Update the model parameters given the extracted statistics. 250 | 251 | This method uses calculated statistics from calls to the `summarize` 252 | method to update the distribution parameters. Hyperparameters for the 253 | update are passed in at initialization time. 254 | 255 | Note: Internally, a call to `fit` is just a successive call to the 256 | `summarize` method followed by the `from_summaries` method. 257 | """ 258 | 259 | self.distribution.from_summaries() 260 | 261 | if self.frozen == True: 262 | return 263 | 264 | priors = self._w_sum[:,0] / torch.sum(self._w_sum, dim=-1) 265 | 266 | _update_parameter(self.priors, priors, self.inertia) 267 | self._reset_cache() 268 | -------------------------------------------------------------------------------- /pomegranate/hmm/__init__.py: -------------------------------------------------------------------------------- 1 | # __init__.py 2 | # Author: Jacob Schreiber 3 | 4 | from .dense_hmm import DenseHMM 5 | from .sparse_hmm import SparseHMM 6 | -------------------------------------------------------------------------------- /pomegranate/markov_chain.py: -------------------------------------------------------------------------------- 1 | # markov_chain.py 2 | # Author: Jacob Schreiber 3 | 4 | import torch 5 | 6 | from ._utils import _cast_as_tensor 7 | from ._utils import _update_parameter 8 | from ._utils import _check_parameter 9 | from ._utils import _reshape_weights 10 | 11 | from .distributions._distribution import Distribution 12 | from .distributions import Categorical 13 | from .distributions import ConditionalCategorical 14 | 15 | 16 | class MarkovChain(Distribution): 17 | """A Markov chain. 18 | 19 | A Markov chain is the simplest sequential model which factorizes the 20 | joint probability distribution P(X_{0} ... X_{t}) along a chain into the 21 | product of a marginal distribution P(X_{0}) P(X_{1} | X_{0}) ... with 22 | k conditional probability distributions for a k-th order Markov chain. 23 | 24 | Despite sometimes being thought of as an independent model, Markov chains 25 | are probability distributions over sequences just like hidden Markov 26 | models. Because a Markov chain has the same theoretical properties as a 27 | probability distribution, it can be used in any situation that a simpler 28 | distribution could, such as an emission distribution for a HMM or a 29 | component of a Bayes classifier. 30 | 31 | 32 | Parameters 33 | ---------- 34 | distributions: tuple or list or None 35 | A set of distribution objects. These objects do not need to be 36 | initialized, i.e., can be "Categorical()". 37 | 38 | k: int or None 39 | The number of conditional distributions to include in the chain, also 40 | the number of steps back to model in the sequence. This must be passed 41 | in if the distributions are not passed in. 
42 | 43 | n_categories: list, tuple, or None 44 | A list or tuple containing the number of categories that each feature 45 | has. 46 | 47 | inertia: float, [0, 1], optional 48 | Indicates the proportion of the update to apply to the parameters 49 | during training. When the inertia is 0.0, the update is applied in 50 | its entirety and the previous parameters are ignored. When the 51 | inertia is 1.0, the update is entirely ignored and the previous 52 | parameters are kept, equivalently to if the parameters were frozen. 53 | 54 | frozen: bool, optional 55 | Whether all the parameters associated with this distribution are frozen. 56 | If you want to freeze individual pameters, or individual values in those 57 | parameters, you must modify the `frozen` attribute of the tensor or 58 | parameter directly. Default is False. 59 | 60 | check_data: bool, optional 61 | Whether to check properties of the data and potentially recast it to 62 | torch.tensors. This does not prevent checking of parameters but can 63 | slightly speed up computation when you know that your inputs are valid. 64 | Setting this to False is also necessary for compiling. 65 | """ 66 | 67 | def __init__(self, distributions=None, k=None, n_categories=None, 68 | inertia=0.0, frozen=False, check_data=True): 69 | super().__init__(inertia=inertia, frozen=frozen, check_data=check_data) 70 | self.name = "MarkovChain" 71 | 72 | self.distributions = _check_parameter(distributions, "distributions", 73 | dtypes=(list, tuple)) 74 | self.k = _check_parameter(_cast_as_tensor(k, dtype=torch.int32), "k", 75 | ndim=0) 76 | self.n_categories = _check_parameter(n_categories, "n_categories", 77 | dtypes=(list, tuple)) 78 | 79 | if distributions is None and k is None: 80 | raise ValueError("Must provide one of 'distributions', or 'k'.") 81 | 82 | if distributions is not None: 83 | self.k = len(distributions) - 1 84 | 85 | self.d = None 86 | self._initialized = distributions is not None and distributions[0]._initialized 87 | self._reset_cache() 88 | 89 | def _initialize(self, d, n_categories): 90 | """Initialize the probability distribution. 91 | 92 | This method is meant to only be called internally. It initializes the 93 | parameters of the distribution and stores its dimensionality. For more 94 | complex methods, this function will do more. 95 | 96 | 97 | Parameters 98 | ---------- 99 | d: int 100 | The dimensionality the distribution is being initialized to. 101 | 102 | n_categories: int 103 | The maximum number of categories to model. This single number is 104 | used as the maximum across all features and all timesteps. 105 | """ 106 | 107 | if self.distributions is None: 108 | self.distributions = [Categorical()] 109 | self.distributions[0]._initialize(d, max(n_categories)) 110 | 111 | for i in range(self.k): 112 | distribution = ConditionalCategorical() 113 | distribution._initialize(d, [[n_categories[j]]*(i+2) 114 | for j in range(d)]) 115 | 116 | self.distributions.append(distribution) 117 | 118 | self.n_categories = n_categories 119 | self._initialized = True 120 | super()._initialize(d) 121 | 122 | def _reset_cache(self): 123 | """Reset the internally stored statistics. 124 | 125 | This method is meant to only be called internally. It resets the 126 | stored statistics used to update the model parameters as well as 127 | recalculates the cached values meant to speed up log probability 128 | calculations. 
129 | """ 130 | 131 | if self._initialized: 132 | for distribution in self.distributions: 133 | distribution._reset_cache() 134 | 135 | def sample(self, n): 136 | """Sample from the probability distribution. 137 | 138 | This method will return `n` samples generated from the underlying 139 | probability distribution. For a mixture model, this involves first 140 | sampling the component using the prior probabilities, and then sampling 141 | from the chosen distribution. 142 | 143 | 144 | Parameters 145 | ---------- 146 | n: int 147 | The number of samples to generate. 148 | 149 | 150 | Returns 151 | ------- 152 | X: torch.tensor, shape=(n, self.d) 153 | Randomly generated samples. 154 | """ 155 | 156 | X = [self.distributions[0].sample(n)] 157 | 158 | for distribution in self.distributions[1:]: 159 | X_ = torch.stack(X).permute(1, 0, 2) 160 | samples = distribution.sample(n, X_[:, -self.k-1:]) 161 | X.append(samples) 162 | 163 | return torch.stack(X).permute(1, 0, 2) 164 | 165 | def log_probability(self, X): 166 | """Calculate the log probability of each example. 167 | 168 | This method calculates the log probability of each example given the 169 | parameters of the distribution. The examples must be given in a 3D 170 | format. 171 | 172 | 173 | Parameters 174 | ---------- 175 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, length, self.d) 176 | A set of examples to evaluate. 177 | 178 | Returns 179 | ------- 180 | logp: torch.Tensor, shape=(-1,) 181 | The log probability of each example. 182 | """ 183 | 184 | 185 | X = _check_parameter(_cast_as_tensor(X), "X", ndim=3, 186 | check_parameter=self.check_data) 187 | self.d = X.shape[1] 188 | 189 | logps = self.distributions[0].log_probability(X[:, 0]) 190 | for i, distribution in enumerate(self.distributions[1:-1]): 191 | logps += distribution.log_probability(X[:, :i+2]) 192 | 193 | for i in range(X.shape[1] - self.k): 194 | j = i + self.k + 1 195 | logps += self.distributions[-1].log_probability(X[:, i:j]) 196 | 197 | return logps 198 | 199 | def fit(self, X, sample_weight=None): 200 | """Fit the model to optionally weighted examples. 201 | 202 | This method will fit the provided distributions given the data and 203 | their weights. If only `k` has been provided, the relevant set of 204 | distributions will be initialized. 205 | 206 | 207 | Parameters 208 | ---------- 209 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, length, self.d) 210 | A set of examples to evaluate. 211 | 212 | sample_weight: list, tuple, numpy.ndarray, torch.Tensor, optional 213 | A set of weights for the examples. This can be either of shape 214 | (-1, self.d) or a vector of shape (-1,). Default is ones. 215 | 216 | 217 | Returns 218 | ------- 219 | self 220 | """ 221 | 222 | self.summarize(X, sample_weight=sample_weight) 223 | self.from_summaries() 224 | return self 225 | 226 | def summarize(self, X, sample_weight=None): 227 | """Extract the sufficient statistics from a batch of data. 228 | 229 | This method calculates the sufficient statistics from optionally 230 | weighted data and adds them to the stored cache for each distribution 231 | in the network. Sample weights can either be provided as one 232 | value per example or as a 2D matrix of weights for each feature in 233 | each example. 234 | 235 | 236 | Parameters 237 | ---------- 238 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, length, self.d) 239 | A set of examples to summarize. 
240 | 241 | sample_weight: list, tuple, numpy.ndarray, torch.Tensor, optional 242 | A set of weights for the examples. This can be either of shape 243 | (-1, self.d) or a vector of shape (-1,). Default is ones. 244 | 245 | 246 | Returns 247 | ------- 248 | logp: torch.Tensor, shape=(-1,) 249 | The log probability of each example. 250 | """ 251 | 252 | if self.frozen: 253 | return 254 | 255 | X = _check_parameter(_cast_as_tensor(X), "X", ndim=3, 256 | check_parameter=self.check_data) 257 | sample_weight = _check_parameter(_cast_as_tensor(sample_weight), 258 | "sample_weight", min_value=0, ndim=(1, 2), 259 | check_parameter=self.check_data) 260 | 261 | if not self._initialized: 262 | if self.n_categories is not None: 263 | n_keys = self.n_categories 264 | elif isinstance(X, torch.masked.MaskedTensor): 265 | n_keys = (torch.max(torch.max(X._masked_data, dim=0)[0], 266 | dim=0)[0] + 1).type(torch.int32) 267 | else: 268 | n_keys = (torch.max(torch.max(X, dim=0)[0], dim=0)[0] + 1).type( 269 | torch.int32) 270 | 271 | self._initialize(len(X[0][0]), n_keys) 272 | 273 | if sample_weight is None: 274 | sample_weight = torch.ones_like(X[:, 0]) 275 | elif len(sample_weight.shape) == 1: 276 | sample_weight = sample_weight.reshape(-1, 1).expand(-1, X.shape[2]) 277 | elif sample_weight.shape[1] == 1: 278 | sample_weight = sample_weight.expand(-1, X.shape[2]) 279 | 280 | _check_parameter(_cast_as_tensor(sample_weight), "sample_weight", 281 | min_value=0, ndim=2, shape=(X.shape[0], X.shape[2]), 282 | check_parameter=self.check_data) 283 | 284 | self.distributions[0].summarize(X[:, 0], sample_weight=sample_weight) 285 | for i, distribution in enumerate(self.distributions[1:-1]): 286 | distribution.summarize(X[:, :i+2], sample_weight=sample_weight) 287 | 288 | distribution = self.distributions[-1] 289 | for i in range(X.shape[1] - self.k): 290 | j = i + self.k + 1 291 | distribution.summarize(X[:, i:j], sample_weight=sample_weight) 292 | 293 | def from_summaries(self): 294 | """Update the model parameters given the extracted statistics. 295 | 296 | This method uses calculated statistics from calls to the `summarize` 297 | method to update the distribution parameters. Hyperparameters for the 298 | update are passed in at initialization time. 299 | 300 | Note: Internally, a call to `fit` is just a successive call to the 301 | `summarize` method followed by the `from_summaries` method. 
302 | """ 303 | 304 | if self.frozen: 305 | return 306 | 307 | for distribution in self.distributions: 308 | distribution.from_summaries() 309 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy >= 1.22.2 2 | scipy >= 1.6.2 3 | scikit-learn >= 1.0.2 4 | torch >= 1.9.0 5 | apricot-select >= 0.6.1 6 | networkx >= 2.8.4 7 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup( 4 | name='pomegranate', 5 | version='1.1.2', 6 | author='Jacob Schreiber', 7 | author_email='jmschreiber91@gmail.com', 8 | packages=['pomegranate', 'pomegranate.distributions', 'pomegranate.hmm'], 9 | url='https://github.com/jmschrei/torchegranate', 10 | license='MIT', 11 | description='A PyTorch implementation of probabilistic models.', 12 | install_requires=[ 13 | 'numpy >= 1.22.2', 14 | 'scipy >= 1.6.2', 15 | 'scikit-learn >= 1.0.2', 16 | 'torch >= 1.9.0', 17 | 'apricot-select >= 0.6.1', 18 | 'networkx >= 2.8.4' 19 | ] 20 | ) -------------------------------------------------------------------------------- /slides/pomegranate ODSC East 2019.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jmschrei/pomegranate/e9162731f4f109b7b17ecffde768734cacdb839b/slides/pomegranate ODSC East 2019.pdf -------------------------------------------------------------------------------- /slides/pomegranate ODSC Europe 2020.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jmschrei/pomegranate/e9162731f4f109b7b17ecffde768734cacdb839b/slides/pomegranate ODSC Europe 2020.pdf -------------------------------------------------------------------------------- /slides/pomegranate ODSC West 2017.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jmschrei/pomegranate/e9162731f4f109b7b17ecffde768734cacdb839b/slides/pomegranate ODSC West 2017.pdf -------------------------------------------------------------------------------- /slides/pomegranate ODSC West 2018.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jmschrei/pomegranate/e9162731f4f109b7b17ecffde768734cacdb839b/slides/pomegranate ODSC West 2018.pdf -------------------------------------------------------------------------------- /slides/pomegranate ODSC West 2019.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jmschrei/pomegranate/e9162731f4f109b7b17ecffde768734cacdb839b/slides/pomegranate ODSC West 2019.pdf -------------------------------------------------------------------------------- /slides/pomegranate PyData NYC 2017.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jmschrei/pomegranate/e9162731f4f109b7b17ecffde768734cacdb839b/slides/pomegranate PyData NYC 2017.pdf -------------------------------------------------------------------------------- /slides/pomegranate data intelligence 2017.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jmschrei/pomegranate/e9162731f4f109b7b17ecffde768734cacdb839b/slides/pomegranate data intelligence 2017.pdf -------------------------------------------------------------------------------- /slides/pomegranate odsc east 2017 turorial.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jmschrei/pomegranate/e9162731f4f109b7b17ecffde768734cacdb839b/slides/pomegranate odsc east 2017 turorial.pdf -------------------------------------------------------------------------------- /slides/pomegranate pydata seattle 2017.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jmschrei/pomegranate/e9162731f4f109b7b17ecffde768734cacdb839b/slides/pomegranate pydata seattle 2017.pdf -------------------------------------------------------------------------------- /slides/pomegranate scipy 2017.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jmschrei/pomegranate/e9162731f4f109b7b17ecffde768734cacdb839b/slides/pomegranate scipy 2017.pdf -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jmschrei/pomegranate/e9162731f4f109b7b17ecffde768734cacdb839b/tests/__init__.py -------------------------------------------------------------------------------- /tests/distributions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jmschrei/pomegranate/e9162731f4f109b7b17ecffde768734cacdb839b/tests/distributions/__init__.py -------------------------------------------------------------------------------- /tests/distributions/_utils.py: -------------------------------------------------------------------------------- 1 | # _utils.py 2 | # Contact: Jacob Schreiber 3 | 4 | import numpy 5 | import torch 6 | 7 | from ..tools import assert_raises 8 | from numpy.testing import assert_array_almost_equal 9 | 10 | 11 | def _test_initialization(d, x, name, inertia, frozen, dtype): 12 | assert d.inertia == inertia 13 | assert d.frozen == frozen 14 | param = getattr(d, name) 15 | 16 | if x is not None: 17 | assert param.shape[0] == len(x) 18 | assert param.dtype == dtype 19 | assert_array_almost_equal(param, x) 20 | else: 21 | assert param == x 22 | 23 | 24 | def _test_initialization_raises_one_parameter(distribution, valid_value, 25 | min_value=None, max_value=None): 26 | assert_raises(ValueError, distribution, valid_value) 27 | assert_raises(ValueError, distribution, [valid_value], inertia=-0.4) 28 | assert_raises(ValueError, distribution, [valid_value], inertia=1.2) 29 | assert_raises(ValueError, distribution, [valid_value], inertia=1.2, 30 | frozen="true") 31 | assert_raises(ValueError, distribution, [valid_value], inertia=1.2, 32 | frozen=3) 33 | 34 | assert_raises(ValueError, distribution, inertia=-0.4) 35 | assert_raises(ValueError, distribution, inertia=1.2) 36 | assert_raises(ValueError, distribution, inertia=1.2, frozen="true") 37 | assert_raises(ValueError, distribution, inertia=1.2, frozen=3) 38 | 39 | if min_value is not None: 40 | assert_raises(ValueError, distribution, [valid_value, min_value-0.1]) 41 | 42 | if max_value is not None: 43 | assert_raises(ValueError, distribution, [valid_value, max_value+0.1]) 44 | 45 | 46 | def 
_test_initialization_raises_two_parameters(distribution, valid_value1, 47 | valid_value2, min_value1=None, min_value2=None, max_value1=None, 48 | max_value2=None): 49 | 50 | assert_raises(ValueError, distribution, valid_value1) 51 | assert_raises(ValueError, distribution, None, valid_value2) 52 | assert_raises(ValueError, distribution, valid_value1, valid_value2) 53 | assert_raises(ValueError, distribution, [valid_value1], 54 | [valid_value2, valid_value2]) 55 | assert_raises(ValueError, distribution, [valid_value1, valid_value1], 56 | [valid_value2]) 57 | 58 | assert_raises(ValueError, distribution, [valid_value1, valid_value2], 59 | inertia=-0.4) 60 | assert_raises(ValueError, distribution, [valid_value1, valid_value2], 61 | inertia=1.2) 62 | assert_raises(ValueError, distribution, [valid_value1, valid_value2], 63 | inertia=1.2, frozen="true") 64 | assert_raises(ValueError, distribution, [valid_value1, valid_value2], 65 | inertia=1.2, frozen=3) 66 | 67 | assert_raises(ValueError, distribution, inertia=-0.4) 68 | assert_raises(ValueError, distribution, inertia=1.2) 69 | assert_raises(ValueError, distribution, inertia=1.2, frozen="true") 70 | assert_raises(ValueError, distribution, inertia=1.2, frozen=3) 71 | 72 | if min_value1 is not None: 73 | assert_raises(ValueError, distribution, [valid_value1, min_value1-0.1], 74 | [valid_value2, valid_value2]) 75 | 76 | if min_value2 is not None: 77 | assert_raises(ValueError, distribution, [valid_value1, valid_value1], 78 | [valid_value2, min_value2-0.1]) 79 | 80 | if max_value1 is not None: 81 | assert_raises(ValueError, distribution, [valid_value1, max_value1+0.1], 82 | [valid_value2, valid_value2]) 83 | 84 | if max_value2 is not None: 85 | assert_raises(ValueError, distribution, [valid_value1, valid_value1], 86 | [valid_value2, max_value2+0.1]) 87 | 88 | 89 | def _test_predictions(x, y, y_hat, dtype): 90 | assert isinstance(y_hat, torch.Tensor) 91 | assert y_hat.dtype == dtype 92 | assert y_hat.shape == (len(x),) 93 | assert_array_almost_equal(y, y_hat) 94 | 95 | 96 | def _test_raises(d, name, X, w=None, min_value=None, max_value=None): 97 | f = getattr(d, name) 98 | 99 | assert_raises(ValueError, f, [X]) 100 | assert_raises(ValueError, f, X[0]) 101 | assert_raises((ValueError, TypeError, RuntimeError), f, X[0][0]) 102 | 103 | if d._initialized == True: 104 | assert_raises(ValueError, f, [x[:-1] for x in X]) 105 | 106 | if min_value is not None: 107 | assert_raises(ValueError, f, [[min_value-0.1 for i in range(d.d)]]) 108 | 109 | if max_value is not None: 110 | assert_raises(ValueError, f, [[max_value+0.1 for i in range(d.d)]]) 111 | else: 112 | if min_value is not None: 113 | assert_raises(ValueError, f, [[min_value-0.1 for i in range(3)]]) 114 | 115 | if max_value is not None: 116 | assert_raises(ValueError, f, [[max_value+0.1 for i in range(3)]]) 117 | 118 | 119 | if w is not None: 120 | assert_raises(ValueError, f, [X], w) 121 | assert_raises(ValueError, f, X, [w]) 122 | assert_raises(ValueError, f, [X], [w]) 123 | assert_raises(ValueError, f, X, w[:len(w)-1]) 124 | assert_raises(ValueError, f, X[:len(X)-1], w) 125 | 126 | 127 | def _test_efd_from_summaries(d, name1, name2, values): 128 | assert_array_almost_equal(getattr(d, name1), values) 129 | assert_array_almost_equal(getattr(d, name2), numpy.log(values)) 130 | assert_array_almost_equal(d._w_sum, numpy.zeros(d.d)) 131 | assert_array_almost_equal(d._xw_sum, numpy.zeros(d.d)) 132 | -------------------------------------------------------------------------------- /tests/hmm/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/jmschrei/pomegranate/e9162731f4f109b7b17ecffde768734cacdb839b/tests/hmm/__init__.py -------------------------------------------------------------------------------- /tests/tools.py: -------------------------------------------------------------------------------- 1 | # tools.py 2 | # Author: Jacob Schreiber 3 | 4 | ''' 5 | Tools taken from nose since it can no longer be installed after Py3.12. 6 | ''' 7 | 8 | 9 | import re 10 | import unittest 11 | 12 | 13 | __all__ = ['ok_', 'eq_'] 14 | 15 | # Use the same flag as unittest itself to prevent descent into these functions: 16 | __unittest = 1 17 | 18 | 19 | def ok_(expr, msg=None): 20 | """Shorthand for assert. Saves 3 whole characters! 21 | """ 22 | if not expr: 23 | raise AssertionError(msg) 24 | 25 | 26 | def eq_(a, b, msg=None): 27 | """Shorthand for 'assert a == b, "%r != %r" % (a, b) 28 | """ 29 | if not a == b: 30 | raise AssertionError(msg or "%r != %r" % (a, b)) 31 | 32 | 33 | # 34 | # Expose assert* from unittest.TestCase 35 | # - give them pep8 style names 36 | # 37 | caps = re.compile('([A-Z])') 38 | 39 | def pep8(name): 40 | return caps.sub(lambda m: '_' + m.groups()[0].lower(), name) 41 | 42 | class Dummy(unittest.TestCase): 43 | def nop(): 44 | pass 45 | _t = Dummy('nop') 46 | 47 | for at in [ at for at in dir(_t) 48 | if at.startswith('assert') and not '_' in at ]: 49 | pepd = pep8(at) 50 | vars()[pepd] = getattr(_t, at) 51 | __all__.append(pepd) 52 | 53 | del Dummy 54 | del _t 55 | del pep8 56 | --------------------------------------------------------------------------------
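
A note on the helpers defined in tests/tools.py above: the loop at the bottom of that file copies every `assert*` method from `unittest.TestCase` onto the module under a pep8-style name (so `assertRaises` becomes `assert_raises`, and so on), which is why tests/distributions/_utils.py can write `from ..tools import assert_raises` even though no such function is defined explicitly. The snippet below is an illustrative sketch only, not part of the repository; the `tests.tools` import path assumes it is run from the repository root with the test packages importable.

# illustrative_tools_usage.py -- sketch only, not part of the repository
from tests.tools import assert_raises, eq_, ok_

def divide(a, b):
    return a / b

# assertRaises -> assert_raises via the caps-to-underscore renaming in tools.py;
# the call signature is the same as unittest.TestCase.assertRaises(exc, callable, *args)
assert_raises(ZeroDivisionError, divide, 1, 0)

eq_(divide(6, 3), 2.0)    # shorthand for `assert a == b` with a formatted message
ok_(divide(4, 2) == 2.0)  # shorthand for a bare `assert`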