├── .github └── workflows │ ├── draft-pdf.yml │ ├── release_to_pypi.yml │ └── setup_and_run_tests.yml ├── .gitignore ├── .readthedocs.yaml ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENCE ├── README.md ├── docs ├── Makefile ├── make.bat ├── requirements.txt └── source │ ├── api.rst │ ├── api │ ├── flow.rst │ ├── mcmc.rst │ ├── parallel.rst │ ├── prior.rst │ ├── sampler.rst │ ├── scaler.rst │ └── tools.rst │ ├── background.rst │ ├── blobs.ipynb │ ├── checkpoint.ipynb │ ├── conf.py │ ├── faq.rst │ ├── fitting.ipynb │ ├── flow.ipynb │ ├── images │ ├── advanced_corner.png │ ├── advanced_run.png │ ├── advanced_trace.png │ └── logo.gif │ ├── index.rst │ ├── install.rst │ ├── likelihood.ipynb │ ├── model_comparison.ipynb │ ├── parallelization.ipynb │ ├── priors.rst │ ├── quickstart.ipynb │ ├── results.rst │ └── sampling.ipynb ├── joss ├── paper.bib └── paper.md ├── logo.png ├── logo.svg ├── pocomc ├── __init__.py ├── _version.py ├── flow.py ├── geometry.py ├── input_validation.py ├── mcmc.py ├── parallel.py ├── particles.py ├── prior.py ├── sampler.py ├── scaler.py ├── student.py ├── threading.py └── tools.py ├── requirements.txt ├── setup.cfg ├── setup.py └── tests ├── test_flow.py ├── test_prior.py ├── test_sampler.py ├── test_scaler.py ├── test_state.py └── test_tools.py /.github/workflows/draft-pdf.yml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: 4 | - main 5 | paths: 6 | - 'joss/paper.md' # Only run workflow on pushes where paper.md was changed 7 | 8 | jobs: 9 | paper: 10 | runs-on: ubuntu-latest 11 | name: Paper Draft 12 | steps: 13 | - name: Checkout 14 | uses: actions/checkout@v2 15 | - name: Build draft PDF 16 | uses: openjournals/openjournals-draft-action@master 17 | with: 18 | journal: joss 19 | # This should be the path to the paper within your repo. 20 | paper-path: joss/paper.md 21 | - name: Upload 22 | uses: actions/upload-artifact@v1 23 | with: 24 | name: paper 25 | # This is the output path where Pandoc will write the compiled 26 | # PDF. 
Note, this should be the same directory as the input 27 | # paper.md 28 | path: joss/paper.pdf -------------------------------------------------------------------------------- /.github/workflows/release_to_pypi.yml: -------------------------------------------------------------------------------- 1 | name: Publish pocomc to PyPI / GitHub 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | paths: 8 | - 'pocomc/_version.py' # Only run workflow on pushes where _version.py was changed 9 | workflow_dispatch: # Allows you to run this workflow manually from the Actions tab 10 | 11 | jobs: 12 | build-n-publish: 13 | name: Build and publish to PyPI 14 | runs-on: ubuntu-latest 15 | 16 | steps: 17 | - name: Checkout source 18 | uses: actions/checkout@v2 19 | 20 | - name: Set up Python 21 | uses: actions/setup-python@v2 22 | with: 23 | python-version: "3.11" 24 | 25 | - name: Build source and wheel distributions 26 | run: | 27 | python -m pip install --upgrade build twine 28 | python -m build 29 | twine check --strict dist/* 30 | 31 | - name: Install pocomc from the wheel file 32 | run: | 33 | pip install dist/*.whl 34 | 35 | - name: List the installed packages 36 | run: | 37 | pip freeze 38 | 39 | - name: Run pocomc unittests 40 | run: | 41 | python -m unittest discover tests 42 | 43 | - name: Publish distribution to PyPI 44 | uses: pypa/gh-action-pypi-publish@master 45 | with: 46 | user: __token__ 47 | password: ${{ secrets.PYPI_API_TOKEN }} 48 | 49 | - name: Get the version 50 | id: get_version 51 | run: echo ::set-output name=VERSION::$(cat pocomc/_version.py | grep version | cut -d'"' -f 2) 52 | 53 | - name: Create GitHub Release 54 | id: create_release 55 | uses: actions/create-release@v1 56 | env: 57 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # This token is provided by Actions, no need to create our own 58 | with: 59 | tag_name: ${{ steps.get_version.outputs.VERSION }} 60 | release_name: ${{ steps.get_version.outputs.VERSION }} 61 | draft: false 62 | prerelease: false 63 | 64 | - name: Get Asset name 65 | run: | 66 | export PKG=$(ls dist/ | grep tar) 67 | set -- $PKG 68 | echo "name=$1" >> $GITHUB_ENV 69 | - name: Upload Release Asset (sdist) to GitHub 70 | id: upload-release-asset 71 | uses: actions/upload-release-asset@v1 72 | env: 73 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 74 | with: 75 | upload_url: ${{ steps.create_release.outputs.upload_url }} 76 | asset_path: dist/${{ env.name }} 77 | asset_name: ${{ env.name }} 78 | asset_content_type: application/zip 79 | -------------------------------------------------------------------------------- /.github/workflows/setup_and_run_tests.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | name: Setup pocomc and run tests 3 | 4 | on: 5 | push: 6 | branches: [ "main", "dev"] 7 | pull_request: 8 | branches: [ "main", "dev" ] 9 | workflow_dispatch: 10 | 11 | jobs: 12 | build: 13 | 14 | runs-on: ubuntu-latest 15 | strategy: 16 | fail-fast: false 17 | matrix: 18 | python-version: ["3.8", "3.9", "3.10", "3.11"] 19 | 20 | steps: 21 | - uses: actions/checkout@v3 22 | - name: Set up Python ${{ matrix.python-version }} 23 | uses: actions/setup-python@v3 24 | with: 25 | python-version: ${{ matrix.python-version }} 26 | - name: Install dependencies 27 | run: | 28 | python -m pip install --upgrade pip 29 | python -m pip install flake8 30 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 31 | - 
name: Lint with flake8 32 | run: | 33 | # stop the build if there are Python syntax errors or undefined names 34 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 35 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 36 | flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 37 | - name: Run tests 38 | run: | 39 | python -m unittest discover tests 40 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | examples/.ipynb_checkpoints 2 | pocomc.egg-info/ 3 | pocomc/__pycache__/ 4 | pocomc/utils/__pycache__/ 5 | pocomc/sinf/__pycache__/ 6 | docs/build/ 7 | .DS_Store 8 | pocomc/docs/.DS_Store 9 | pocomc/docs/build/.DS_Store 10 | pocomc/docs/source/.DS_Store 11 | *.egg-info 12 | *.pyc 13 | *.so 14 | *.dylib 15 | *.egg 16 | .DS_Store 17 | docs/.DS_Store 18 | docs/source/.DS_Store 19 | states/ -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yaml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 2 7 | 8 | # Set the version of Python and other tools you might need 9 | build: 10 | os: ubuntu-22.04 11 | tools: 12 | python: "3.11" 13 | 14 | # Build documentation in the docs/ directory with Sphinx 15 | sphinx: 16 | configuration: docs/source/conf.py 17 | 18 | # If using Sphinx, optionally build your docs in additional formats such as PDF 19 | # formats: 20 | # - pdf 21 | 22 | # Optionally declare the Python requirements required to build your docs 23 | python: 24 | install: 25 | - requirements: docs/requirements.txt 26 | - requirements: requirements.txt 27 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, level of experience, education, socio-economic status, 9 | nationality, personal appearance, race, religion, or sexual identity 10 | and orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community. 
14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the 26 | overall community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or 31 | advances of any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email 35 | address, without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 55 | Examples of representing our community include using an official e-mail address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at 63 | minaskar@gmail.com. 64 | All complaints will be reviewed and investigated promptly and fairly. 65 | 66 | All community leaders are obligated to respect the privacy and security of the 67 | reporter of any incident. 68 | 69 | ## Enforcement Guidelines 70 | 71 | Community leaders will follow these Community Impact Guidelines in determining 72 | the consequences for any action they deem in violation of this Code of Conduct: 73 | 74 | ### 1. Correction 75 | 76 | **Community Impact**: Use of inappropriate language or other behavior deemed 77 | unprofessional or unwelcome in the community. 78 | 79 | **Consequence**: A private, written warning from community leaders, providing 80 | clarity around the nature of the violation and an explanation of why the 81 | behavior was inappropriate. A public apology may be requested. 82 | 83 | ### 2. Warning 84 | 85 | **Community Impact**: A violation through a single incident or series 86 | of actions. 87 | 88 | **Consequence**: A warning with consequences for continued behavior. No 89 | interaction with the people involved, including unsolicited interaction with 90 | those enforcing the Code of Conduct, for a specified period of time. This 91 | includes avoiding interactions in community spaces as well as external channels 92 | like social media. 
Violating these terms may lead to a temporary or 93 | permanent ban. 94 | 95 | ### 3. Temporary Ban 96 | 97 | **Community Impact**: A serious violation of community standards, including 98 | sustained inappropriate behavior. 99 | 100 | **Consequence**: A temporary ban from any sort of interaction or public 101 | communication with the community for a specified period of time. No public or 102 | private interaction with the people involved, including unsolicited interaction 103 | with those enforcing the Code of Conduct, is allowed during this period. 104 | Violating these terms may lead to a permanent ban. 105 | 106 | ### 4. Permanent Ban 107 | 108 | **Community Impact**: Demonstrating a pattern of violation of community 109 | standards, including sustained inappropriate behavior, harassment of an 110 | individual, or aggression toward or disparagement of classes of individuals. 111 | 112 | **Consequence**: A permanent ban from any sort of public interaction within 113 | the community. 114 | 115 | ## Attribution 116 | 117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 118 | version 2.0, available at 119 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. 120 | 121 | Community Impact Guidelines were inspired by [Mozilla's code of conduct 122 | enforcement ladder](https://github.com/mozilla/diversity). 123 | 124 | [homepage]: https://www.contributor-covenant.org 125 | 126 | For answers to common questions about this code of conduct, see the FAQ at 127 | https://www.contributor-covenant.org/faq. Translations are available at 128 | https://www.contributor-covenant.org/translations. 129 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | ## How to contribute to pocoMC 2 | 3 | ### Expectations 4 | 5 | pocoMC is developed and maintained in my spare time and, while I try to be 6 | responsive, I don't always get to every issue immediately. 7 | 8 | ### Did you find a bug? 9 | 10 | **Ensure the bug was not already reported** by searching on GitHub under 11 | [Issues](https://github.com/minaskar/pocomc/issues). If you're unable to find an 12 | open issue addressing the problem, [open a new 13 | one](https://github.com/minaskar/pocomc/issues/new). Be sure to include a **title 14 | and clear description**, as much relevant information as possible, and the 15 | simplest possible **code sample** demonstrating the expected behavior that is 16 | not occurring. 17 | 18 | ### Did you write a patch that fixes a bug? 19 | 20 | Open a new GitHub pull request with the patch. Ensure the PR description 21 | clearly describes the problem and solution. Include the relevant issue number 22 | if applicable. 23 | 24 | ### Do you intend to add a new feature or change an existing one? 25 | 26 | First, [open a new issue](https://github.com/minaskar/pocomc/issues/new) and 27 | clearly describe your idea. We will then let you know if what you suggest 28 | aligns with the vision of pocoMC. This way you might avoid doing unnecessary 29 | work and might even find some help from other people. 
30 | 31 | Cheers, 32 | 33 | Minas 34 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![logo](logo.png) 2 | 3 | **pocoMC is a Python implementation of the Preconditioned Monte Carlo method for accelerated Bayesian inference** 4 | 5 | [![License: GPL v3](https://img.shields.io/badge/License-GPLv3-blue.svg)](https://github.com/minaskar/pocomc/blob/master/LICENSE) 6 | [![Documentation Status](https://readthedocs.org/projects/pocomc/badge/?version=latest)](https://pocomc.readthedocs.io/en/latest/?badge=latest) 7 | 8 | 9 | # Getting started 10 | 11 | ## Brief introduction 12 | 13 | ``pocoMC`` is a Python package for fast Bayesian posterior and model evidence estimation. It leverages 14 | the Preconditioned Monte Carlo (PMC) algorithm, offering significant speed improvements over 15 | traditional methods like MCMC and Nested Sampling. Ideal for large-scale scientific problems 16 | with expensive likelihood evaluations, non-linear correlations, and multimodality, ``pocoMC`` 17 | provides efficient and scalable posterior sampling and model evidence estimation. Widely used 18 | in cosmology and astronomy, ``pocoMC`` is user-friendly, flexible, and actively maintained. 19 | 20 | ## Documentation 21 | 22 | Read the docs at [pocomc.readthedocs.io](https://pocomc.readthedocs.io) for more information, examples and tutorials. 23 | 24 | ## Installation 25 | 26 | To install ``pocomc`` using ``pip`` run: 27 | 28 | ```bash 29 | pip install pocomc 30 | ``` 31 | 32 | or, to install from source: 33 | 34 | ```bash 35 | git clone https://github.com/minaskar/pocomc.git 36 | cd pocomc 37 | python setup.py install 38 | ``` 39 | 40 | ## Basic example 41 | 42 | For instance, if you wanted to draw samples from a 10-dimensional Rosenbrock distribution with a uniform prior, you would do something like: 43 | 44 | ```python 45 | import pocomc as pc 46 | import numpy as np 47 | from scipy.stats import uniform 48 | 49 | n_dim = 10 # Number of dimensions 50 | 51 | prior = pc.Prior(n_dim*[uniform(-10.0, 20.0)]) # U(-10,10) 52 | 53 | def log_likelihood(x): 54 | return -np.sum(10.0*(x[:,::2]**2.0 - x[:,1::2])**2.0 \ 55 | + (x[:,::2] - 1.0)**2.0, axis=1) 56 | 57 | sampler = pc.Sampler( 58 | prior=prior, 59 | likelihood=log_likelihood, 60 | vectorize=True, 61 | ) 62 | sampler.run() 63 | 64 | samples, weights, logl, logp = sampler.posterior() # Weighted posterior samples 65 | 66 | logz, logz_err = sampler.evidence() # Bayesian model evidence estimate and uncertainty 67 | ``` 68 | 69 | 70 | # Attribution & Citation 71 | 72 | Please cite the following papers if you found this code useful in your research: 73 | 74 | ```bash 75 | @article{karamanis2022accelerating, 76 | title={Accelerating astronomical and cosmological inference with preconditioned Monte Carlo}, 77 | author={Karamanis, Minas and Beutler, Florian and Peacock, John A and Nabergoj, David and Seljak, Uro{\v{s}}}, 78 | journal={Monthly Notices of the Royal Astronomical Society}, 79 | volume={516}, 80 | number={2}, 81 | pages={1644--1653}, 82 | year={2022}, 83 | publisher={Oxford University Press} 84 | } 85 | 86 | @article{karamanis2022pocomc, 87 | title={pocoMC: A Python package for accelerated Bayesian inference in astronomy and cosmology}, 88 | author={Karamanis, Minas and Nabergoj, David and Beutler, Florian and Peacock, John A and Seljak, Uros}, 89 | journal={arXiv preprint arXiv:2207.05660}, 90 | year={2022} 91 | } 92 | ``` 93 | 94 | # 
Licence 95 | 96 | Copyright 2022-Now Minas Karamanis and contributors. 97 | 98 | ``pocoMC`` is free software made available under the GPL-3.0 License. For details see the `LICENSE` file. 99 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | furo 2 | myst_nb -------------------------------------------------------------------------------- /docs/source/api.rst: -------------------------------------------------------------------------------- 1 | ============= 2 | API Reference 3 | ============= 4 | 5 | ``pocoMC`` consists mainly of the following parts: 6 | 7 | .. toctree:: 8 | :maxdepth: 1 9 | 10 | api/sampler 11 | api/prior 12 | api/flow 13 | api/tools 14 | api/mcmc 15 | api/scaler 16 | api/parallel 17 | 18 | -------------------------------------------------------------------------------- /docs/source/api/flow.rst: -------------------------------------------------------------------------------- 1 | Flow 2 | ==== 3 | 4 | General flow object 5 | ------------------- 6 | .. autoclass:: pocomc.flow.Flow 7 | :members: 8 | 9 | -------------------------------------------------------------------------------- /docs/source/api/mcmc.rst: -------------------------------------------------------------------------------- 1 | MCMC 2 | ==== 3 | 4 | Samplers 5 | -------- 6 | 7 | .. autofunction:: pocomc.mcmc.preconditioned_pcn 8 | 9 | .. autofunction:: pocomc.mcmc.preconditioned_rwm 10 | 11 | .. 
autofunction:: pocomc.mcmc.pcn 12 | 13 | .. autofunction:: pocomc.mcmc.rwm 14 | 15 | -------------------------------------------------------------------------------- /docs/source/api/parallel.rst: -------------------------------------------------------------------------------- 1 | Parallel 2 | ======== 3 | 4 | .. autoclass:: pocomc.parallel.MPIPool 5 | :members: -------------------------------------------------------------------------------- /docs/source/api/prior.rst: -------------------------------------------------------------------------------- 1 | Prior 2 | ===== 3 | 4 | General prior object 5 | -------------------- 6 | .. autoclass:: pocomc.prior.Prior 7 | :members: -------------------------------------------------------------------------------- /docs/source/api/sampler.rst: -------------------------------------------------------------------------------- 1 | Sampler 2 | ======= 3 | 4 | .. autoclass:: pocomc.Sampler 5 | :members: -------------------------------------------------------------------------------- /docs/source/api/scaler.rst: -------------------------------------------------------------------------------- 1 | Scaler 2 | ====== 3 | 4 | .. autoclass:: pocomc.scaler.Reparameterize 5 | :members: -------------------------------------------------------------------------------- /docs/source/api/tools.rst: -------------------------------------------------------------------------------- 1 | Tools 2 | ===== 3 | 4 | Resample particles 5 | ------------------ 6 | 7 | .. autofunction:: pocomc.tools.systematic_resample 8 | 9 | 10 | Compute effective sample size 11 | ----------------------------- 12 | 13 | .. autofunction:: pocomc.tools.compute_ess 14 | 15 | .. autofunction:: pocomc.tools.effective_sample_size 16 | 17 | .. autofunction:: pocomc.tools.unique_sample_size 18 | 19 | 20 | Progress bar 21 | ------------ 22 | 23 | .. autoclass:: pocomc.tools.ProgressBar 24 | :members: 25 | 26 | Function wrapper 27 | ---------------- 28 | 29 | .. autoclass:: pocomc.tools.FunctionWrapper 30 | :members: -------------------------------------------------------------------------------- /docs/source/background.rst: -------------------------------------------------------------------------------- 1 | .. _background: 2 | 3 | Background 4 | ========== 5 | 6 | 7 | Bayesian inference 8 | ------------------ 9 | 10 | In the Bayesian context, one is often interested to approximate the *posterior distribution* :math:`\mathcal{P}(\theta)\equiv p(\theta\vert d,\mathcal{M})`, 11 | that is, the probability distribution of the parameters :math:`\theta` given the data :math:`d` 12 | and the model :math:`\mathcal{M}`. This is given by Bayes' theorem: 13 | 14 | .. math:: 15 | p(\theta\vert d,\mathcal{M})= \frac{p(d\vert \theta,\mathcal{M})p(\theta\vert\mathcal{M})}{p(d\vert\mathcal{M})} 16 | 17 | where 18 | 19 | .. math:: 20 | \mathcal{L}(\theta) \equiv p(d\vert \theta,\mathcal{M}) 21 | 22 | is the *likelihood function*, 23 | 24 | .. math:: 25 | \pi(\theta) \equiv p(\theta\vert\mathcal{M}) 26 | 27 | is the *prior probability density*, and 28 | 29 | .. math:: 30 | \mathcal{Z} \equiv p(d\vert\mathcal{M}) 31 | 32 | is the so called *model evidence* or *marginal likelihood*. 33 | 34 | Parameter estimation 35 | ^^^^^^^^^^^^^^^^^^^^ 36 | 37 | The task of parameter estimation consists of finding the probability distribution of the parameters :math:`\theta` 38 | of a model :math:`\mathcal{M}` given some data :math:`d`. In practice this is achieved by approximating the 39 | posterior distribution by a collection of *samples*. 
The distribution of these samples can then be used to 40 | approximate various expectation values (e.g. mean, median, standard deviation, credible intervals, 1-D and 41 | 2-D marginal posteriors etc.) 42 | 43 | .. math:: 44 | \mathbb{E}_{\mathcal{P}(\theta)}\left[ f(\theta)\right] \equiv \int f(\theta) \mathcal{P}(\theta) d\theta \approx \frac{1}{n}\sum_{i=1}^{n}f(\theta_{i}) 45 | 46 | as averages over the samples drawn from the posterior 47 | 48 | .. math:: 49 | \theta_{i} \sim \mathcal{P}(\theta) 50 | 51 | Model comparison 52 | ^^^^^^^^^^^^^^^^ 53 | 54 | For the task of Bayesian model comparison, one is interested in the ratio of posterior probabilities of models 55 | :math:`\mathcal{M}_{i}` and :math:`\mathcal{M}_{j}`, given by 56 | 57 | .. math:: 58 | \frac{p(\mathcal{M}_{i}\vert d)}{p(\mathcal{M}_{j}\vert d)} = \frac{p(d\vert\mathcal{M}_{i})}{p(d\vert\mathcal{M}_{j})} \times \frac{p(\mathcal{M}_{i})}{p(\mathcal{M}_{j})} 59 | 60 | where the first term on the right-hand side is the so called *Bayes factor* and the second term is the ratio of 61 | prior probabilities of the two models. The latter is often set to 1 (i.e. no model is preferred a priori). The 62 | Bayes factor on the other hand is simply the ratio of the model evidences of the two models, or 63 | 64 | .. math:: 65 | BF_{ij} \equiv \frac{p(d\vert\mathcal{M}_{i})}{p(d\vert\mathcal{M}_{j})} = \frac{\mathcal{Z}_{i}}{\mathcal{Z}_{j}} 66 | 67 | 68 | Preconditioned Monte Carlo 69 | -------------------------- 70 | 71 | The Preconditioned Monte Carlo (PMC) algorithm is a variant of the Persistent Sampling (PS) framework, which is a generalization 72 | of the Sequential Monte Carlo (SMC) algorithm. The PMC algorithm is designed to sample from a sequence of probability distributions 73 | :math:`\mathcal{P}_{t}(\theta)`, where the target distribution :math:`\mathcal{P}_{t}(\theta)` is defined by 74 | 75 | .. math:: 76 | \mathcal{P}_{t}(\theta) \propto \mathcal{L}(\theta)^{\beta_{t}}\pi(\theta) 77 | 78 | where :math:`\mathcal{L}(\theta)` is the likelihood function and :math:`\pi(\theta)` is the prior probability density. The effective 79 | inverse temperature parameter :math:`\beta_{t}` is initialized to 0 and is gradually increased to 1. When :math:`\beta_{t}=0`, the target 80 | distribution is the prior distribution, and when :math:`\beta_{t}=1`, the target distribution is the posterior distribution. The inverse 81 | temperature parameter is increased in each iteration by a small step size :math:`\Delta\beta` until it reaches 1. The :math:`\Delta\beta` 82 | is computed adaptively in each iteration to ensure PMC maintains a constant number of effective particles. In each iteration, the PMC 83 | algorithm samples from the target distribution :math:`\mathcal{P}_{t}(\theta)` using a set of particles by applying a sequence of three steps: 84 | 85 | 1. **Reweighting**: The particles are reweighted to target the distribution :math:`\mathcal{P}_{t}(\theta)`. 86 | 2. **Resampling**: The particles are resampled according to their weights to ensure that the effective number of particles is constant. 87 | 3. **Mutation**: The particles are mutated by applying a number of MCMC steps. 88 | 89 | The PMC algorithm terminates when the inverse temperature parameter reaches 1. The samples obtained from the PMC algorithm can be used to 90 | approximate the posterior distribution of the parameters :math:`\theta` given the data :math:`d` and the model :math:`\mathcal{M}`.
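To make the three steps above concrete, the sketch below shows what a single PMC iteration might look like. It is purely illustrative and is not the actual ``pocoMC`` implementation: the helpers ``next_beta`` (adaptive step selection) and ``mcmc_move`` (the mutation kernel) are assumed placeholders, and plain multinomial resampling stands in for the systematic resampling used internally.

.. code-block:: python

    import numpy as np

    def pmc_iteration(particles, log_w, beta, log_like, next_beta, mcmc_move):
        # 1. Reweighting: update the importance weights for the new inverse temperature.
        log_l = log_like(particles)
        beta_new = next_beta(log_l, log_w, beta)  # chosen adaptively so the ESS stays roughly constant
        log_w = log_w + (beta_new - beta) * log_l

        # 2. Resampling: draw particles in proportion to their weights
        #    (multinomial resampling shown here for brevity).
        w = np.exp(log_w - np.max(log_w))
        idx = np.random.choice(len(particles), size=len(particles), p=w / w.sum())
        particles, log_w = particles[idx], np.zeros(len(particles))

        # 3. Mutation: a few MCMC steps targeting L(theta)**beta_new * pi(theta).
        particles = mcmc_move(particles, beta_new)
        return particles, log_w, beta_new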
The PMC 91 | algorithm is particularly useful for sampling from high-dimensional and multimodal posterior distributions. Furthermore, the PMC algorithm 92 | offers an estimate of the logarithm of the model evidence :math:`\log\mathcal{Z}` which can be used for Bayesian model comparison. 93 | 94 | The high sampling efficiency and robustness of the PMC algorithm derive from three key features: 95 | 96 | 1. **Persistent Sampling**: The PMC algorithm maintains a set of particles throughout the entire run of the algorithm. This allows the PMC 97 | algorithm to reuse the particles from previous iterations to sample from the target distribution in the current iteration. This is particularly 98 | useful when the target distribution changes smoothly from one iteration to the next. 99 | 2. **Normalizing Flow Preconditioning**: The PMC algorithm uses a normalizing flow to precondition each target distribution :math:`\mathcal{P}_{t}(\theta)`. 100 | The normalizing flow is a sequence of invertible transformations that maps a simple distribution to the target distribution. The normalizing 101 | flow is trained to approximate the target distribution using a set of particles. Sampling from the target distribution is then performed by 102 | sampling from the simple distribution and applying the inverse of the normalizing flow. The normalizing flow preconditioning allows the PMC 103 | algorithm to sample from complex and multimodal target distributions. 104 | 3. **t-preconditioned Crank-Nicolson**: The PMC algorithm uses a t-preconditioned Crank-Nicolson integrator to evolve the particles in the target 105 | distribution. The t-preconditioned Crank-Nicolson algorithm is an MCMC method that scales well with the dimensionality of the target distribution. 106 | For targets that are close to Gaussian, the t-preconditioned Crank-Nicolson algorithm is particularly efficient and can scale to very high dimensions. 107 | For non-Gaussian targets (e.g., multimodal distributions), the t-preconditioned Crank-Nicolson algorithm can be combined with the normalizing flow 108 | preconditioning to sample from the target distribution efficiently even in high dimensions. 109 | 110 | Unlike traditional samplers that rely on Random-walk Metropolis, Slice Sampling, Rejection Sampling, Importance Sampling, or Independence Metropolis, PMC 111 | can scale to high dimensions without devolving into random-walk behavior. -------------------------------------------------------------------------------- /docs/source/blobs.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Blobs" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Sometimes it is useful to store additional metadata or derived parameters during the run so that the user does not have to re-compute them afterwards. This can be achieved easily in ``pocoMC`` using the *blobs* framework, inspired by the ``zeus`` and ``emcee`` samplers.\n", 15 | "\n", 16 | "Any additional derived parameters can be returned by the log-likelihood function.
The dtypes of these derived parameters should be defined using the ``blobs_dtype`` argument of the ``Sampler`` class.\n", 17 | "\n", 18 | "For instance, for a Gaussian likelihood (with zero mean and unit variance in 5D) where we want to store the median value of the parameters and the number of positive parameters, we would do something like this:" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 1, 24 | "metadata": {}, 25 | "outputs": [ 26 | { 27 | "name": "stderr", 28 | "output_type": "stream", 29 | "text": [ 30 | "Iter: 25it [01:11, 2.84s/it, calls=11264, beta=1, logZ=-8.23, ESS=3.84e+3, acc=0.857, steps=2, logP=-15.1, eff=0.93]" 31 | ] 32 | }, 33 | { 34 | "name": "stdout", 35 | "output_type": "stream", 36 | "text": [ 37 | "Median: [ 0.28968189 -2.48432868 1.40069292]\n", 38 | "Number of positive parameters: [3 1 4]\n" 39 | ] 40 | }, 41 | { 42 | "name": "stderr", 43 | "output_type": "stream", 44 | "text": [ 45 | "\n" 46 | ] 47 | } 48 | ], 49 | "source": [ 50 | "import numpy as np\n", 51 | "from scipy.stats import norm\n", 52 | "import pocomc as pc\n", 53 | "\n", 54 | "prior = pc.Prior(5*[norm(0,5)])\n", 55 | "\n", 56 | "def log_likelihood(x):\n", 57 | " return -0.5 * np.dot(x,x), np.median(x), np.sum(x>0, dtype=int)\n", 58 | "\n", 59 | "sampler = pc.Sampler(prior,\n", 60 | " log_likelihood,\n", 61 | " blobs_dtype=[('median', float), ('n_positive', int)],\n", 62 | " )\n", 63 | "\n", 64 | "sampler.run()\n", 65 | "\n", 66 | "samples, weights, logl, logp, blobs = sampler.posterior(return_blobs=True)\n", 67 | "\n", 68 | "print(\"Median: \", blobs[\"median\"][:3])\n", 69 | "print(\"Number of positive parameters: \", blobs[\"n_positive\"][:3])\n" 70 | ] 71 | } 72 | ], 73 | "metadata": { 74 | "kernelspec": { 75 | "display_name": "dev-env", 76 | "language": "python", 77 | "name": "python3" 78 | }, 79 | "language_info": { 80 | "codemirror_mode": { 81 | "name": "ipython", 82 | "version": 3 83 | }, 84 | "file_extension": ".py", 85 | "mimetype": "text/x-python", 86 | "name": "python", 87 | "nbconvert_exporter": "python", 88 | "pygments_lexer": "ipython3", 89 | "version": "3.10.13" 90 | } 91 | }, 92 | "nbformat": 4, 93 | "nbformat_minor": 2 94 | } 95 | -------------------------------------------------------------------------------- /docs/source/checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Checkpointing" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "A useful option, especially for long runs, is to be able to store the state of ``pocoMC`` in a file and also the to use\n", 15 | "that file in order to later continue the same run. This can help avoid disastrous situations in which a run is interrupted\n", 16 | "or terminated prematurely (e.g. due to time limitation in computing clusters or possible crashes).\n", 17 | "\n", 18 | "Fortunately, ``pocoMC`` offers both options to save and load a previous state of the sampler." 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": {}, 24 | "source": [ 25 | "## Save\n", 26 | "\n", 27 | "In order to save the state of the sampler during the run, one has to specify how often to save the state in a file. This is\n", 28 | "done using the ``save_every`` argument in the ``run`` method. The default is ``save_every=None`` which means that no state\n", 29 | "is saved during the run. 
If instead we want to store the state of ``pocoMC`` every e.g. ``3`` iterations, we would do\n", 30 | "something like:" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "sampler.run(save_every = 3)" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "The default directory in which the state files are saved is a folder named ``states`` in the current directory. One can change\n", 47 | "this using the ``output_dir`` argument when initialising the sampler (e.g. ``output_dir = \"new_run\"``). By default, the state\n", 48 | "files follow the naming convention ``pmc_{i}.state`` where ``i`` is the iteration index. For instance, if ``save_every=3`` was \n", 49 | "specified then the ``output_dir`` directory will include the files ``pmc_3.state``, ``pmc_6.state``, etc. One can also change\n", 50 | "the label from ``pmc`` to anything else by using the ``output_label`` argument when initialising the sampler (e.g. \n", 51 | "``output_label=\"grav_waves\"``)." 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "metadata": {}, 57 | "source": [ 58 | "## Load" 59 | ] 60 | }, 61 | { 62 | "cell_type": "markdown", 63 | "metadata": {}, 64 | "source": [ 65 | "Loading a previous state of the sampler and resuming the run from that point requires providing the path to the specific state\n", 66 | "file to the ``run`` method using the ``resume_state_path`` argument. For instance, if we want to continue the run from the \n", 67 | "``pmc_3.state`` file in the ``states`` directory, we would do:" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "metadata": {}, 74 | "outputs": [], 75 | "source": [ 76 | "sampler.run(resume_state_path = \"states/pmc_3.state\")" 77 | ] 78 | }, 79 | { 80 | "cell_type": "markdown", 81 | "metadata": {}, 82 | "source": [ 83 | "## Load and Add More Samples\n", 84 | "\n", 85 | "It is possible to add more samples to a finished run. This is useful when one wants to experiment with *small* runs until they get their analysis right, and then increase the number of required posterior samples to get publication-quality results. When ``save_every`` is not ``None``, pocoMC will save a *final* file when sampling is done. By default, this is called ``pmc_final.state``. We can load this state and change the termination criteria in order to add more samples, as follows:" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": null, 91 | "metadata": {}, 92 | "outputs": [], 93 | "source": [ 94 | "sampler.run(n_total=16384, # This is the number of samples we want to draw in total, including the ones we already have.\n", 95 | " n_evidence=16384, # This is the number of samples we want to draw for the evidence estimation.\n", 96 | " resume_state_path = \"states/pmc_final.state\")" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "metadata": {}, 102 | "source": [ 103 | "In this case, we chose to terminate sampling when the total ESS exceeds ``n_total=16384``, which is higher than the default value of ``n_total=4096``. Furthermore, we also provided a higher number of samples used for the evidence estimation. This means that the new evidence estimate will be more accurate than the original. However, we could also have chosen to set ``n_evidence=0`` and only add more posterior samples."
104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": {}, 109 | "source": [] 110 | } 111 | ], 112 | "metadata": { 113 | "language_info": { 114 | "name": "python" 115 | } 116 | }, 117 | "nbformat": 4, 118 | "nbformat_minor": 2 119 | } 120 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | import os 14 | import sys 15 | sys.path.insert(0, os.path.abspath('./../../')) 16 | 17 | import pocomc 18 | 19 | 20 | # -- Project information ----------------------------------------------------- 21 | 22 | project = 'pocoMC' 23 | copyright = '2022-2024, Minas Karamanis' 24 | author = 'Minas Karamanis' 25 | 26 | # The full version, including alpha/beta/rc tags 27 | release = pocomc.__version__ 28 | 29 | 30 | # -- General configuration --------------------------------------------------- 31 | 32 | # Add any Sphinx extension module names here, as strings. They can be 33 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 34 | # ones. 35 | extensions = ["sphinx.ext.autodoc", 36 | "sphinx.ext.mathjax", 37 | "sphinx.ext.napoleon", 38 | "myst_nb", 39 | ] 40 | 41 | master_doc = "index" 42 | 43 | myst_enable_extensions = ["dollarmath", "colon_fence"] 44 | 45 | source_suffix = { 46 | ".rst": "restructuredtext", 47 | ".ipynb": "myst-nb", 48 | } 49 | 50 | # Add any paths that contain templates here, relative to this directory. 51 | templates_path = ['_templates'] 52 | 53 | # List of patterns, relative to source directory, that match files and 54 | # directories to ignore when looking for source files. 55 | # This pattern also affects html_static_path and html_extra_path. 56 | exclude_patterns = [] 57 | 58 | 59 | # -- Options for HTML output ------------------------------------------------- 60 | 61 | # The theme to use for HTML and HTML Help pages. See the documentation for 62 | # a list of builtin themes. 63 | # 64 | #html_theme = "sphinx_book_theme" 65 | #html_theme = "sphinx_rtd_theme" 66 | #html_theme = 'pydata_sphinx_theme' 67 | #html_theme = 'sphinx_material' 68 | html_theme = "furo" 69 | html_title = "pocoMC" 70 | html_logo = "./../../logo.png" 71 | logo_only = True 72 | 73 | html_theme_options = { 74 | #"logo_only" : True, 75 | #'collapse_navigation': True, 76 | #'navigation_depth': 4, 77 | "announcement": ( 78 | "⚠️ The new release 1.1.0 includes major performance and quality-of-life updates. Please check the new syntax and features! ⚠️" 79 | ), 80 | 'sidebar_hide_name': True, 81 | } 82 | 83 | nb_execution_mode = "off" 84 | 85 | # Add any paths that contain custom static files (such as style sheets) here, 86 | # relative to this directory. They are copied after the builtin static files, 87 | # so a file named "default.css" will overwrite the builtin "default.css". 
88 | #html_static_path = ['_static'] 89 | -------------------------------------------------------------------------------- /docs/source/faq.rst: -------------------------------------------------------------------------------- 1 | .. _faq: 2 | 3 | FAQ 4 | === 5 | 6 | This page contains a collection of common questions and answers regarding 7 | the use of ``pocoMC``. 8 | 9 | What is the philosophy behind pocoMC? 10 | ------------------------------------- 11 | The philosophy behind pocoMC is to provide a simple, easy-to-use, and efficient 12 | sampler for Bayesian inference. The sampler is designed to be easy to use and 13 | requires minimal tuning, while still providing efficient sampling for a wide 14 | range of problems. The sampler is designed to be a drop-in replacement for 15 | nested sampling, providing similar functionality with improved efficiency. The 16 | main target audience for pocoMC is researchers who want to perform Bayesian 17 | inference on complex models, but do not want to spend a lot of time tuning the 18 | sampler. Furthermore, pocoMC recognizes that many researchers are not experts in 19 | Bayesian inference, and aims to provide a simple and intuitive interface for 20 | performing Bayesian inference. Finally, pocoMC is designed to be efficient and tailored 21 | to computationally expensive models that often arise in scientific research and engineering. 22 | 23 | 24 | How does pocoMC compare to other samplers? 25 | ------------------------------------------ 26 | pocoMC is a novel sampler that achieves the efficiency of gradient-based samplers 27 | (e.g., Hamiltonian Monte Carlo) without requiring gradients. pocoMC uses a normalizing 28 | flow to map the target distribution to a simple distribution, and then applies 29 | t-preconditioned Crank-Nicolson to sample from the simple distribution. This allows 30 | pocoMC to explore the target distribution more efficiently than traditional samplers 31 | such as Metropolis-Hastings and nested sampling. In practice, we have found that pocoMC 32 | often outperforms gradient-based samplers in terms of efficiency and robustness, especially 33 | for complex target distributions. Compared to nested sampling, pocoMC generally requires 34 | fewer iterations to go from the prior to the posterior, and is often more efficient in terms 35 | of computational cost. However, for low dimensional problems or simple target distributions, 36 | nested sampling may still be a good choice. In general, pocoMC is a versatile and efficient 37 | sampler that is well-suited for a wide range of problems. 38 | 39 | 40 | What is Preconditioned Monte Carlo? 41 | ----------------------------------- 42 | Preconditioned Monte Carlo (PMC) is a general framework for sampling from complex target distributions 43 | using simple distributions. The idea behind PMC is to use Persistent Sampling (i.e., a generalization of 44 | Sequential Monte Carlo) combined with normalizing flow preconditioning and a novel gradient-free Markov 45 | kernel called t-preconditioned Crank-Nicolson. The normalizing flow is used to map the target distribution 46 | to a simple distribution, and then t-preconditioned Crank-Nicolson is used to sample from the simple distribution. 47 | Persistent sampling is used to maintain a set of active particles that explore the target distribution efficiently, 48 | starting from the prior and gradually moving towards the posterior. 
The combination of normalizing flow preconditioning, 49 | t-preconditioned Crank-Nicolson, and persistent sampling allows PMC to efficiently explore complex target distributions 50 | without requiring gradients. PMC is a general framework that can be applied to a wide range of problems, and pocoMC is 51 | an implementation of PMC that is tailored to Bayesian inference in science and engineering. 52 | 53 | 54 | Does the sampler scale well to high dimensions? 55 | ----------------------------------------------- 56 | Yes, the sampler scales well to high dimensions. The sampler uses a normalizing 57 | flow to achieve this. The normalizing flow is a bijective transformation 58 | that maps a simple distribution to a complex distribution. By turning the complex target 59 | distribution into a simple distribution, the sampler can take advantage of the symmetries 60 | of the simple distribution to explore the target distribution more efficiently even in 61 | high dimensions. Training the normalizing flow in high dimensions requires an increased 62 | number of particles, which in turn can increase the computational cost. This means that 63 | in high-dimensional problems, there are some diminishing returns in terms of the number 64 | of particles used. However, we were able to sample from targets with more than 100 dimensions 65 | very efficiently, often outperforming gradient-based samplers. 66 | 67 | 68 | Does the sampler use gradients to scale to high dimensions? 69 | ----------------------------------------------------------- 70 | No, the sampler does not use gradients to scale to high dimensions. The way that pocoMC is able to 71 | scale to high dimensions is by taking advantage of the geometry of the target distribution. The sampler 72 | uses a normalizing flow to map the target distribution to a simple distribution. Then, instead of applying 73 | gradient-based samplers (e.g., Hamiltonian Monte Carlo) to the target distribution, the sampler applies 74 | t-preconditioned Crank-Nicolson to the simple distribution. This method is able to scale to extremely high 75 | dimensions, often outperforming gradient-based samplers, assuming that the target distribution can be 76 | efficiently mapped to a simple distribution using a normalizing flow. 77 | 78 | 79 | Is the normalizing flow used as an emulator for the posterior? 80 | -------------------------------------------------------------- 81 | No, the normalizing flow is used as a preconditioner, meaning that it is used to transform the target distribution 82 | into a simple distribution. The normalizing flow is not used as an emulator for the posterior. The sampler still 83 | samples from the target distribution, but it does so by sampling from the simple distribution and then transforming 84 | the samples back to the target distribution using the inverse of the normalizing flow. This allows the sampler to 85 | take advantage of the symmetries of the simple distribution to explore the target distribution more efficiently. 86 | 87 | 88 | When does the sampling terminate? 89 | --------------------------------- 90 | The sampling terminates when the effective inverse temperature parameter ``beta`` reaches 1.0 and the effective 91 | sample size (ESS) exceeds the predefined threshold (``n_total=4096`` by default). The effective inverse temperature 92 | parameter ``beta`` is a measure of how close the sampler is to the posterior distribution.
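For example, the threshold can be raised by passing a larger ``n_total`` to ``run`` (the same argument used in the checkpointing tutorial; the value below is purely illustrative):

.. code-block:: python

    # Keep iterating until beta reaches 1.0 and the total ESS exceeds 8192.
    sampler.run(n_total=8192)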
93 | 94 | 95 | Can I use pocoMC to sample from a target distribution without normalizing flow preconditioning? 96 | ----------------------------------------------------------------------------------------------- 97 | Yes, you can use pocoMC to sample from a target distribution without normalizing flow preconditioning. In this case, 98 | the sampler will sample directly from the target distribution using t-preconditioned Crank-Nicolson. This can be useful 99 | if the target distribution is already simple and does not require normalizing flow preconditioning. However, in general, 100 | we recommend using normalizing flow preconditioning, as it can significantly improve the efficiency of the sampler. 101 | 102 | 103 | How many effective particles should I use? 104 | ------------------------------------------ 105 | It depends. The number of effective particles that you should use depends on the complexity of the target distribution 106 | and the computational resources available. In general, we recommend using as many effective particles as possible, as this 107 | will improve the efficiency of the sampler. However, the number of effective particles that you can use is limited by the 108 | computational resources available. In practice, we have found that using 512 effective particles is often sufficient 109 | to sample from most target distributions efficiently. However, you may need to experiment with different numbers of 110 | effective particles to find the optimal number for your problem. 111 | 112 | 113 | How many active particles should I use? 114 | --------------------------------------- 115 | No more than half of the effective particles. The number of active particles that you should use depends on the number of 116 | effective particles that you are using. In general, we recommend using no more than half of the effective particles as active 117 | particles. For example, if you are using 512 effective particles, then you should use no more than 256 active particles. Using 118 | more active particles introduces correlations between the particles, which can reduce the efficiency of the sampler. 119 | 120 | 121 | How do I know if the sampler is working correctly? 122 | -------------------------------------------------- 123 | There are several ways to check if the sampler is working correctly. One way is to run the sampler with two sets of settings, 124 | one more conservative than the other. If the results are consistent between the two runs, then the sampler is likely working 125 | correctly. For instance, you can run the sampler with 512 effective particles and 256 active particles, and then run the sampler 126 | with 256 effective particles and 128 active particles. If the results are consistent between the two runs, then the sampler is 127 | likely working correctly. 128 | 129 | 130 | Are there any indications that the sampler is not working correctly? 131 | -------------------------------------------------------------------- 132 | Yes, there are a few indications that the sampler is not working correctly. One indication is that the acceptance rate of the 133 | Markov kernel is too low. If the acceptance rate is too low, then the sampler is not exploring the target distribution efficiently. 134 | Under normal circumstances, the acceptance rate (``acc`` in the progress bar) should be around 0.2-0.8. Another indication that the 135 | sampler is not working correctly is that the efficiency of the sampler is too low. 
If the efficiency of the sampler is too low, then 136 | the sampler is not exploring the target distribution efficiently. Under normal circumstances, the efficiency (``eff`` in the progress 137 | bar) of the sampler should be around 0.1-1.0. Finally, another indication that the sampler is not working correctly is that the samples 138 | are not consistent between runs. If the samples are not consistent between runs, then the sampler is not exploring the target distribution efficiently. 139 | 140 | 141 | Where does the name pocoMC come from? 142 | ------------------------------------- 143 | The name pocoMC comes from the Spanish and Italian word "poco", which means "little" or "few". The name pocoMC was chosen because the sampler 144 | uses a small number of particles to explore the target distribution efficiently. The name pocoMC is also a play on the word "poco", 145 | which shares some common sounds with the word "preconditioned". Finally, the name was inspired by the name of the developer's cat, Poco. -------------------------------------------------------------------------------- /docs/source/flow.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Normalizing Flow" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## Normalizing Flow Preconditioning\n", 15 | "\n", 16 | "The source of the high sampling efficiency and flexibility of ``pocoMC`` is its advanced preconditioning strategy. Preconditioning is a technique used to make hard problems easier to solve. The main idea is to transform the original problem into a new one that is easier to solve. When the problem is a sampling problem (e.g., sampling from a probability distribution), preconditioning can be used to transform the original distribution into a new one that is easier to sample from (e.g., a distribution that is closer to the normal distribution). \n", 17 | "\n", 18 | "To transform an arbitrary, often complex, probability distribution into a simple one, we need to define a flexible invertible transformation that can be applied to the complex distribution. This transformation is called the normalizing flow. The normalizing flow is a sequence of invertible transformations that map a simple distribution to a complex distribution. The normalizing flow is a powerful tool for generative modeling, density estimation, and variational inference. \n", 19 | "\n", 20 | "``pocoMC`` supports a plethora of normalizing flows implemented through the ``zuko`` package. The user can\n", 21 | "choose either a predefined flow or define their own flow." 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "## Predefined Flows\n", 29 | "\n", 30 | "The predefined flows are of two types: \n", 31 | "\n", 32 | "1) **Masked Autoregressive Flows (MAF):** Masked Autoregressive Flow (MAF) is a type of normalizing flow that utilizes autoregressive models to parameterize the transformation from a simple base distribution to a more complex target distribution. It achieves this by applying a series of invertible transformations, each conditioned on previous variables in an autoregressive manner. The main advantage of MAF is its ability to efficiently compute the log-likelihood of the transformed data due to its autoregressive structure, which allows for exact likelihood evaluation.
This makes MAF particularly useful for density estimation and generative modeling tasks where likelihood-based training is crucial.\n", 33 | "\n", 34 | "2) **Neural Spline Flows (NSF):** Neural Spline Flow (NSF) extends the concept of normalizing flows by using neural networks to parameterize piecewise monotonic rational quadratic splines as the invertible transformations. These splines provide a flexible way to model complex distributions while ensuring smooth and differentiable transformations. NSF combines the expressive power of neural networks with the efficiency of spline-based transformations, allowing for efficient sampling and exact likelihood computation. This makes NSF particularly effective for modeling high-dimensional data with complex, multimodal distributions, enhancing the flexibility and accuracy of normalizing flow-based generative models.\n", 35 | "\n", 36 | "The predefined MAF and NSF flows are ``'maf3'``, ``'maf6'``, ``'maf12'``, ``'nsf3'``, ``'nsf6'``, and ``'nsf12'``. By default, ``pocoMC`` uses the ``'nsf6'`` flow, meaning a Neural Spline Flow with 6 transformations. This balances flexibility and computational cost. The user can change the flow by setting the ``flow`` parameter in the ``pocoMC`` ``Sampler`` class as follows:" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": null, 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "sampler = pc.Sampler(prior, likelihood, flow='maf12')" 46 | ] 47 | }, 48 | { 49 | "cell_type": "markdown", 50 | "metadata": {}, 51 | "source": [ 52 | "## Custom Flows\n", 53 | "\n", 54 | "The user can also define their own normalizing flow. This is done by creating a flow using the ``zuko`` package and passing it to the ``Sampler`` class. For example, the following code creates a MAF flow with 10 transformations, 3-layered neural networks, 128 hidden units per layer, and residual connections between layers:" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "import zuko\n", 64 | "\n", 65 | "flow = zuko.flows.MAF(n_dim, # Number of dimensions of the posterior\n", 66 | " transforms=10, \n", 67 | " hidden_features=[128] * 3,\n", 68 | " residual=True,)\n", 69 | "\n", 70 | "sampler = pc.Sampler(prior, likelihood, flow=flow)" 71 | ] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "metadata": {}, 76 | "source": [ 77 | "The advantage of defining a custom flow is that the user can tailor the flow to their specific problem. The disadvantage\n", 78 | "is that the user must have a good understanding of the normalizing flow architecture and how it affects the sampling\n", 79 | "process. The predefined flows are designed to be flexible and easy to use, so the user should only define a custom flow\n", 80 | "if they have a good reason to do so." 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": {}, 86 | "source": [ 87 | "## Training\n", 88 | "\n", 89 | "### Training Configuration\n", 90 | "\n", 91 | "The flow is trained in each iteration of the sampler automatically. The training process is quite quick\n", 92 | "due to the fact that the flow is not trained from scratch in each iteration, but rather the training is\n", 93 | "continued from the previous iteration. The user can control the training configuration by passing a dictionary\n", 94 | "with the desired configuration to the `train_config` argument of the `Sampler` class. 
The default configuration\n", 95 | "is:" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": {}, 102 | "outputs": [], 103 | "source": [ 104 | "train_config = dict(validation_split=0.5, # Fraction of the data to use for validation\n", 105 | " epochs=5000, # Maximum number of epochs to train for\n", 106 | " batch_size=np.minimum(n_effective//2, 512), # Batch size\n", 107 | " patience=n_dim, # Number of epochs to wait before early stopping\n", 108 | " learning_rate=1e-3, # Learning rate\n", 109 | " annealing=False, # Whether to use a learning rate schedule\n", 110 | " gaussian_scale=None, # Standard deviation of the Gaussian prior on the weights used for regularization\n", 111 | " laplace_scale=None, # Scale of the Laplace prior on the weights used for regularization\n", 112 | " noise=None, # Standard deviation of the Gaussian noise added to the input data\n", 113 | " shuffle=True, # Whether to shuffle the data before training\n", 114 | " clip_grad_norm=1.0, # Maximum norm of the gradient\n", 115 | " verbose=0, # Verbosity level\n", 116 | " )" 117 | ] 118 | }, 119 | { 120 | "cell_type": "markdown", 121 | "metadata": {}, 122 | "source": [ 123 | "We do not recommend changing the default training configuration unless you are familiar with the training process and the impact of the hyperparameters on the results. The default configuration is designed to work well for a wide range of problems. If you do want to change the configuration, we recommend starting with the default values and only changing one hyperparameter at a time.\n", 124 | "\n", 125 | "### Training Frequency\n", 126 | "\n", 127 | "The normalizing flow is not always trained in every iteration. Instead, this is controlled by the ``train_frequency`` parameter. By default, the value of this parameter is ``None`` and the training frequency is determined by the number of effective and active particles respectively as follows:\n", 128 | "\n", 129 | "$$\n", 130 | "f = \\max\\left( \\frac{n_{\\text{effective}}}{2\\times n_{\\text{active}}} , 1\\right)\n", 131 | "$$\n", 132 | "\n", 133 | "This means that for the default values ``n_effective=512`` and ``n_active=256``, we train the flow every iteration. However, for larger number of effective particles, or equivalently smaller number of active particles, the normalizing flow is trained more sparsely.\n", 134 | "\n", 135 | "The user can also enter an integer value to ``train_frequency`` to specify exactly how often training occurs. The only exemption is when ``beta=1.0``, when training occurs in every iteration." 
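To make the training options above concrete, here is a minimal sketch (the specific values are illustrative assumptions only, not tuned recommendations; ``prior`` and ``likelihood`` are assumed to be defined as in the previous sections) of passing a modified ``train_config`` and a fixed integer ``train_frequency`` to the sampler::

    import numpy as np
    import pocomc as pc

    n_dim = 10          # dimensionality of the problem (illustrative)
    n_effective = 512   # default number of effective particles

    # Start from the default configuration listed above and change as little
    # as possible (here: fewer epochs and a smaller learning rate).
    train_config = dict(validation_split=0.5,
                        epochs=2000,
                        batch_size=np.minimum(n_effective//2, 512),
                        patience=n_dim,
                        learning_rate=5e-4,
                        annealing=False,
                        gaussian_scale=None,
                        laplace_scale=None,
                        noise=None,
                        shuffle=True,
                        clip_grad_norm=1.0,
                        verbose=0,
                        )

    sampler = pc.Sampler(prior,
                         likelihood,
                         train_config=train_config,
                         train_frequency=2)  # train the flow every 2 iterations

As recommended above, it is usually best to start from the defaults and change one hyperparameter at a time.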
136 | ] 137 | }, 138 | { 139 | "cell_type": "markdown", 140 | "metadata": {}, 141 | "source": [] 142 | } 143 | ], 144 | "metadata": { 145 | "language_info": { 146 | "name": "python" 147 | } 148 | }, 149 | "nbformat": 4, 150 | "nbformat_minor": 2 151 | } 152 | -------------------------------------------------------------------------------- /docs/source/images/advanced_corner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/minaskar/pocomc/77cbe330f00fe1d258d6db36c15aa959abdb83e1/docs/source/images/advanced_corner.png -------------------------------------------------------------------------------- /docs/source/images/advanced_run.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/minaskar/pocomc/77cbe330f00fe1d258d6db36c15aa959abdb83e1/docs/source/images/advanced_run.png -------------------------------------------------------------------------------- /docs/source/images/advanced_trace.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/minaskar/pocomc/77cbe330f00fe1d258d6db36c15aa959abdb83e1/docs/source/images/advanced_trace.png -------------------------------------------------------------------------------- /docs/source/images/logo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/minaskar/pocomc/77cbe330f00fe1d258d6db36c15aa959abdb83e1/docs/source/images/logo.gif -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | | 2 | 3 | .. title:: pocoMC documentation 4 | 5 | .. figure:: ./images/logo.gif 6 | :scale: 100 % 7 | :align: center 8 | 9 | | 10 | 11 | ``pocoMC`` is a Python package for fast Bayesian posterior and model evidence estimation. It leverages 12 | the Preconditioned Monte Carlo (PMC) algorithm, offering significant speed improvements over 13 | traditional methods like MCMC and Nested Sampling. Ideal for large-scale scientific problems 14 | with expensive likelihood evaluations, non-linear correlations, and multimodality, ``pocoMC`` 15 | provides efficient and scalable posterior sampling and model evidence estimation. Widely used 16 | in cosmology and astronomy, ``pocoMC`` is user-friendly, flexible, and actively maintained. 17 | 18 | .. admonition:: Where to start? 19 | :class: tip 20 | 21 | 🖥 A good place to get started is with the :doc:`install` and then the 22 | :doc:`quickstart` guide. If you are not familiar with Bayesian inference 23 | have a look at the :doc:`background`. 24 | 25 | 📖 For more details, check out the :doc:`likelihood` through :doc:`blobs` information, 26 | as well as the :doc:`fitting` and :doc:`model_comparison` tutorials. 27 | 28 | 💡 If you're running into problems getting ``pocoMC`` to do what you want, first 29 | check out the :doc:`faq` page, for some general tips and tricks. 30 | 31 | 🐛 If :doc:`faq` doesn't solve your problems, or if you find bugs, 32 | then head on over to the `GitHub issues page `_. 33 | 34 | 👈 Check out the sidebar to find the full table of contents. 35 | 36 | 37 | 38 | .. 
toctree:: 39 | :hidden: 40 | :maxdepth: 1 41 | :caption: User Guide: 42 | 43 | install 44 | quickstart.ipynb 45 | likelihood.ipynb 46 | priors 47 | sampling.ipynb 48 | results 49 | parallelization.ipynb 50 | flow.ipynb 51 | checkpoint.ipynb 52 | blobs.ipynb 53 | 54 | 55 | .. toctree:: 56 | :hidden: 57 | :maxdepth: 1 58 | :caption: Tutorials: 59 | 60 | fitting 61 | model_comparison 62 | 63 | .. toctree:: 64 | :hidden: 65 | :maxdepth: 1 66 | :caption: Discussion: 67 | 68 | background 69 | faq 70 | 71 | .. toctree:: 72 | :hidden: 73 | :maxdepth: 1 74 | :caption: API Documentation: 75 | 76 | api 77 | GitHub Repository 78 | 79 | 80 | 81 | Attribution & Citation 82 | ====================== 83 | 84 | Please cite the following if you find this code useful in your 85 | research. The BibTeX entries for the papers are:: 86 | 87 | @article{karamanis2022accelerating, 88 | title={Accelerating astronomical and cosmological inference with preconditioned Monte Carlo}, 89 | author={Karamanis, Minas and Beutler, Florian and Peacock, John A and Nabergoj, David and Seljak, Uro{\v{s}}}, 90 | journal={Monthly Notices of the Royal Astronomical Society}, 91 | volume={516}, 92 | number={2}, 93 | pages={1644--1653}, 94 | year={2022}, 95 | publisher={Oxford University Press} 96 | } 97 | 98 | @article{karamanis2022pocomc, 99 | title={pocoMC: A Python package for accelerated Bayesian inference in astronomy and cosmology}, 100 | author={Karamanis, Minas and Nabergoj, David and Beutler, Florian and Peacock, John A and Seljak, Uros}, 101 | journal={arXiv preprint arXiv:2207.05660}, 102 | year={2022} 103 | } 104 | 105 | 106 | Authors & License 107 | ================= 108 | 109 | Copyright 2022-2024 Minas Karamanis and contributors. 110 | 111 | ``pocoMC`` is free software made available under the ``GPL-3.0 License``. 112 | 113 | 114 | Changelog 115 | ========= 116 | 117 | **1.2.6 (20/09/24)** 118 | 119 | - Removed unnecessary log-likelihood evaluations during evidence estimation 120 | 121 | **1.2.5 (16/09/24)** 122 | 123 | - Removed unnecessary log-likelihood evaluations during MCMC sampling. 124 | 125 | **1.2.4 (28/08/24)** 126 | 127 | - Fix bug in periodic and reflective parameters. 128 | 129 | **1.2.3 (27/08/24)** 130 | 131 | - Added support for periodic and reflective parameters. 132 | - Changed default normalizing flow to ``nsf6``. 133 | 134 | **1.2.2 (20/06/24)** 135 | 136 | - Fixed bug in ``posterior`` method related to blobs. 137 | 138 | **1.2.1 (14/06/24)** 139 | 140 | - Added support for log-likelihoods that return ``-np.inf`` inside the prior volume. 141 | 142 | **1.2.0 (11/06/24)** 143 | 144 | - Added ``MPIPool`` for parallelization. 145 | - Fixed bugs in checkpointing when using MPI in NFS4 and BeeGFS filesystems. 146 | - Automatically save final checkpoint file when finishing the run if ``save_every`` is not ``None``. 147 | - Added option to continue sampling after completing the run. 148 | 149 | **1.1.0 (31/05/24)** 150 | 151 | - Fix robustness issues with the Crank-Nicolson sampler. 152 | - Added predefined normalizing flows. 153 | - Added support for derived parameters through the ``blobs`` framework. 154 | - Added ``dynamic`` mode for determining the ESS based on the unique sample size (USS). 155 | - Added internal ``multiprocess`` pool for parallelization. 156 | - Improved documentation and tutorials. 157 | 158 | **1.0.2 (18/02/24)** 159 | 160 | - Minor improvements and bug fixes. 161 | 162 | **1.0.0 (28/01/24)** 163 | 164 | - First stable release. 165 | - Major refactoring of the code. 
166 | - Added support for multiple normalizing flows through ``zuko``. 167 | - Added preconditioned Crank-Nicolson sampler. 168 | - Added support for multilayer SMC. 169 | 170 | **0.2.2 (22/08/22)** 171 | 172 | - Fixed bridge sampling estimator. 173 | - Improved likelihood call counter. 174 | 175 | **0.1.2 (27/07/22)** 176 | 177 | - Bridge sampling estimator for the model evidence. 178 | - Added probit transform for bounded parameters. 179 | 180 | **0.1.11 (12/07/22)** 181 | 182 | - Include saving and loading the state of the sampler. Useful for resuming runs from files. 183 | 184 | **0.1.0 (12/07/22)** 185 | 186 | - First version 187 | -------------------------------------------------------------------------------- /docs/source/install.rst: -------------------------------------------------------------------------------- 1 | .. _install: 2 | 3 | Installation 4 | ============ 5 | 6 | Dependencies 7 | ------------ 8 | 9 | **pocoMC** depends on ``numpy``, ``torch``, ``zuko``, ``tqdm``, ``scipy``, ``dill``, and ``multiprocess``. 10 | Optionally, you can install ``mpi4py`` for parallelization using the provided ``MPIPool``. 11 | 12 | Using pip 13 | --------- 14 | 15 | The easiest way to install the most recent stable version of ``pocomc`` is 16 | with `pip `_: 17 | 18 | .. code-block:: bash 19 | 20 | pip install pocomc 21 | 22 | 23 | From source 24 | ----------- 25 | 26 | Alternatively, you can get the source by cloning `the git 27 | repository `_: 28 | 29 | .. code-block:: bash 30 | 31 | git clone https://github.com/minaskar/pocomc.git 32 | 33 | Once you've downloaded the source, you can navigate into the root source 34 | directory and run: 35 | 36 | .. code-block:: bash 37 | 38 | pip install . -------------------------------------------------------------------------------- /docs/source/likelihood.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Likelihood" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "The first step in a Bayesian analysis is to define the likelihood function. If you are unfamiliar with this term, I suggest you have a look at the Background section." 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "## Standard Case" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "For practical purposes it is more convenient to work with the log-likelihood function. This is defined as $\log \mathcal{L}(\theta)=\log p(D\vert\theta)$, where $D$ are the data and $\theta$ are the parameters of the model that we are trying to fit to the data. The (log-)likelihood function is not a probability density and its specific form depends on the problem. 
For the vast majority of cases, the likelihood is assumed to be Gaussian.\n", 29 | "\n", 30 | "Suppose that we want our *likelihood* function to be a *Gaussian density* with 10 parameters, i.e., in 10-D. We would then do\n", 31 | "something like:" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "import numpy as np\n", 41 | "\n", 42 | "# Define the dimensionality of our problem.\n", 43 | "n_dim = 10\n", 44 | "\n", 45 | "# Define our 10-D correlated multivariate normal log-likelihood.\n", 46 | "C = np.identity(n_dim)\n", 47 | "C[C==0] = 0.95\n", 48 | "Cinv = np.linalg.inv(C)\n", 49 | "lnorm = -0.5 * (np.log(2 * np.pi) * n_dim + np.log(np.linalg.det(C)))\n", 50 | "\n", 51 | "def log_like(x):\n", 52 | " return -0.5 * np.dot(x, np.dot(Cinv, x)) + lnorm" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "The inclusion of the normalisation factor ``lnorm`` is not strictly necessary as it does not depend on ``x`` and thus does not vary. This is a highly artificial scenario with no data or model. For an example of how to fit a model to some data using a Gaussian likelihood, visit the Tutorials section." 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": {}, 65 | "source": [ 66 | "## Additional Arguments\n", 67 | "\n", 68 | "If the log-likelihood function relies on additional arguments in the sequence ``log_likelihood(x, *args, **kwargs)``, then one can use the ``likelihood_args`` and ``likelihood_kwargs`` when initializing the sampler to provide them." 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "def log_likelihood(x, data, sigma, **kwargs):\n", 78 | " sigma_prime = kwargs.get(\"scale\") * sigma\n", 79 | " return -0.5 * np.sum((x - data)**2 / sigma_prime**2)\n", 80 | "\n", 81 | "import pocomc as pc\n", 82 | "from scipy.stats import norm\n", 83 | "\n", 84 | "n_dim = 5\n", 85 | "prior = pc.Prior(n_dim * [norm(loc=0, scale=10)])\n", 86 | "\n", 87 | "# Random data and sigma values\n", 88 | "data = np.random.randn(n_dim)\n", 89 | "sigma = np.ones(n_dim)\n", 90 | "\n", 91 | "sampler = pc.Sampler(prior, log_likelihood, likelihood_args=(data, sigma), likelihood_kwargs={\"scale\": 1.0})" 92 | ] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": {}, 97 | "source": [ 98 | "## Vectorization\n", 99 | "\n", 100 | "Sometimes it is possible to define a vectorized log-likelihood function that, instead of accepting as input a single set of parameters (i.e., an array of shape ``(n_dim,)``) and returning the corresponding scalar value of the natural logarithm of the likelihood, takes as input an array of shape ``(N, n_dim)`` and returns an array of shape ``(N,)``. ``pocoMC`` can take advantage of the vectorization by setting ``vectorize=True``. 
A simple example for a Gaussian likelihood of zero mean and unit variance is given below:" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": null, 106 | "metadata": {}, 107 | "outputs": [], 108 | "source": [ 109 | "def log_likelihood(x):\n", 110 | " return -0.5 * np.sum(x**2, axis=1)\n", 111 | "\n", 112 | "import pocomc as pc\n", 113 | "from scipy.stats import norm\n", 114 | "\n", 115 | "prior = pc.Prior(5 * [norm(loc=0, scale=10)])\n", 116 | "\n", 117 | "sampler = pc.Sampler(prior, log_likelihood, vectorize=True)" 118 | ] 119 | } 120 | ], 121 | "metadata": { 122 | "language_info": { 123 | "name": "python" 124 | } 125 | }, 126 | "nbformat": 4, 127 | "nbformat_minor": 2 128 | } 129 | -------------------------------------------------------------------------------- /docs/source/parallelization.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Parallelization" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "If you want to run computations in parallel, ``pocoMC`` can use a user-defined ``pool`` to execute a variety of expensive operations \n", 15 | "in parallel rather than in serial. ``pocoMC`` allows for both internal and external parallelization options." 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "## Internal Parallelization" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "The simplest way to parallelize ``pocoMC``, especially when running on a single machine (e.g., laptop or single CPU node on an HPC cluster) is to use the internal parallelization offered by ``pocoMC``. This option essentially relies on the ``multiprocess`` package to perform the computation of the likelihood function for all active particles in parallel.\n", 30 | "\n", 31 | "To achieve this, the user simply has to provide the desired number of CPU processes. This should not exceed the number of available physical CPUs cores (e.g., 12 for a modern MacBook Pro). The number of processes is provided through the ``pool`` argument during the initialization of the sampler class:" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "import pocomc as pc\n", 41 | "\n", 42 | "sampler = pc.Sampler(prior, log_likelihood, pool=10) # For 10 parallel processes" 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "## External Parallelization" 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "Alternatively, the user can provide an external pool to use instead of the internal ``multiprocess`` one." 57 | ] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": {}, 62 | "source": [ 63 | "### SMP" 64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "If you have an external shared-memory multiprocessing (SMP) pool that you want to use instead of the internal ``multiprocess`` one, then you can provide it through the ``pool`` argument. 
For instance, to use the ``multiprocessing`` pool, one would do:" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [ 79 | "from multiprocessing import Pool\n", 80 | "\n", 81 | "import pocomc as pc\n", 82 | "\n", 83 | "with Pool(10) as pool:\n", 84 | " sampler = pc.Sampler(prior, log_likelihood, pool=pool) # For 10 parallel processes" 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": {}, 90 | "source": [ 91 | "### MPI" 92 | ] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": {}, 97 | "source": [ 98 | "When running on a High-Performance Computing (HPC) cluster with multiple nodes with many CPUs each, it may be beneficial to use Message Passing Interface (MPI) parallelization. A simple way to achieve this is using the provided ``MPIPool`` as follows. Please note that you will need to have ``mpi4py`` installed to use this option." 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": null, 104 | "metadata": {}, 105 | "outputs": [], 106 | "source": [ 107 | "import pocomc as pc\n", 108 | "\n", 109 | "if __name__ == '__main__':\n", 110 | " with pc.parallel.MPIPool() as pool:\n", 111 | " sampler = pc.Sampler(prior, log_likelihood, pool=pool)\n", 112 | " sampler.run()" 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "The above script should be executed via ``mpiexec -n 256 python script.py`` where 256 is the number of processes." 120 | ] 121 | }, 122 | { 123 | "cell_type": "markdown", 124 | "metadata": {}, 125 | "source": [ 126 | "## Notes" 127 | ] 128 | }, 129 | { 130 | "cell_type": "markdown", 131 | "metadata": {}, 132 | "source": [ 133 | "Since ``numpy`` performs some internal parallelization using OpenMP, it is a good idea to limit this to a single CPU when running ``pocoMC`` in parallel in order to avoid any unwanted interference. To do this, one can deactivate OpenMP manually using the following at the beginning of their code:" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": null, 139 | "metadata": {}, 140 | "outputs": [], 141 | "source": [ 142 | "import os\n", 143 | "\n", 144 | "os.environ[\"OMP_NUM_THREADS\"] = \"1\"" 145 | ] 146 | }, 147 | { 148 | "cell_type": "markdown", 149 | "metadata": {}, 150 | "source": [ 151 | "Note also that parallelization incurs some non-negligible communication overhead. For most applications, this overhead is minimal and only contributes a small increase in the total run time. However, if the cost of evaluating the likelihood function is really low (i.e., usually less than 10 ms), then the computational overhead may be comparable to that cost. As a result, parallelization is a good idea only when the likelihood function is more expensive than the overhead time." 152 | ] 153 | }, 154 | { 155 | "cell_type": "markdown", 156 | "metadata": {}, 157 | "source": [] 158 | } 159 | ], 160 | "metadata": { 161 | "language_info": { 162 | "name": "python" 163 | } 164 | }, 165 | "nbformat": 4, 166 | "nbformat_minor": 2 167 | } 168 | -------------------------------------------------------------------------------- /docs/source/priors.rst: -------------------------------------------------------------------------------- 1 | Prior Probability 2 | ================= 3 | 4 | Standard priors 5 | --------------- 6 | 7 | The next step is to define the *prior* probability distribution. 
This encodes our knowledge about the parameters of the model 8 | before we have seen any data. 9 | 10 | ``pocoMC`` offers two ways to define a prior. The first is to utilise ready-made priors from the ``scipy.stats`` package. For instance, 11 | if we want our prior to be a *uniform* distribution on the interval :math:`[-10,10]` for all 10 of the parameters, we would do:: 12 | 13 | from scipy.stats import uniform 14 | 15 | prior = pc.Prior(n_dim * [uniform(loc=-10.0, scale=20.0)]) # Uniform prior on [-10,10] for all 10 parameters. 16 | 17 | Suppose now that we want a different prior for each parameter. For instance, we want the first five parameters to have a flat/uniform 18 | prior :math:`x_{i}\sim\mathcal{U}(-10,10)` for :math:`i=0,1,\dots,4` and the last five to have a Gaussian/normal prior 19 | with mean :math:`\mu=0` and standard deviation :math:`\sigma=3`, i.e. :math:`x_{i}\sim\mathcal{N}(0,3^{2})` for :math:`i=5,6,\dots,9`. 20 | We would do:: 21 | 22 | from scipy.stats import uniform, norm 23 | 24 | prior = pc.Prior([uniform(loc=-10.0, scale=20.0), # Uniform prior on [-10,10] for the first parameter. 25 | uniform(loc=-10.0, scale=20.0), # Uniform prior on [-10,10] for the second parameter. 26 | uniform(loc=-10.0, scale=20.0), # Uniform prior on [-10,10] for the third parameter. 27 | uniform(loc=-10.0, scale=20.0), # Uniform prior on [-10,10] for the fourth parameter. 28 | uniform(loc=-10.0, scale=20.0), # Uniform prior on [-10,10] for the fifth parameter. 29 | norm(loc=0.0, scale=3.0), # Normal prior with mean=0 and std=3 for the sixth parameter. 30 | norm(loc=0.0, scale=3.0), # Normal prior with mean=0 and std=3 for the seventh parameter. 31 | norm(loc=0.0, scale=3.0), # Normal prior with mean=0 and std=3 for the eighth parameter. 32 | norm(loc=0.0, scale=3.0), # Normal prior with mean=0 and std=3 for the ninth parameter. 33 | norm(loc=0.0, scale=3.0), # Normal prior with mean=0 and std=3 for the tenth parameter. 34 | ]) 35 | 36 | or simply:: 37 | 38 | from scipy.stats import uniform, norm 39 | 40 | prior = pc.Prior([uniform(loc=-10.0, scale=20.0)] * 5 + [norm(loc=0.0, scale=3.0)] * 5) 41 | 42 | One is free to use any of the priors available in the ``scipy.stats`` package. For a full list see `here `_. 43 | 44 | Custom priors 45 | ------------- 46 | 47 | The second way to define a prior is to define a class including the ``logpdf`` and ``rvs`` methods and ``dim`` 48 | and ``bounds`` attributes. This can be useful when the prior has some conditional/hierarchical structure. 49 | As an example, let us assume we have a three-parameter model where the prior for the third parameter depends 50 | on the values for the first two. This might be the case in, e.g., a hierarchical model where the prior over ``c`` 51 | is a Normal distribution whose mean ``m`` and standard deviation ``s`` are determined by a corresponding 52 | “hyper-prior”. We can easily set up a prior transform for this model by just going through the variables in order. 
53 | This would look like:: 54 | 55 | import numpy as np 56 | from scipy.stats import norm 57 | 58 | class CustomPrior: 59 | def __init__(self): 60 | self.dim = 3 61 | self.bounds = np.array([[-np.inf, np.inf], 62 | [0.0, 10], 63 | [-np.inf, np.inf]]) 64 | self.hyper_mean = 0.0 65 | self.hyper_std = 3.0 66 | 67 | def logpdf(self, x): 68 | m, s, c = x 69 | return norm.logpdf(c, loc=m, scale=s) 70 | 71 | def rvs(self, size=1): 72 | m = np.random.normal(loc=self.hyper_mean, scale=self.hyper_std, size=size) 73 | s = np.random.uniform(low=0.0, high=10.0, size=size) 74 | c = np.random.normal(loc=m, scale=s, size=size) 75 | return np.array([m, s, c]).T 76 | 77 | prior = CustomPrior() 78 | 79 | 80 | Boundary conditions 81 | ------------------- 82 | 83 | By default, ``pocoMC`` assumes that all parameters specified in the prior have hard bounds. In other words, each 84 | parameter is free to vary in a prespecified range. Anytime a value is proposed by ``pocoMC`` that lies outside of 85 | this range, it is automatically rejected. This is the desired behavior for most problems, since individual parameters 86 | are often either defined everywhere (i.e. from negative infinity to infinity) or over a finite range (e.g., from -1 to 87 | +1). 88 | 89 | However, there are problems in which specific parameters may behave differently. ``pocoMC`` supports two such cases: 90 | 91 | - **Periodic boundary conditions**. In this case, ``pocoMC`` assumes that the parameter is periodic. For example, 92 | if the parameter is on the interval ``[0, 2*np.pi]``, then the parameter can be wrapped around to the other side 93 | of the interval. This can be useful for phase parameters that might be periodic e.g. on a range ``[0,2*np.pi]``. 94 | - **Reflective boundary conditions**. In this case, ``pocoMC`` assumes that the parameter is reflective. For example, 95 | if the parameter is on the interval ``[0, 1]``, then the parameter can be flipped around to the other side of the 96 | interval. This can be useful for parameters that are ratios where ``a/b`` and ``b/a`` are equivalent. 97 | 98 | Given the above, it is possible to set the ``periodic`` and ``reflective`` attributes of the prior. For example, in 99 | a five-parameter model, if we want the first two parameters to be periodic, and the third and fourth to be reflective, 100 | we would do:: 101 | 102 | from scipy.stats import uniform, norm 103 | 104 | prior = pc.Prior([ 105 | uniform(loc=0.0, scale=2*np.pi), # this parameter is periodic 106 | uniform(loc=0.0, scale=2*np.pi), # this parameter is periodic 107 | uniform(loc=0.0, scale=1.0), # this parameter is reflective 108 | uniform(loc=0.0, scale=1.0), # this parameter is reflective 109 | norm(loc=0.0, scale=3.0), # this parameter is neither periodic nor reflective 110 | ]) 111 | 112 | sampler = pc.Sampler(prior, 113 | loglike, 114 | periodic=[0,1], 115 | reflective=[2,3]) 116 | 117 | As you can see, nothing changes in the definition of the prior. Instead, we just need to provide the indices of the 118 | parameters that should be periodic and reflective to the sampler. -------------------------------------------------------------------------------- /docs/source/results.rst: -------------------------------------------------------------------------------- 1 | Results 2 | ======= 3 | 4 | Simple 5 | ------ 6 | 7 | Once the run is complete we can look at the results. This can be done in two ways. The first is to use the ``posterior`` and ``evidence`` 8 | methods of the sampler. 
For instance, if we want to get the samples from the posterior we would do:: 9 | 10 | samples, weights, logl, logp = sampler.posterior() 11 | 12 | The ``samples`` argument is an array with the samples from the posterior. The ``weights`` argument is an array with the weights of the 13 | samples from the posterior. The ``logl`` argument is an array with the values of the log-likelihood for the samples from the posterior. 14 | The ``logp`` argument is an array with the values of the log-prior for the samples from the posterior. 15 | 16 | If we want to get samples from the posterior without the weights we would do:: 17 | 18 | samples, logl, logp = sampler.posterior(resample=True) 19 | 20 | This resamples the particles and is useful when we want to use the samples for parameter inference and we do not want to deal with the weights. 21 | 22 | The samples from the posterior can be used for parameter inference. For instance, we can get the mean and standard deviation of the 23 | posterior for each parameter by doing:: 24 | 25 | mean = np.mean(samples, axis=0) 26 | std = np.std(samples, axis=0) 27 | 28 | Or, we can utilize a third-party package such as ``corner`` (`available here `_) to plot the posterior samples:: 29 | 30 | import corner 31 | 32 | fig = corner.corner(samples, labels=[f"$x_{i}$" for i in range(n_dim)]); # If we do not want to use the weights. 33 | # fig = corner.corner(samples, weights=weights, labels=[f"$x_{i}$" for i in range(n_dim)]); # If we want to use the weights. 34 | 35 | Similarly, we can also get the estimate of the model evidence / marginal likelihood by doing:: 36 | 37 | logZ, logZerr = sampler.evidence() 38 | 39 | The ``logZ`` argument is the estimate of the logarithm of the model evidence. The ``logZerr`` argument is the error on the estimate 40 | of the logarithm of the model evidence. The error is estimated using the bootstrap method. 41 | 42 | Advanced 43 | -------- 44 | 45 | An alternative, and more advanced way, to look at the results is to use the ``results`` dictionary of the sampler, as follows:: 46 | 47 | results = sampler.results 48 | 49 | This is a dictionary includes the following keys:: 50 | 51 | ``u``, ``x``, ``logdetj``, ``logl``, ``logp``, ``logw``, ``blobs``, ``iter``, ``logz``, ``calls``, ``steps``, ``efficiency``, ``ess``, ``accept``, ``beta``. 52 | 53 | The ``u`` key is an array with the samples from the latent space. The ``x`` key is an array with the samples from the parameter space. 54 | The ``logdetj`` key is an array with the values of the log-determinant of the Jacobian of the normalizing flow for each sample. The ``logl`` 55 | key is an array with the values of the log-likelihood for each sample. The ``logp`` key is an array with the values of the log-prior for 56 | each sample. The ``logw`` key is an array with the values of the log-importance weights for each sample. The ``blobs`` key is an array with 57 | the values of the blobs for each sample. Blobs are additional quantities that are computed during the sampling procedure. The ``iter`` key is an array with 58 | the iteration index for each sample. The ``logz`` key is an array with the values of the logarithm of the model evidence for each iteration. 59 | The ``calls`` key is an array with the total number of log-likelihood calls for each iteration. The ``steps`` key is an array with the 60 | number of MCMC steps per iteration. The ``efficiency`` key is an array with the efficiency of the sampling procedure for each iteration. 
61 | The ``ess`` key is an array with the effective sample size for each iteration. The ``accept`` key is an array with the acceptance rate 62 | for each iteration. The ``beta`` key is an array with the value of the inverse temperature for each iteration. -------------------------------------------------------------------------------- /docs/source/sampling.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Sampling" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "This section explores the various options that are available to the user when it comes to initializing and running the sampler." 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "## Sampler Initialization\n", 22 | "\n", 23 | "We assume that the user has already defined their prior probability ``prior`` and log-likelihood function ``likelihood``, and that, if the latter requires any additional arguments (e.g., ``vectorize``, ``likelihood_args``, ``likelihood_kwargs``), the user has already provided them to the sampler. Please see the relevant sections in the documentation if you have not done so already.\n", 24 | "\n", 25 | "### Number of Particles\n", 26 | "\n", 27 | "By far, the most important parameters that control the performance of ``pocoMC`` are the number of **effective** and **active** particles. These are controlled respectively by the parameters ``n_effective`` and ``n_active``.\n", 28 | "\n", 29 | "- **Number of Effective Particles (``n_effective``):** Monte Carlo samplers such as ``pocoMC`` represent probability distributions as ensembles of particles. The greater the number of particles, the more accurate the representation of the target distribution. Generally, simpler distributions (i.e., close to Gaussian) require a low number of particles, whereas complex distributions (i.e., skewed or multimodal) require a large number of particles. Unlike *Nested Sampling* methods, *Preconditioned Monte Carlo*, and thus ``pocoMC``, utilizes unequally weighted particles, with some particles contributing more and some less to the aforementioned representation. As a result, we talk about *effective* particles. We recommend values around ``500`` for most problems and increasing that to ``2000-4000`` for particularly challenging distributions. If you are not sure about what value to use, do not worry. This can be determined automatically by ``pocoMC`` (see the *Dynamic Particle Allocation* section below). The default value is ``n_effective=512``.\n", 30 | "\n", 31 | "- **Number of Active Particles (``n_active``):** ``pocoMC`` propagates the effective particles from the prior to the posterior through a series of steps or iterations. However, in each iteration, not all effective particles are updated. Instead, only a subset of those, called *active particles*, is updated. Generally, the number of active particles ``n_active`` should be no more than half the number of effective particles ``n_effective`` for best performance. The default value is ``n_active=256``.\n", 32 | "\n", 33 | "Here is an example of how to manually specify the number of effective and active particles. If only one of them is specified, the other is set automatically."
34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "sampler = pc.Sampler(prior, likelihood, n_effective=1024, n_active=512)" 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "### Dynamic Particle Allocation\n", 50 | "\n", 51 | "For most applications, the default number of effective and active particles is more than sufficient to guarantee fast and accurate sampling of the posterior distribution. However, there are cases where the difficulty of the target distribution varies during the run. In this case it is beneficial to dynamically adjust the number of effective particles ``n_effective`` automatically (i.e., increase it when the problem becomes more difficult and decrease it when it becomes easier). This dynamic allocation of effective particles is achieved by setting ``dynamic=True`` in the initialization of the sampler. By default, dynamic allocation is turned on." 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "sampler = pc.Sampler(prior, likelihood, dynamic=True)" 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": {}, 66 | "source": [ 67 | "### Markov chain Monte Carlo\n", 68 | "\n", 69 | "``pocoMC`` relies on Markov chain Monte Carlo (MCMC) methods to diversify the active particles in each iteration. This is the most computationally expensive part of the algorithm and great care has been taken so that it is performed as efficiently as possible. \n", 70 | "\n", 71 | "``pocoMC`` supports two different Markov kernels that can be specified using the ``sample`` argument (a short selection example follows the list):\n", 72 | "\n", 73 | "- **t-preconditioned Crank-Nicolson (``sample='tpcn'``):** The t-preconditioned Crank-Nicolson (tpCN) MCMC algorithm is an advanced technique designed to improve sampling efficiency in high-dimensional and complex distributions. Unlike traditional methods (e.g., Random-walk Metropolis) where Gaussian perturbations are added directly to the current state, tpCN shifts these perturbations toward higher probability regions, enhancing the algorithm’s scalability to high dimensions. This shift is achieved by combining the current state with a preconditioned perturbation, balancing exploration and stability. The preconditioning step adapts to the structure of the target distribution, ensuring consistent acceptance rates and efficient exploration. Consequently, tpCN provides a robust and effective approach for sampling in high-dimensional spaces, outperforming simpler MCMC methods in such challenging settings.\n", 74 | "\n", 75 | "- **Random-walk Metropolis (``sample='rwm'``):** This is an MCMC method designed to sample from complex probability distributions. It generates new samples by adding a Gaussian-distributed perturbation to the current state and then accepts or rejects the proposed state based on the Metropolis acceptance criterion, which ensures that the samples converge to the target distribution. Despite its simplicity and broad applicability, the Random-walk Metropolis algorithm scales poorly to high-dimensional spaces. As the dimensionality increases, the probability of accepting proposed moves decreases, leading to inefficient exploration of the target distribution and slow convergence. This makes it less suitable for high-dimensional problems where more sophisticated methods (e.g., t-preconditioned Crank-Nicolson, NUTS, etc.) are often required.\n",
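Selecting between the two kernels described above is done through the ``sample`` argument at initialization; a minimal sketch (``prior`` and ``likelihood`` as in the earlier snippets)::

    import pocomc as pc

    # t-preconditioned Crank-Nicolson kernel (the default choice)
    sampler = pc.Sampler(prior, likelihood, sample='tpcn')

    # Random-walk Metropolis kernel, mainly intended for testing
    # sampler = pc.Sampler(prior, likelihood, sample='rwm')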
76 | "\n", 77 | "By default, ``sample='tpcn'`` and the t-preconditioned Crank-Nicolson Markov kernel is utilized. The Random-walk Metropolis kernel is only intended to be used for testing.\n", 78 | "\n", 79 | "The purpose of the Markov kernel is to diversify the particles and make them equilibrate in each target distribution. This means that in each iteration we need to ensure that MCMC has run long enough. To do this, ``pocoMC`` monitors the mean (unnormalized) posterior log-probability until it stops increasing for at least ``N`` steps, where ``N`` is given by:\n", 80 | "\n", 81 | "$$\n", 82 | "N = n_{\text{steps}}\times\left(\frac{2.38/\sqrt{n_{\text{dim}}}}{\sigma}\right)^{2}\n", 83 | "$$\n", 84 | "\n", 85 | "where ``n_dim`` is the dimensionality of the problem (i.e., the number of parameters), $\sigma$ is the proposal scale of tpCN or RWM that is determined adaptively by ``pocoMC``, and ``n_steps`` is a hyperparameter. By default, it is equal to the dimensionality of the problem, that is ``n_steps=n_dim``. However, for highly challenging problems, the user can provide a different value. For instance:" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": null, 91 | "metadata": {}, 92 | "outputs": [], 93 | "source": [ 94 | "sampler = pc.Sampler(prior, likelihood, n_steps=2*n_dim)" 95 | ] 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | "metadata": {}, 100 | "source": [ 101 | "### Preconditioning\n", 102 | "\n", 103 | "By default, ``pocoMC`` utilizes a normalizing flow to precondition (i.e., simplify) the target distribution. However, for sufficiently nice distributions that do not deviate significantly from a Gaussian distribution, this procedure may be unnecessary. In these cases, when the posterior is close to a Gaussian distribution and the computational cost of the likelihood is low, one can turn off the preconditioning and avoid training and evaluating the normalizing flow. This can be achieved simply by doing:" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "metadata": {}, 110 | "outputs": [], 111 | "source": [ 112 | "sampler = pc.Sampler(prior, likelihood, precondition=False)" 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "## Running\n", 120 | "\n", 121 | "Once the sampler has been configured and initialized, running it in order to sample from the posterior distribution is quite simple. All the user has to do is call the ``run()`` method of the ``Sampler`` object. Optionally, there are three parameters that can be adjusted:\n", 122 | "\n", 123 | " - ``n_total`` : This parameter specifies the effective sample size (or unique sample size if ``metric='uss'``) that is required before the sampling terminates. The default value is ``n_total=4096``. This is usually more than enough to construct nice diagrams showing the 1D and 2D marginal posteriors of various parameters. However, for publication purposes one may want to increase ``n_total`` to about ``10_000``. Similarly, for preliminary runs one can also decrease this (e.g., ``n_total=1024``).\n", 124 | " - ``n_evidence`` : This parameter specifies the number of samples that will be generated from the normalizing flow at the end of the run to compute the *importance sampling* estimate of the log model evidence $\log\mathcal{Z}$. Higher values will generally lead to more accurate estimates. 
If you do not require an estimate of the model evidence, you can set ``n_evidence=0``.\n", 125 | " - ``progress`` : This is a simple boolean parameter controlling whether or not a progress bar appears during running. By default ``progress=True``." 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": null, 131 | "metadata": {}, 132 | "outputs": [], 133 | "source": [ 134 | "sampler.run(\n", 135 | " n_total=4096,\n", 136 | " n_evidence=4096,\n", 137 | " progress=True,\n", 138 | ")" 139 | ] 140 | }, 141 | { 142 | "cell_type": "markdown", 143 | "metadata": {}, 144 | "source": [ 145 | "## Continue Running after Completion\n", 146 | "It is possible to continue running the sampler after sampling has been completed in order to add more samples. This can be useful if the user requires more samples to be able to approximate posterior or estimate the evidence more accurately. This can be achieved easily by calling the ``run()`` method again with higher ``n_total`` and/or ``n_evidence`` values. For instance:" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": null, 152 | "metadata": {}, 153 | "outputs": [], 154 | "source": [ 155 | "sampler.run(\n", 156 | " n_total=16384,\n", 157 | " n_evidence=16384,\n", 158 | ")" 159 | ] 160 | } 161 | ], 162 | "metadata": { 163 | "language_info": { 164 | "name": "python" 165 | } 166 | }, 167 | "nbformat": 4, 168 | "nbformat_minor": 2 169 | } 170 | -------------------------------------------------------------------------------- /joss/paper.bib: -------------------------------------------------------------------------------- 1 | @article{karamanis2022accelerating, 2 | author = {Karamanis, Minas and Beutler, Florian and Peacock, John A and Nabergoj, David and Seljak, Uro\v{s}}, 3 | title = {Accelerating astronomical and cosmological inference with preconditioned {Monte} {Carlo}}, 4 | journal = {Mon. Not. R. Astron Soc.}, 5 | volume = {516}, 6 | number = {2}, 7 | pages = {1644--1653}, 8 | year = {2022}, 9 | publisher = {Oxford University Press (OUP)}, 10 | doi = {10.1093/mnras/stac2272}, 11 | source = {Crossref}, 12 | url = {https://doi.org/10.1093/mnras/stac2272}, 13 | issn = {0035-8711, 1365-2966}, 14 | month = aug, 15 | } 16 | 17 | @article{trotta2017bayesian, 18 | author = {Trotta, Roberto}, 19 | title = {{Bayesian} Methods in Cosmology}, 20 | journal = {arXiv e-prints}, 21 | keywords = {Astrophysics - Cosmology and Nongalactic Astrophysics, Astrophysics - Instrumentation and Methods for Astrophysics, Statistics - Methodology}, 22 | year = {2017}, 23 | month = jan, 24 | eid = {arXiv:1701.01467}, 25 | pages = {arXiv:1701.01467}, 26 | archiveprefix = {arXiv}, 27 | eprint = {1701.01467}, 28 | primaryclass = {astro-ph.CO}, 29 | adsurl = {https://ui.adsabs.harvard.edu/abs/2017arXiv170101467T}, 30 | adsnote = {Provided by the SAO/NASA Astrophysics Data System}, 31 | } 32 | 33 | @article{sharma2017markov, 34 | author = {Sharma, Sanjib}, 35 | title = {{Markov} Chain {Monte} {Carlo} Methods for {Bayesian} Data Analysis in Astronomy}, 36 | journal = {Annu. Rev. Astron. 
Astr.}, 37 | year = {2017}, 38 | doi = {10.1146/annurev-astro-082214-122339}, 39 | number = {1}, 40 | source = {Crossref}, 41 | url = {https://doi.org/10.1146/annurev-astro-082214-122339}, 42 | volume = {55}, 43 | publisher = {Annual Reviews}, 44 | issn = {0066-4146, 1545-4282}, 45 | pages = {213--259}, 46 | month = aug, 47 | } 48 | 49 | @article{skilling2006nested, 50 | author = {Skilling, John}, 51 | title = {Nested sampling for general {Bayesian} computation}, 52 | journal = {Bayesian Anal.}, 53 | volume = {1}, 54 | number = {4}, 55 | pages = {833--859}, 56 | year = {2006}, 57 | publisher = {Institute of Mathematical Statistics}, 58 | doi = {10.1214/06-ba127}, 59 | source = {Crossref}, 60 | url = {https://doi.org/10.1214/06-ba127}, 61 | issn = {1936-0975}, 62 | month = dec, 63 | } 64 | 65 | @article{dinh2016density, 66 | author = {Dinh, Laurent and Sohl-Dickstein, Jascha and Bengio, Samy}, 67 | title = {Density estimation using {Real} {NVP}}, 68 | journal = {arXiv e-prints}, 69 | keywords = {Computer Science - Machine Learning, Computer Science - Artificial Intelligence, Computer Science - Neural and Evolutionary Computing, Statistics - Machine Learning}, 70 | year = {2016}, 71 | month = may, 72 | eid = {arXiv:1605.08803}, 73 | pages = {arXiv:1605.08803}, 74 | archiveprefix = {arXiv}, 75 | eprint = {1605.08803}, 76 | primaryclass = {cs.LG}, 77 | adsurl = {https://ui.adsabs.harvard.edu/abs/2016arXiv160508803D}, 78 | adsnote = {Provided by the SAO/NASA Astrophysics Data System}, 79 | } 80 | 81 | @article{papamakarios2017masked, 82 | author = {Papamakarios, George and Pavlakou, Theo and Murray, Iain}, 83 | title = {Masked autoregressive flow for density estimation}, 84 | journal = {Adv Neural Inf Process Syst}, 85 | volume = {30}, 86 | year = {2017}, 87 | } 88 | 89 | @article{del2006sequential, 90 | author = {Del Moral, Pierre and Doucet, Arnaud and Jasra, Ajay}, 91 | title = {Sequential {Monte} {Carlo} samplers}, 92 | journal = {J. R. Stat. Soc. B}, 93 | volume = {68}, 94 | number = {3}, 95 | pages = {411--436}, 96 | year = {2006}, 97 | publisher = {Wiley}, 98 | doi = {10.1111/j.1467-9868.2006.00553.x}, 99 | source = {Crossref}, 100 | url = {https://doi.org/10.1111/j.1467-9868.2006.00553.x}, 101 | issn = {1369-7412, 1467-9868}, 102 | month = jun, 103 | } 104 | 105 | @article{papamakarios2021normalizing, 106 | author = {Papamakarios, George and Nalisnick, Eric T and Rezende, Danilo Jimenez and Mohamed, Shakir and Lakshminarayanan, Balaji}, 107 | title = {Normalizing Flows for Probabilistic Modeling and Inference.}, 108 | journal = {J. Mach. Learn. Res.}, 109 | volume = {22}, 110 | number = {57}, 111 | pages = {1--64}, 112 | year = {2021}, 113 | } 114 | 115 | @article{vretinaris2022postmerger, 116 | author = {Vretinaris, George and Vretinaris, Stamatis and Mermigkas, Christos and Karamanis, Minas and Stergioulas, Nikolaos}, 117 | title = {Robust and fast parameter estimation of gravitational waves from neutron star merger remnants}, 118 | journal = {in prep}, 119 | year = {2022}, 120 | } 121 | 122 | @inproceedings{jia2020normalizing, 123 | author = {Jia, He and Seljak, Uros}, 124 | title = {Normalizing constant estimation with Gaussianized bridge sampling}, 125 | booktitle = {Symposium on Advances in Approximate Bayesian Inference}, 126 | pages = {1--14}, 127 | year = {2020}, 128 | organization = {PMLR}, 129 | } 130 | 131 | @article{hoffman2019neutra, 132 | author = {Hoffman, Matthew and Sountsov, Pavel and Dillon, Joshua V. 
and Langmore, Ian and Tran, Dustin and Vasudevan, Srinivas}, 133 | title = {{NeuTra}-lizing Bad Geometry in {Hamiltonian} {Monte} {Carlo} Using Neural Transport}, 134 | journal = {arXiv e-prints}, 135 | keywords = {Statistics - Computation, Statistics - Machine Learning}, 136 | year = {2019}, 137 | month = mar, 138 | eid = {arXiv:1903.03704}, 139 | pages = {arXiv:1903.03704}, 140 | archiveprefix = {arXiv}, 141 | eprint = {1903.03704}, 142 | primaryclass = {stat.CO}, 143 | adsurl = {https://ui.adsabs.harvard.edu/abs/2019arXiv190303704H}, 144 | adsnote = {Provided by the SAO/NASA Astrophysics Data System}, 145 | } 146 | 147 | @article{gabrie2021efficient, 148 | author = {Gabri\'e, Marylou and Rotskoff, Grant M. and Vanden-Eijnden, Eric}, 149 | title = {Adaptive {Monte} {Carlo} augmented with normalizing flows}, 150 | journal = {Proc. Natl. Acad. Sci.}, 151 | year = {2022}, 152 | doi = {10.1073/pnas.2109420119}, 153 | number = {10}, 154 | source = {Crossref}, 155 | url = {https://doi.org/10.1073/pnas.2109420119}, 156 | volume = {119}, 157 | publisher = {Proceedings of the National Academy of Sciences}, 158 | issn = {0027-8424, 1091-6490}, 159 | month = mar, 160 | } 161 | 162 | @inproceedings{brofos2022adaptation, 163 | author = {Brofos, James and Gabri\'e, Marylou and Brubaker, Marcus A and Lederman, Roy R}, 164 | title = {Adaptation of the Independent {Metropolis}-{Hastings} Sampler with Normalizing Flow Proposals}, 165 | booktitle = {International Conference on Artificial Intelligence and Statistics}, 166 | pages = {5949--5986}, 167 | year = {2022}, 168 | organization = {PMLR}, 169 | } 170 | 171 | @article{gabrie2022adaptive, 172 | author = {Gabri\'e, Marylou and Rotskoff, Grant M. and Vanden-Eijnden, Eric}, 173 | title = {Adaptive {Monte} {Carlo} augmented with normalizing flows}, 174 | journal = {Proc. Natl. Acad. Sci.}, 175 | volume = {119}, 176 | number = {10}, 177 | pages = {e2109420119}, 178 | year = {2022}, 179 | publisher = {Proceedings of the National Academy of Sciences}, 180 | doi = {10.1073/pnas.2109420119}, 181 | source = {Crossref}, 182 | url = {https://doi.org/10.1073/pnas.2109420119}, 183 | issn = {0027-8424, 1091-6490}, 184 | month = mar, 185 | } 186 | 187 | @article{moss2020accelerated, 188 | author = {Moss, Adam}, 189 | title = {Accelerated {Bayesian} inference using deep learning}, 190 | journal = {Mon. Not. R. Astron Soc.}, 191 | volume = {496}, 192 | number = {1}, 193 | pages = {328--338}, 194 | year = {2020}, 195 | publisher = {Oxford University Press (OUP)}, 196 | doi = {10.1093/mnras/staa1469}, 197 | source = {Crossref}, 198 | url = {https://doi.org/10.1093/mnras/staa1469}, 199 | issn = {0035-8711, 1365-2966}, 200 | month = may, 201 | } 202 | -------------------------------------------------------------------------------- /joss/paper.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: 'pocoMC: A Python package for accelerated Bayesian inference in astronomy and cosmology' 3 | tags: 4 | - Python 5 | - astronomy 6 | authors: 7 | - name: Minas Karamanis 8 | orcid: 0000-0001-9489-4612 9 | corresponding: true 10 | affiliation: "1, 3" 11 | - name: David Nabergoj 12 | orcid: 0000-0001-6882-627X 13 | affiliation: 2 14 | - name: Florian Beutler 15 | orcid: 0000-0003-0467-5438 16 | affiliation: 1 17 | - name: John A. 
Peacock 18 | orcid: 0000-0002-1168-8299 19 | affiliation: 1 20 | - name: Uroš Seljak 21 | orcid: 0000-0003-2262-356X 22 | affiliation: 3 23 | 24 | affiliations: 25 | - name: Institute for Astronomy, University of Edinburgh, Royal Observatory, Blackford Hill, Edinburgh EH9 3HJ, UK 26 | index: 1 27 | - name: Faculty of Computer and Information Science, University of Ljubljana, Ve\v{c}na pot 113, 1000 Ljubljana, Slovenia 28 | index: 2 29 | - name: Physics Department, University of California and Lawrence Berkeley National Laboratory Berkeley, CA 94720, USA 30 | index: 3 31 | date: 12 July 2022 32 | bibliography: paper.bib 33 | 34 | --- 35 | 36 | # Summary 37 | 38 | `pocoMC` is a Python package for accelerated Bayesian inference in astronomy and 39 | cosmology. The code is designed to sample efficiently from posterior distributions 40 | with non-trivial geometry, including strong multimodality and non-linearity. To this end, 41 | `pocoMC` relies on the Preconditioned Monte Carlo algorithm which utilises a Normalising 42 | Flow to decorrelate the parameters of the posterior. It facilitates both tasks of 43 | parameter estimation and model comparison, focusing especially on computationally expensive 44 | applications. It allows fitting arbitrary models defined as a log-likelihood function and a 45 | log-prior probability density function in Python. Compared to popular alternatives (e.g. 46 | nested sampling) `pocoMC` can speed up the sampling procedure by orders of magnitude, cutting 47 | down the computational cost substantially. Finally, parallelisation to computing clusters 48 | manifests linear scaling. 49 | 50 | # Statement of need 51 | 52 | Over the past few decades, the volume of astronomical and cosmological data has 53 | increased substantially. At the same time, theoretical and phenomenological models 54 | in these fields have grown even more complex. As a response to that, a number of methods 55 | aiming at efficient Bayesian computation have been developed with the sole task of 56 | comparing those models to the available data [@trotta2017bayesian; @sharma2017markov]. 57 | In the Bayesian context, scientific inference proceeds through the use of Bayes' theorem: 58 | \begin{equation}\label{eq:bayes} 59 | \mathcal{P}(\theta) = \frac{\mathcal{L}(\theta)\pi(\theta)}{\mathcal{Z}} 60 | \end{equation} 61 | where the posterior $\mathcal{P}(\theta)\equiv p(\theta\vert d,\mathcal{M})$ is the 62 | probability of the parameters $\theta$ given the data $d$ and the model $\mathcal{M}$. 63 | The other components of this equation are: the likelihood function 64 | $\mathcal{L}(\theta)\equiv p(d\vert \theta,\mathcal{M})$, the prior $\pi(\theta) \equiv p(\theta\vert \mathcal{M})$, 65 | and the model evidence $\mathcal{Z}=p(d\vert \mathcal{M})$. The prior and the 66 | likelihood are usually provided as input in this equation and one seeks to estimate the 67 | posterior and the evidence. Knowledge of the posterior, in the form of samples, 68 | is paramount for the task of parameter estimation whereas the ratio of model 69 | evidences yields the Bayes factor which is the cornerstone of Bayesian model comparison. 70 | 71 | Markov chain Monte Carlo (MCMC) has been established as the standard tool for 72 | Bayesian computation in astronomy and cosmology, either as a standalone algorithm 73 | or as part of another method [e.g., nested sampling, @skilling2006nested]. 
However, 74 | as MCMC relies on the local exploration of the posterior, the presence of a non-linear 75 | correlation between parameters and multimodality can at best hinder its performance 76 | and at worst violate its theoretical guarantees of convergence (i.e. ergodicity). Usually, 77 | those challenges are partially addressed by reparameterising the model using a common 78 | change-of-variables parameter transformation. However, guessing the right kind of 79 | reparameterisation _a priori_ is not trivial as it often requires a deep knowledge of 80 | the physical model and its symmetries. These problems are usually complicated further by the substantial 81 | computational cost of evaluating astronomical and cosmological models. `pocoMC` is 82 | designed to tackle exactly these kinds of difficulties by automatically reparameterising 83 | the model such that the parameters of the model are approximately uncorrelated and standard techniques 84 | can be applied. As a result, `pocoMC` produces both samples from the posterior distribution and an 85 | unbiased estimate of the model evidence thus facilitating both scientific tasks with excellent 86 | efficiency and robustness. Compared to popular alternatives such as nested sampling, `pocoMC` 87 | can reduce the computational cost, and thus, the total run time of the analysis by orders of magnitude, 88 | in both artificial and realistic applications [@karamanis2022accelerating]. Finally, the code is well-tested 89 | and is currently used for research work in the field of gravitational wave parameter estimation [@vretinaris2022postmerger]. 90 | 91 | # Method 92 | 93 | `pocoMC` implements the Preconditioned Monte Carlo (PMC) algorithm. PMC combines 94 | the popular Sequential Monte Carlo [SMC, @del2006sequential] method with a Normalising Flow [NF, @papamakarios2021normalizing]. 95 | The latter works as a preconditioner for the target distribution of the former. 96 | As SMC evolves a population of particles, starting from the prior distribution 97 | and gradually approaching the posterior distribution, the NF transforms the 98 | parameters of the target distribution such that any correlation between parameters 99 | or presence of multimodality is removed. The effect of this bijective transformation 100 | is the substantial rise in the sampling efficiency of the algorithm as the particles 101 | are allowed to sample freely from the target without being hindered by its locally-curved 102 | geometry. The method is explained in detail in the accompanying publication [@karamanis2022accelerating], 103 | and we provide only a summary here. NFs have been used extensively to 104 | accelerate various sampling algorithms, including Hamiltonian Monte Carlo [@hoffman2019neutra], 105 | Metropolis adjusted Langevin algorithm [@gabrie2021efficient], adaptive Independence 106 | Metropolis-Hastings [@brofos2022adaptation], adaptive MCMC [@gabrie2022adaptive], and 107 | nested sampling [@moss2020accelerated]. 108 | 109 | ## Sequential Monte Carlo 110 | 111 | The basic idea of basic SMC is to sample from the posterior distribution $\mathcal{P}(\theta)$ by first 112 | defining a path of intermediate distributions starting from the prior $\pi(\theta)$. In the 113 | case of `pocoMC` the path has the form: 114 | \begin{equation}\label{eq:path} 115 | p_{t}(\theta) = \pi(\theta)^{1-\beta_{t}} \mathcal{P}(\theta)^{\beta_{t}} 116 | \end{equation} 117 | where $0=\beta_{1}<\beta_{2}<\dots<\beta_{T}=1$. 
Starting from the prior, each distribution with density $p_{t}(\theta)$ is 118 | sampled in turn using a collection of particles propagated by a number of MCMC steps. Before MCMC sampling, 119 | the particles are re-weighted using importance sampling and then re-sampled to account for the transition from 120 | $p_{t}(\theta)$ to $p_{t+1}(\theta)$. `pocoMC` utilises the importance weights of this step to define an estimator 121 | for the effective sample size (ESS) of the population of particles. Maintaining a fixed value of ESS during the run 122 | allows `pocoMC` to adaptively specify the $\beta_{t}$ schedule. 123 | 124 | ## Preconditioned Monte Carlo 125 | 126 | In vanilla SMC, standard MCMC methods (e.g. Metropolis-Hastings) are used to update the positions 127 | of the particles during each iteration. This however can become highly inefficient if the distribution 128 | $p_{t}(\theta)$ is characterised by a non-trivial geometry. `pocoMC`, which is based on PMC, utilises 129 | a NF to learn an invertible transformation that simplifies 130 | the geometry of the distribution by mapping $p_{t}(\theta)$ into a zero-mean unit-variance normal distribution. 131 | Sampling then proceeds in the latent space in which correlations are substantially reduced. The positions of 132 | the particles are transformed back to the original parameter space at the end of each iteration. This way, 133 | PMC and `pocoMC` can sample from very challenging posteriors very efficiently using simple Metropolis-Hastings 134 | updates in the preconditioned/uncorrelated latent space. 135 | 136 | # Features 137 | 138 | - User-friendly black-box API: only the log-likelihood, log-prior and some prior samples required from the user. 139 | - The default configuration sufficient for most applications: no tuning is required but is possible for experienced users. 140 | - Comprehensive plotting tools: posterior corner, trace, and run plots are all supported. 141 | - Model evidence estimation using Gaussianized Bridge Sampling [@jia2020normalizing]. 142 | - Support for both MAF and RealNVP normalising flows with added regularisation [@papamakarios2017masked; @dinh2016density]. 143 | - Straightforward parallelisation using MPI or multiprocessing. 144 | - Well-tested and documented: continuous integration, unit tests, a wide range of examples, and [extensive documentation](https://pocomc.readthedocs.io/). 145 | 146 | # Acknowledgments 147 | 148 | MK would like to thank Jamie Donald-McCann and Richard Grumitt for providing constructive comments and George Vretinaris for feedback on an early version of the code. This project has received funding from the European Research Council (ERC) under the European Union's Horizon 2020 research and innovation program (grant agreement 853291), and from the U.S. Department of Energy, Office of Science, Office of Advanced Scientific Computing Research under Contract No. DE-AC02-05CH11231 at Lawrence Berkeley National Laboratory to enable research for Data-intensive Machine Learning and Analysis. FB is a University Research Fellow. 
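As an indicative illustration of the black-box interface listed under *Features*, a typical analysis reduces to a handful of lines. The toy model below is made up, and the exact call signatures should be checked against the package documentation rather than taken from this sketch:

```python
import numpy as np
import pocomc as pc
from scipy.stats import norm, uniform

# Toy two-parameter model: independent 1-D priors and a simple Gaussian log-likelihood.
prior = pc.Prior([norm(loc=0.0, scale=1.0), uniform(loc=0.0, scale=1.0)])

def log_likelihood(theta):
    return -0.5 * np.sum(theta**2)

sampler = pc.Sampler(prior, log_likelihood)
sampler.run()

samples, weights, logl, logp = sampler.posterior()   # weighted posterior samples
logz, logz_err = sampler.evidence()                  # estimate of the log-evidence
```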
149 | 150 | # References -------------------------------------------------------------------------------- /logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/minaskar/pocomc/77cbe330f00fe1d258d6db36c15aa959abdb83e1/logo.png -------------------------------------------------------------------------------- /logo.svg: -------------------------------------------------------------------------------- 1 | 2 | 15 | 17 | 35 | 39 | 43 | 47 | 51 | 55 | 59 | 63 | 67 | 71 | 75 | 78 | 81 | 84 | 87 | 88 | 89 | -------------------------------------------------------------------------------- /pocomc/__init__.py: -------------------------------------------------------------------------------- 1 | __bibtex__ = """ 2 | @article{karamanis2022accelerating, 3 | title={Accelerating astronomical and cosmological inference with preconditioned Monte Carlo}, 4 | author={Karamanis, Minas and Beutler, Florian and Peacock, John A and Nabergoj, David and Seljak, Uro{\v{s}}}, 5 | journal={Monthly Notices of the Royal Astronomical Society}, 6 | volume={516}, 7 | number={2}, 8 | pages={1644--1653}, 9 | year={2022}, 10 | publisher={Oxford University Press} 11 | } 12 | 13 | @article{karamanis2022pocomc, 14 | title={pocoMC: A Python package for accelerated Bayesian inference in astronomy and cosmology}, 15 | author={Karamanis, Minas and Nabergoj, David and Beutler, Florian and Peacock, John A and Seljak, Uros}, 16 | journal={arXiv preprint arXiv:2207.05660}, 17 | year={2022} 18 | } 19 | """ 20 | __url__ = "https://pocomc.readthedocs.io" 21 | __author__ = "Minas Karamanis" 22 | __email__ = "minaskar@gmail.com" 23 | __license__ = "GPL-3.0" 24 | __description__ = "A Python implementation of Preconditioned Monte Carlo for accelerated Bayesian Computation" 25 | 26 | 27 | from .flow import * 28 | from .sampler import * 29 | from .prior import * 30 | from .parallel import * 31 | from ._version import version 32 | 33 | __version__ = version 34 | -------------------------------------------------------------------------------- /pocomc/_version.py: -------------------------------------------------------------------------------- 1 | version = "1.2.6" 2 | -------------------------------------------------------------------------------- /pocomc/flow.py: -------------------------------------------------------------------------------- 1 | from typing import Union, Optional, Tuple, Dict, List 2 | 3 | import numpy as np 4 | import copy 5 | import time 6 | import zuko 7 | import torch 8 | from torch.utils.data import DataLoader, TensorDataset 9 | from torch.optim.lr_scheduler import ReduceLROnPlateau 10 | 11 | from .tools import torch_double_to_float 12 | 13 | class Flow: 14 | """ 15 | Normalizing flow model. 16 | 17 | Parameters 18 | ---------- 19 | n_dim : ``int`` 20 | Number of dimensions of the distribution to be modeled. 21 | flow : ``zuko.flows.Flow`` or str, optional 22 | Normalizing flow model. Default: ``nsf3``. 23 | 24 | Attributes 25 | ---------- 26 | n_dim : ``int`` 27 | Number of dimensions of the distribution to be modeled. 28 | flow : ``zuko.flows.Flow`` 29 | Normalizing flow model. 30 | transform : ``zuko.transforms.Transform`` 31 | Transformation object. 
32 | 33 | Examples 34 | -------- 35 | >>> import torch 36 | >>> import pocomc 37 | >>> flow = pocomc.Flow(2) 38 | >>> x = torch.randn(100, 2) 39 | >>> u, logdetj = flow(x) 40 | >>> x_, logdetj_ = flow.inverse(u) 41 | >>> log_prob = flow.log_prob(x) 42 | >>> x_, log_prob_ = flow.sample(100) 43 | >>> history = flow.fit(x) 44 | """ 45 | 46 | def __init__(self, n_dim, flow='nsf3'): 47 | self.n_dim = n_dim 48 | 49 | def next_power_of_2(n): 50 | return 1 if n == 0 else 2**(n - 1).bit_length() 51 | 52 | n_hidden = np.maximum(next_power_of_2(3*n_dim), 32) 53 | 54 | if flow == 'maf3': 55 | self.flow = zuko.flows.MAF(n_dim, 56 | transforms=3, 57 | hidden_features=[n_hidden] * 3, 58 | residual=True,) 59 | elif flow == 'maf6': 60 | self.flow = zuko.flows.MAF(n_dim, 61 | transforms=6, 62 | hidden_features=[n_hidden] * 3, 63 | residual=True,) 64 | elif flow == 'maf12': 65 | self.flow = zuko.flows.MAF(n_dim, 66 | transforms=12, 67 | hidden_features=[n_hidden] * 3, 68 | residual=True,) 69 | elif flow == 'nsf3': 70 | self.flow = zuko.flows.NSF(features=n_dim, 71 | bins=8, 72 | transforms=3, 73 | hidden_features=[n_hidden] * 3, 74 | residual=True) 75 | elif flow == 'nsf6': 76 | self.flow = zuko.flows.NSF(features=n_dim, 77 | bins=8, 78 | transforms=6, 79 | hidden_features=[n_hidden] * 3, 80 | residual=True) 81 | elif flow == 'nsf12': 82 | self.flow = zuko.flows.NSF(features=n_dim, 83 | bins=8, 84 | transforms=12, 85 | hidden_features=[n_hidden] * 3, 86 | residual=True) 87 | elif isinstance(flow, zuko.flows.Flow): 88 | self.flow = flow 89 | else: 90 | raise ValueError('Invalid flow type. Choose from: maf3, maf6, maf12, nsf3, nsf6, nsf12, or provide a zuko.flows.Flow object.') 91 | 92 | @property 93 | def transform(self): 94 | """ 95 | Transformation object. 96 | """ 97 | return self.flow().transform 98 | 99 | def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: 100 | """ 101 | Forward transformation. 102 | Inputs are transformed from the original (relating to the distribution to be modeled) to the latent space. 103 | 104 | Parameters 105 | ---------- 106 | x : ``torch.Tensor`` 107 | Samples to transform. 108 | Returns 109 | ------- 110 | u : ``torch.Tensor`` 111 | Transformed samples in latent space with the same shape as the original space inputs. 112 | """ 113 | x = torch_double_to_float(x) 114 | return self.transform.call_and_ladj(x) 115 | 116 | def inverse(self, u: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: 117 | """ 118 | Inverse transformation. 119 | Inputs are transformed from the latent to the original space (relating to the distribution to be modeled). 120 | 121 | Parameters 122 | ---------- 123 | u : ``torch.Tensor`` 124 | Samples to transform. 125 | Returns 126 | ------- 127 | x : ``torch.Tensor`` 128 | Transformed samples in the original space with the same shape as the latent space inputs. 129 | """ 130 | u = torch_double_to_float(u) 131 | x, logdetj = self.transform.inv.call_and_ladj(u) 132 | return x, logdetj 133 | 134 | def log_prob(self, x: torch.Tensor) -> torch.Tensor: 135 | """ 136 | Compute log probability of samples. 137 | 138 | Parameters 139 | ---------- 140 | x : ``torch.Tensor`` 141 | Input samples 142 | Returns 143 | ------- 144 | Log-probability of samples. 145 | """ 146 | x = torch_double_to_float(x) 147 | return self.flow().log_prob(x) 148 | 149 | def sample(self, size: int = 1) -> Tuple[torch.Tensor, torch.Tensor]: 150 | """ 151 | Draw random samples from the normalizing flow. 
152 | 153 | Parameters 154 | ---------- 155 | size : ``int`` 156 | Number of samples to generate. Default: 1. 157 | Returns 158 | ------- 159 | samples, log_prob : ``tuple`` 160 | Samples as a ``torch.Tensor`` with shape ``(size, n_dimensions)`` and log probability values with shape ``(size, )``. 161 | """ 162 | x, log_p = self.flow().rsample_and_log_prob((size,)) 163 | return x, log_p 164 | 165 | def fit(self, 166 | x, 167 | weights=None, 168 | validation_split=0.0, 169 | epochs=1000, 170 | batch_size=1000, 171 | patience=20, 172 | learning_rate=1e-3, 173 | weight_decay=0, 174 | laplace_scale=None, 175 | gaussian_scale=None, 176 | annealing=True, 177 | noise=None, 178 | shuffle=True, 179 | clip_grad_norm=1.0, 180 | verbose=0, 181 | ): 182 | """ 183 | 184 | Parameters 185 | ---------- 186 | x : ``torch.Tensor`` 187 | Input samples. 188 | weights : ``torch.Tensor``, optional 189 | Weights for each sample. Default: ``None``. 190 | validation_split : ``float``, optional 191 | Fraction of samples to use for validation. Default: 0.0. 192 | epochs : ``int``, optional 193 | Number of epochs. Default: 1000. 194 | batch_size : ``int``, optional 195 | Batch size. Default: 1000. 196 | patience : ``int``, optional 197 | Number of epochs without improvement before early stopping. Default: 20. 198 | learning_rate : ``float``, optional 199 | Learning rate. Default: 1e-3. 200 | weight_decay : ``float``, optional 201 | Weight decay. Default: 0. 202 | laplace_scale : ``float``, optional 203 | Laplace regularization scale. Default: ``None``. 204 | gaussian_scale : ``float``, optional 205 | Gaussian regularization scale. Default: ``None``. 206 | annealing : ``bool``, optional 207 | Whether to use learning rate annealing. Default: ``True``. 208 | noise : ``float``, optional 209 | Noise scale. Default: ``None``. 210 | shuffle : ``bool``, optional 211 | Whether to shuffle samples. Default: ``True``. 212 | clip_grad_norm : ``float``, optional 213 | Maximum gradient norm. Default: 1.0. 214 | verbose : ``int``, optional 215 | Verbosity level. Default: 0. 216 | 217 | Returns 218 | ------- 219 | history : ``dict`` 220 | Dictionary with loss history. 
221 | 222 | Examples 223 | -------- 224 | >>> import torch 225 | >>> import pocomc 226 | >>> flow = pocomc.Flow(2) 227 | >>> x = torch.randn(100, 2) 228 | >>> history = flow.fit(x) 229 | """ 230 | x = torch_double_to_float(x) 231 | 232 | n_samples, n_dim = x.shape 233 | 234 | if shuffle: 235 | rand_indx = torch.randperm(n_samples) 236 | x = x[rand_indx] 237 | if weights is not None: 238 | weights = weights[rand_indx] 239 | 240 | if noise is not None: 241 | min_dists = torch.empty(n_samples) 242 | for i in range(n_samples): 243 | min_dist = torch.linalg.norm(x[i] - x, axis=1) 244 | min_dists[i] = torch.min(min_dist[min_dist > 0.0]) 245 | mean_min_dist = torch.mean(min_dist) 246 | 247 | if validation_split > 0.0: 248 | x_train = x[:int(validation_split * n_samples)] 249 | x_valid = x[int(validation_split * n_samples):] 250 | if weights is None: 251 | train_dl = DataLoader(TensorDataset(x_train), batch_size, shuffle) 252 | val_dl = DataLoader(TensorDataset(x_valid), batch_size, shuffle) 253 | else: 254 | weights_train = weights[:int(validation_split * n_samples)] 255 | weights_valid = weights[int(validation_split * n_samples):] 256 | train_dl = DataLoader(TensorDataset(x_train, weights_train), batch_size, shuffle) 257 | val_dl = DataLoader(TensorDataset(x_valid, weights_valid), batch_size, shuffle) 258 | validation = True 259 | else: 260 | x_train = x 261 | if weights is None: 262 | train_dl = DataLoader(TensorDataset(x_train), batch_size, shuffle) 263 | else: 264 | weights_train = weights 265 | train_dl = DataLoader(TensorDataset(x_train, weights_train), batch_size, shuffle) 266 | validation = False 267 | 268 | optimizer = torch.optim.AdamW(self.flow.parameters(), 269 | learning_rate, 270 | weight_decay=weight_decay, 271 | ) 272 | 273 | if annealing: 274 | scheduler = ReduceLROnPlateau(optimizer, 275 | mode='min', 276 | factor=0.2, 277 | patience=patience, 278 | threshold=0.0001, 279 | threshold_mode='abs', 280 | min_lr=1e-6, 281 | ) 282 | 283 | history = dict() # Collects per-epoch loss 284 | history['loss'] = [] 285 | history['val_loss'] = [] 286 | if validation: 287 | monitor = 'val_loss' 288 | else: 289 | monitor = 'loss' 290 | 291 | best_epoch = 0 292 | best_loss = np.inf 293 | best_model = copy.deepcopy(self.flow.state_dict()) 294 | 295 | start_time_sec = time.time() 296 | 297 | for epoch in range(epochs): 298 | self.flow.train() 299 | train_loss = 0.0 300 | 301 | for batch in train_dl: 302 | 303 | optimizer.zero_grad() 304 | if noise is None: 305 | x_ = batch[0] 306 | else: 307 | x_ = batch[0] + noise * mean_min_dist * torch.randn_like(batch[0]) 308 | if weights is None: 309 | loss = -self.flow().log_prob(x_).sum() 310 | else: 311 | loss = -self.flow().log_prob(x_) * batch[1] * 1000.0 312 | loss = loss.sum() / batch[1].sum() 313 | 314 | if laplace_scale is not None or gaussian_scale is not None: 315 | loss -= regularization_loss(self.flow, laplace_scale, gaussian_scale) 316 | 317 | loss.backward() 318 | torch.nn.utils.clip_grad_norm_(self.flow.parameters(), clip_grad_norm) 319 | optimizer.step() 320 | 321 | train_loss += loss.data.item() 322 | 323 | train_loss = train_loss / len(train_dl.dataset) 324 | 325 | history['loss'].append(train_loss) 326 | 327 | if validation: 328 | self.flow.eval() 329 | val_loss = 0.0 330 | 331 | for batch in val_dl: 332 | 333 | if noise is None: 334 | x_ = batch[0] 335 | else: 336 | x_ = batch[0] + noise * mean_min_dist * torch.randn_like(batch[0]) 337 | if weights is None: 338 | loss = -self.flow().log_prob(x_).sum() 339 | else: 340 | loss = 
-self.flow().log_prob(x_) * batch[1] * 1000.0 341 | loss = loss.sum() / batch[1].sum() 342 | 343 | if laplace_scale is not None or gaussian_scale is not None: 344 | loss -= regularization_loss(self.flow, laplace_scale, gaussian_scale) 345 | 346 | val_loss += loss.data.item() 347 | 348 | val_loss = val_loss / len(val_dl.dataset) 349 | 350 | history['val_loss'].append(val_loss) 351 | 352 | if annealing and validation: 353 | scheduler.step(val_loss) 354 | elif annealing: 355 | scheduler.step(train_loss) 356 | 357 | if verbose > 1: 358 | try: 359 | print('Epoch %3d/%3d, train loss: %5.2f, val loss: %5.2f' % (epoch + 1, epochs, train_loss, val_loss)) 360 | except: # TODO specify type of exception 361 | print('Epoch %3d/%3d, train loss: %5.2f' % (epoch + 1, epochs, train_loss)) 362 | 363 | # Monitor loss 364 | if history[monitor][-1] < best_loss: 365 | best_loss = history[monitor][-1] 366 | best_epoch = epoch 367 | best_model = copy.deepcopy(self.flow.state_dict()) 368 | 369 | if epoch - best_epoch >= int(1.5 * patience): 370 | self.flow.load_state_dict(best_model) 371 | if verbose > 0: 372 | print('Finished early after %3d epochs' % best_epoch) 373 | print('Best loss achieved %5.2f' % best_loss) 374 | break 375 | 376 | if verbose > 0: 377 | end_time_sec = time.time() 378 | total_time_sec = end_time_sec - start_time_sec 379 | time_per_epoch_sec = total_time_sec / epochs 380 | print() 381 | print('Time total: %5.2f sec' % total_time_sec) 382 | print('Time per epoch: %5.2f sec' % time_per_epoch_sec) 383 | 384 | return history 385 | 386 | 387 | def regularization_loss(model, laplace_scale=None, gaussian_scale=None): 388 | """ 389 | Compute regularization loss. 390 | 391 | Parameters 392 | ---------- 393 | model : ``zuko.flows.Flow`` 394 | Normalizing flow model. 395 | laplace_scale : ``float``, optional 396 | Laplace regularization scale. Default: ``None``. 397 | gaussian_scale : ``float``, optional 398 | Gaussian regularization scale. Default: ``None``. 399 | 400 | Returns 401 | ------- 402 | Regularization loss. 403 | """ 404 | total_laplace = 0.0 405 | total_gaussian = 0.0 406 | 407 | for i, transform in enumerate(model.transforms): 408 | if hasattr(transform, "hyper"): 409 | for parameter_name, parameter in transform.hyper.named_parameters(): 410 | if parameter_name.endswith('weight'): 411 | if laplace_scale is not None: 412 | total_laplace += parameter.abs().sum() 413 | if gaussian_scale is not None: 414 | total_gaussian += parameter.square().sum() 415 | 416 | total = 0.0 417 | if laplace_scale is not None: 418 | total += - total_laplace / laplace_scale 419 | if gaussian_scale is not None: 420 | total += - total_gaussian / (2.0 * gaussian_scale**2.0) 421 | 422 | return total -------------------------------------------------------------------------------- /pocomc/geometry.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from .student import fit_mvstud 4 | from .tools import systematic_resample 5 | 6 | class Geometry: 7 | """ 8 | Geometry class for the POCOMC algorithm. 9 | 10 | Attributes 11 | ---------- 12 | normal_mean : array_like 13 | Mean of the normal distribution. 14 | normal_cov : array_like 15 | Covariance matrix of the normal distribution. 16 | t_mean : array_like 17 | Mean of the t distribution. 18 | t_cov : array_like 19 | Covariance matrix of the t distribution. 20 | t_nu : float 21 | Degrees of freedom of the t distribution. 
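
Examples
--------
A minimal sketch with illustrative random data (the shapes, not the values, are
the point here):

>>> import numpy as np
>>> from pocomc.geometry import Geometry
>>> theta = np.random.randn(500, 3)
>>> geom = Geometry()
>>> geom.fit(theta)      # fits both the Gaussian and the Student-t approximation
>>> geom.normal_mean.shape
(3,)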
22 | """ 23 | 24 | def __init__(self): 25 | self.normal_mean = None 26 | self.normal_cov = None 27 | self.t_mean = None 28 | self.t_cov = None 29 | self.t_nu = None 30 | 31 | def fit(self, theta, weights=None): 32 | """ 33 | 34 | Parameters 35 | ---------- 36 | theta : array_like 37 | Array of samples. 38 | weights : array_like, optional 39 | Array of weights. The default is None. 40 | """ 41 | 42 | # Learn normal distribution 43 | if weights is None: 44 | self.normal_mean = np.mean(theta, axis=0) 45 | self.normal_cov = np.cov(theta.T) 46 | else: 47 | self.normal_mean = np.average(theta, axis=0, weights=weights) 48 | self.normal_cov = np.cov(theta.T, aweights=weights) 49 | 50 | # Learn t distribution 51 | if weights is not None: 52 | idx_resampled = systematic_resample(len(theta), weights=weights) 53 | theta_resampled = theta[idx_resampled] 54 | self.t_mean, self.t_cov, self.t_nu = fit_mvstud(theta_resampled) 55 | else: 56 | self.t_mean, self.t_cov, self.t_nu = fit_mvstud(theta) 57 | 58 | if ~np.isfinite(self.t_nu): 59 | self.t_nu = 1e6 -------------------------------------------------------------------------------- /pocomc/input_validation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def assert_array_2d(x: np.ndarray): 5 | if len(x.shape) != 2: 6 | raise ValueError(f"Input should have 2 dimensions, but got {len(x.shape)}") 7 | 8 | 9 | def assert_array_1d(x: np.ndarray): 10 | if len(x.shape) != 1: 11 | raise ValueError(f"Input should have 1 dimension, but got {len(x.shape)}") 12 | 13 | 14 | def assert_equal_type(x, y): 15 | if type(x) != type(y): 16 | raise ValueError(f"Expected inputs to have equal types, but got {type(x)} and {type(y)}") 17 | 18 | 19 | def assert_arrays_equal_shape(x: np.ndarray, 20 | y: np.ndarray): 21 | if x.shape != y.shape: 22 | raise ValueError(f"Inputs should have equal shape, but got {x.shape} and {y.shape}") 23 | 24 | 25 | def assert_array_within_interval(x: np.ndarray, 26 | left: np.ndarray, 27 | right: np.ndarray, 28 | left_open: bool = False, 29 | right_open: bool = False): 30 | left = left.copy() 31 | left[np.isnan(left)] = -np.inf 32 | 33 | right = right.copy() 34 | right[np.isnan(right)] = np.inf 35 | 36 | if left_open and right_open: 37 | condition = (left < x) & (x < right) 38 | interval_string = f'({left}, {right})' 39 | elif left_open and not right_open: 40 | condition = (left < x) & (x <= right) 41 | interval_string = f'({left}, {right}]' 42 | elif not left_open and right_open: 43 | condition = (left <= x) & (x < right) 44 | interval_string = f'[{left}, {right})' 45 | else: 46 | condition = (left <= x) & (x <= right) 47 | interval_string = f'[{left}, {right}]' 48 | 49 | if not np.all(condition): 50 | x_min = np.min(x) 51 | x_max = np.max(x) 52 | raise ValueError(f"Expected input to be within interval {interval_string}, " 53 | f"but got minimum = {x_min} and maximum = {x_max}") 54 | 55 | 56 | def assert_array_float(x: np.ndarray): 57 | if not np.issubdtype(x.dtype, np.floating): 58 | raise ValueError(f"Expected input to have dtype float, but got {x.dtype}") 59 | -------------------------------------------------------------------------------- /pocomc/parallel.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import atexit 3 | 4 | MPI = None 5 | 6 | def _import_mpi(use_dill=False): 7 | global MPI 8 | try: 9 | from mpi4py import MPI as _MPI 10 | if use_dill: 11 | import dill 12 | _MPI.pickle.__init__(dill.dumps, dill.loads, 
dill.HIGHEST_PROTOCOL) 13 | MPI = _MPI 14 | except: 15 | raise ImportError("Please install mpi4py") 16 | 17 | return MPI 18 | 19 | 20 | class MPIPool: 21 | r"""A processing pool that distributes tasks using MPI. 22 | With this pool class, the master process distributes tasks to worker 23 | processes using an MPI communicator. 24 | 25 | 26 | Parameters 27 | ---------- 28 | comm : :class:`mpi4py.MPI.Comm`, optional 29 | An MPI communicator to distribute tasks with. If ``None``, this uses 30 | ``MPI.COMM_WORLD`` by default. 31 | use_dill : bool, optional 32 | If ``True``, use dill for pickling objects. This is useful for 33 | pickling functions and objects that are not picklable by the default 34 | pickle module. Default is ``True``. 35 | 36 | Notes 37 | ----- 38 | This implementation is inspired by @juliohm in `this module 39 | `_ 40 | and was adapted from schwimmbad. 41 | """ 42 | 43 | def __init__(self, comm=None, use_dill=True): 44 | 45 | global MPI 46 | if MPI is None: 47 | MPI = _import_mpi(use_dill=use_dill) 48 | 49 | self.comm = MPI.COMM_WORLD if comm is None else comm 50 | 51 | self.master = 0 52 | self.rank = self.comm.Get_rank() 53 | 54 | atexit.register(lambda: MPIPool.close(self)) 55 | 56 | if not self.is_master(): 57 | # workers branch here and wait for work 58 | self.wait() 59 | sys.exit(0) 60 | 61 | self.workers = set(range(self.comm.size)) 62 | self.workers.discard(self.master) 63 | self.size = self.comm.Get_size() - 1 64 | 65 | if self.size == 0: 66 | raise ValueError("Tried to create an MPI pool, but there " 67 | "was only one MPI process available. " 68 | "Need at least two.") 69 | 70 | 71 | def wait(self): 72 | r"""Tell the workers to wait and listen for the master process. This is 73 | called automatically when using :meth:`MPIPool.map` and doesn't need to 74 | be called by the user. 75 | """ 76 | if self.is_master(): 77 | return 78 | 79 | status = MPI.Status() 80 | while True: 81 | task = self.comm.recv(source=self.master, tag=MPI.ANY_TAG, status=status) 82 | 83 | if task is None: 84 | # Worker told to quit work 85 | break 86 | 87 | func, arg = task 88 | result = func(arg) 89 | # Worker is sending answer with tag 90 | self.comm.ssend(result, self.master, status.tag) 91 | 92 | 93 | def map(self, worker, tasks): 94 | r"""Evaluate a function or callable on each task in parallel using MPI. 95 | The callable, ``worker``, is called on each element of the ``tasks`` 96 | iterable. The results are returned in the expected order. 97 | 98 | Parameters 99 | ---------- 100 | worker : callable 101 | A function or callable object that is executed on each element of 102 | the specified ``tasks`` iterable. This object must be picklable 103 | (i.e. it can't be a function scoped within a function or a 104 | ``lambda`` function). This should accept a single positional 105 | argument and return a single object. 106 | tasks : iterable 107 | A list or iterable of tasks. Each task can be itself an iterable 108 | (e.g., tuple) of values or data to pass in to the worker function. 109 | 110 | Returns 111 | ------- 112 | results : list 113 | A list of results from the output of each ``worker()`` call. 114 | """ 115 | 116 | # If not the master just wait for instructions. 
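        # Workers that do reach this point are parked in wait() and return nothing;
        # only the master runs the dispatch loop below. Each pending task is sent to
        # a free worker as a (function, argument) pair tagged with its task index,
        # and the result that comes back carrying that tag is stored at the matching
        # position, so the returned list preserves the order of ``tasks``.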
117 | if not self.is_master(): 118 | self.wait() 119 | return 120 | 121 | 122 | workerset = self.workers.copy() 123 | tasklist = [(tid, (worker, arg)) for tid, arg in enumerate(tasks)] 124 | resultlist = [None] * len(tasklist) 125 | pending = len(tasklist) 126 | 127 | while pending: 128 | if workerset and tasklist: 129 | worker = workerset.pop() 130 | taskid, task = tasklist.pop() 131 | # "Sent task %s to worker %s with tag %s" 132 | self.comm.send(task, dest=worker, tag=taskid) 133 | 134 | if tasklist: 135 | flag = self.comm.Iprobe(source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG) 136 | if not flag: 137 | continue 138 | else: 139 | self.comm.Probe(source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG) 140 | 141 | status = MPI.Status() 142 | result = self.comm.recv(source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG, 143 | status=status) 144 | worker = status.source 145 | taskid = status.tag 146 | 147 | # "Master received from worker %s with tag %s" 148 | 149 | workerset.add(worker) 150 | resultlist[taskid] = result 151 | pending -= 1 152 | 153 | return resultlist 154 | 155 | 156 | def close(self): 157 | """ Tell all the workers to quit.""" 158 | if self.is_worker(): 159 | return 160 | 161 | for worker in self.workers: 162 | self.comm.send(None, worker, 0) 163 | 164 | 165 | def is_master(self): 166 | return self.rank == 0 167 | 168 | 169 | def is_worker(self): 170 | return self.rank != 0 171 | 172 | 173 | def __enter__(self): 174 | return self 175 | 176 | 177 | def __exit__(self, *args): 178 | self.close() -------------------------------------------------------------------------------- /pocomc/particles.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | import numpy as np 3 | 4 | class Particles: 5 | """ 6 | Class to store the particles and their associated weights. 7 | 8 | Parameters 9 | ---------- 10 | n_particles : int 11 | Number of particles. 12 | n_dim : int 13 | Dimension of the parameter space. 14 | ess_threshold : float, optional 15 | Threshold for the effective sample size. If the effective sample size 16 | is below this threshold, the weights are set to zero. This is useful 17 | for the case where the effective sample size is very small, but not 18 | exactly zero, due to numerical errors. 19 | 20 | Attributes 21 | ---------- 22 | n_particles : int 23 | Number of particles. 24 | n_dim : int 25 | Dimension of the parameter space. 26 | ess_threshold : float, optional 27 | Threshold for the effective sample size. If the effective sample size 28 | is below this threshold, the weights are set to zero. This is useful 29 | for the case where the effective sample size is very small, but not 30 | exactly zero, due to numerical errors. 31 | u : numpy.ndarray 32 | Array of shape (n_particles, n_dim) containing the particles. 33 | logdetj : numpy.ndarray 34 | Array of shape (n_particles,) containing the log-determinant of the 35 | Jacobian of the transformation from the unit hypercube to the 36 | parameter space. 37 | logl : numpy.ndarray 38 | Array of shape (n_particles,) containing the log-likelihoods. 39 | logp : numpy.ndarray 40 | Array of shape (n_particles,) containing the log-priors. 41 | logw : numpy.ndarray 42 | Array of shape (n_particles,) containing the log-weights. 43 | iter : numpy.ndarray 44 | Array of shape (n_particles,) containing the iteration number of each 45 | particle. 46 | logz : numpy.ndarray 47 | Array of shape (n_particles,) containing the log-evidence of each 48 | particle. 
49 | calls : numpy.ndarray 50 | Array of shape (n_particles,) containing the number of likelihood 51 | evaluations of each particle. 52 | steps : numpy.ndarray 53 | Array of shape (n_particles,) containing the number of steps of each 54 | particle. 55 | efficiency : numpy.ndarray 56 | Array of shape (n_particles,) containing the efficiency of each 57 | particle. 58 | ess : numpy.ndarray 59 | Array of shape (n_particles,) containing the effective sample size of 60 | each particle. 61 | accept : numpy.ndarray 62 | Array of shape (n_particles,) containing the acceptance rate of each 63 | particle. 64 | beta : numpy.ndarray 65 | Array of shape (n_particles,) containing the inverse temperature of 66 | each particle. 67 | """ 68 | 69 | def __init__(self, n_particles, n_dim): 70 | self.n_particles = n_particles 71 | self.n_dim = n_dim 72 | 73 | self.past = dict( 74 | u = [], 75 | x = [], 76 | logdetj = [], 77 | logl = [], 78 | logp = [], 79 | logw = [], 80 | blobs = [], 81 | iter = [], 82 | logz = [], 83 | calls = [], 84 | steps = [], 85 | efficiency = [], 86 | ess = [], 87 | accept = [], 88 | beta = [], 89 | ) 90 | 91 | self.results_dict = None 92 | 93 | def update(self, data): 94 | """ 95 | Update the particles with the given data. 96 | 97 | Parameters 98 | ---------- 99 | data : dict 100 | Dictionary containing the data to be added to the particles. 101 | 102 | Notes 103 | ----- 104 | The dictionary must contain the following keys: 105 | u : numpy.ndarray 106 | Array of shape (n_particles, n_dim) containing the particles. 107 | logdetj : numpy.ndarray 108 | Array of shape (n_particles,) containing the log-determinant 109 | of the Jacobian of the transformation from the unit hypercube 110 | to the parameter space. 111 | logl : numpy.ndarray 112 | Array of shape (n_particles,) containing the log-likelihoods. 113 | logp : numpy.ndarray 114 | Array of shape (n_particles,) containing the log-priors. 115 | logw : numpy.ndarray 116 | Array of shape (n_particles,) containing the log-weights. 117 | blobs : numpy.ndarray 118 | Array of shape (n_particles,) containing the blobs (derived parameters). 119 | iter : numpy.ndarray 120 | Array of shape (n_particles,) containing the iteration number 121 | of each particle. 122 | logz : numpy.ndarray 123 | Array of shape (n_particles,) containing the log-evidence of 124 | each particle. 125 | calls : numpy.ndarray 126 | Array of shape (n_particles,) containing the number of 127 | likelihood evaluations of each particle. 128 | steps : numpy.ndarray 129 | Array of shape (n_particles,) containing the number of steps 130 | of each particle. 131 | efficiency : numpy.ndarray 132 | Array of shape (n_particles,) containing the efficiency of 133 | each particle. 134 | ess : numpy.ndarray 135 | Array of shape (n_particles,) containing the effective sample 136 | size of each particle. 137 | accept : numpy.ndarray 138 | Array of shape (n_particles,) containing the acceptance rate 139 | of each particle. 140 | beta : numpy.ndarray 141 | Array of shape (n_particles,) containing the inverse 142 | temperature of each particle. 143 | """ 144 | for key in data.keys(): 145 | if key in self.past.keys(): 146 | value = data.get(key) 147 | # Save to past states 148 | self.past.get(key).append(value) 149 | 150 | def pop(self, key): 151 | """ 152 | Remove the last element of the given key. 153 | 154 | Parameters 155 | ---------- 156 | key : str 157 | Key of the element to be removed. 
158 | 159 | Notes 160 | ----- 161 | This method is useful to remove the last element of the particles 162 | after the resampling step. 163 | """ 164 | _ = self.past.get(key).pop() 165 | 166 | def get(self, key, index=None, flat=False): 167 | """ 168 | Get the element of the given key. 169 | 170 | Parameters 171 | ---------- 172 | key : str 173 | Key of the element to be returned. 174 | index : int, optional 175 | Index of the element to be returned. If None, all elements are 176 | returned. 177 | flat : bool, optional 178 | If True, the elements are returned as a flattened array. Otherwise, 179 | the elements are returned as a numpy.ndarray. 180 | 181 | Returns 182 | ------- 183 | element : numpy.ndarray 184 | Array of shape (n_particles,) or (n_particles, n_dim) containing 185 | the elements of the given key. 186 | 187 | Notes 188 | ----- 189 | If index is None, the elements are returned as a numpy.ndarray. If 190 | index is not None, the elements are returned as a numpy.ndarray with 191 | shape (n_dim,). If flat is True, the elements are returned as a 192 | flattened array. 193 | 194 | Examples 195 | -------- 196 | >>> particles = Particles(n_particles=10, n_dim=2) 197 | >>> particles.update(dict(u=np.random.randn(10,2))) 198 | >>> particles.get("u").shape 199 | (10, 2) 200 | >>> particles.get("u", index=0).shape 201 | (2,) 202 | >>> particles.get("u", index=0, flat=True).shape 203 | (2,) 204 | >>> particles.get("u", index=None, flat=True).shape 205 | (20,) 206 | """ 207 | if index is None: 208 | if flat: 209 | return np.concatenate(self.past.get(key)) 210 | else: 211 | return np.asarray(self.past.get(key)) 212 | else: 213 | return self.past.get(key)[index] 214 | 215 | def compute_logw_and_logz(self, beta_final=1.0, normalize=True): 216 | 217 | logz = self.get("logz") 218 | logl = self.get("logl") 219 | beta = self.get("beta") 220 | 221 | A = logl * beta_final 222 | b = np.array([logl * beta[i] - logz[i] for i in range(len(beta))]) 223 | B = np.logaddexp.reduce(b, axis=0) - np.log(len(beta)) 224 | logw = A - B 225 | logw = np.concatenate(logw) 226 | logz_new = np.logaddexp.reduce(logw) - np.log(len(logw)) 227 | 228 | if normalize: 229 | logw -= np.logaddexp.reduce(logw) 230 | 231 | return logw, logz_new 232 | 233 | def compute_results(self): 234 | """ 235 | Compute the results of the particles. 236 | 237 | Returns 238 | ------- 239 | results_dict : dict 240 | Dictionary containing the results of the particles. 241 | 242 | Notes 243 | ----- 244 | The dictionary contains the following keys: 245 | u : numpy.ndarray 246 | Array of shape (n_particles, n_dim) containing the particles. 247 | logdetj : numpy.ndarray 248 | Array of shape (n_particles,) containing the log-determinant 249 | of the Jacobian of the transformation from the unit hypercube 250 | to the parameter space. 251 | logl : numpy.ndarray 252 | Array of shape (n_particles,) containing the log-likelihoods. 253 | logp : numpy.ndarray 254 | Array of shape (n_particles,) containing the log-priors. 255 | logw : numpy.ndarray 256 | Array of shape (n_particles,) containing the log-weights. 257 | blobs : numpy.ndarray 258 | Array of shape (n_particles,) containing the blobs (derived parameters). 259 | iter : numpy.ndarray 260 | Array of shape (n_particles,) containing the iteration number 261 | of each particle. 262 | logz : numpy.ndarray 263 | Array of shape (n_particles,) containing the log-evidence of 264 | each particle. 
265 | calls : numpy.ndarray 266 | Array of shape (n_particles,) containing the number of 267 | likelihood evaluations of each particle. 268 | steps : numpy.ndarray 269 | Array of shape (n_particles,) containing the number of steps 270 | of each particle. 271 | efficiency : numpy.ndarray 272 | Array of shape (n_particles,) containing the efficiency of 273 | each particle. 274 | ess : numpy.ndarray 275 | Array of shape (n_particles,) containing the effective sample 276 | size of each particle. 277 | accept : numpy.ndarray 278 | Array of shape (n_particles,) containing the acceptance rate 279 | of each particle. 280 | beta : numpy.ndarray 281 | Array of shape (n_particles,) containing the inverse 282 | temperature of each particle. 283 | 284 | Examples 285 | -------- 286 | >>> particles = Particles(n_particles=10, n_dim=2) 287 | >>> particles.update(dict(u=np.random.randn(10,2))) 288 | >>> particles.compute_results().keys() 289 | dict_keys(['u', 'logdetj', 'logl', 'logp', 'logw', 'blobs', 'iter', 'logz', 'calls', 'steps', 'efficiency', 'ess', 'accept', 'beta']) 290 | """ 291 | if self.results_dict is None: 292 | self.results_dict = dict() 293 | for key in self.past.keys(): 294 | self.results_dict[key] = self.get(key) 295 | 296 | logw, _ = self.compute_logw_and_logz(1.0) 297 | 298 | self.results_dict["logw"] = logw 299 | #self.results_dict["ess"] = np.exp(log_ess) 300 | 301 | return self.results_dict 302 | 303 | -------------------------------------------------------------------------------- /pocomc/prior.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class Prior: 4 | """ 5 | A class for priors. 6 | 7 | Parameters 8 | ---------- 9 | dists : list of scipy.stats distributions 10 | A list of distributions for each parameter. The length of the list 11 | determines the dimension of the prior. 12 | 13 | Attributes 14 | ---------- 15 | dists : list of scipy.stats distributions 16 | A list of distributions for each parameter. The length of the list 17 | determines the dimension of the prior. 18 | bounds : ndarray 19 | An array of shape (dim, 2) containing the lower and upper bounds for 20 | each parameter. 21 | dim : int 22 | The dimension of the prior. 23 | 24 | Methods 25 | ------- 26 | logpdf(x) 27 | Returns the log of the probability density function evaluated at x. 28 | rvs(size=1) 29 | Returns a random sample from the prior. 30 | 31 | Examples 32 | -------- 33 | >>> import numpy as np 34 | >>> from scipy.stats import norm, uniform 35 | >>> from pocomc.prior import Prior 36 | >>> dists = [norm(loc=0, scale=1), uniform(loc=0, scale=1)] 37 | >>> prior = Prior(dists) 38 | >>> prior.logpdf(np.array([0, 0])) 39 | -1.8378770664093453 40 | >>> prior.rvs() 41 | array([0.417022 , 0.72032449]) 42 | >>> prior.bounds 43 | array([[-inf, inf], 44 | [ 0. , 1. ]]) 45 | >>> prior.dim 46 | 2 47 | 48 | Notes 49 | ----- 50 | The logpdf method is implemented as a sum of the logpdf methods of the 51 | individual distributions. This is equivalent to assuming that the 52 | parameters are independent. 53 | 54 | The rvs method is implemented by sampling from each distribution 55 | independently and then transposing the result. This is equivalent to 56 | assuming that the parameters are independent. 57 | 58 | The bounds property is implemented by calling the support method of each 59 | distribution. This is equivalent to assuming that the parameters are 60 | independent. 
61 | 62 | The dim property is implemented by returning the length of the dists 63 | attribute. This is equivalent to assuming that the parameters are 64 | independent. 65 | """ 66 | 67 | def __init__(self, dists=None): 68 | self.dists = dists 69 | 70 | def logpdf(self, x): 71 | """ 72 | Returns the log of the probability density function evaluated at x. 73 | 74 | Parameters 75 | ---------- 76 | x : ndarray 77 | An array of shape (n, dim) containing n samples of the parameters. 78 | 79 | Returns 80 | ------- 81 | logp : ndarray 82 | An array of shape (n,) containing the log of the probability 83 | density function evaluated at each sample. 84 | 85 | Examples 86 | -------- 87 | >>> import numpy as np 88 | >>> from scipy.stats import norm, uniform 89 | >>> from pocomc.prior import Prior 90 | >>> dists = [norm(loc=0, scale=1), uniform(loc=0, scale=1)] 91 | >>> prior = Prior(dists) 92 | >>> prior.logpdf(np.array([0, 0])) 93 | -1.8378770664093453 94 | >>> prior.logpdf(np.array([[0, 0], [0, 0]])) 95 | array([-1.83787707, -1.83787707]) 96 | """ 97 | logp = np.zeros(len(x)) 98 | for i, dist in enumerate(self.dists): 99 | logp += dist.logpdf(x[:,i]) 100 | return logp 101 | 102 | def rvs(self, size=1): 103 | """ 104 | Returns a random sample from the prior. 105 | 106 | Parameters 107 | ---------- 108 | size : int, optional 109 | The number of samples to return. The default is 1. 110 | 111 | Returns 112 | ------- 113 | samples : ndarray 114 | An array of shape (size, dim) containing the samples. 115 | 116 | Examples 117 | -------- 118 | >>> import numpy as np 119 | >>> from scipy.stats import norm, uniform 120 | >>> from pocomc.prior import Prior 121 | >>> dists = [norm(loc=0, scale=1), uniform(loc=0, scale=1)] 122 | >>> prior = Prior(dists) 123 | >>> prior.rvs() 124 | array([0.417022 , 0.72032449]) 125 | >>> prior.rvs(size=2) 126 | array([[0.417022 , 0.72032449], 127 | [0.00011438, 0.30233257]]) 128 | """ 129 | samples = [] 130 | for dist in self.dists: 131 | samples.append(dist.rvs(size=size)) 132 | return np.transpose(samples) 133 | 134 | @property 135 | def bounds(self): 136 | """ 137 | An array of shape (dim, 2) containing the lower and upper bounds for 138 | each parameter. 139 | 140 | Examples 141 | -------- 142 | >>> import numpy as np 143 | >>> from scipy.stats import norm, uniform 144 | >>> from pocomc.prior import Prior 145 | >>> dists = [norm(loc=0, scale=1), uniform(loc=0, scale=1)] 146 | >>> prior = Prior(dists) 147 | >>> prior.bounds 148 | array([[-inf, inf], 149 | [ 0. , 1. ]]) 150 | """ 151 | bounds = [] 152 | for dist in self.dists: 153 | bounds.append(dist.support()) 154 | return np.array(bounds) 155 | 156 | @property 157 | def dim(self): 158 | """ 159 | The dimension of the prior. 
160 | 161 | Examples 162 | -------- 163 | >>> import numpy as np 164 | >>> from scipy.stats import norm, uniform 165 | >>> from pocomc.prior import Prior 166 | >>> dists = [norm(loc=0, scale=1), uniform(loc=0, scale=1)] 167 | >>> prior = Prior(dists) 168 | >>> prior.dim 169 | 2 170 | """ 171 | return len(self.dists) -------------------------------------------------------------------------------- /pocomc/scaler.py: -------------------------------------------------------------------------------- 1 | from typing import Union, List 2 | 3 | import numpy as np 4 | from scipy.special import erf, erfinv 5 | 6 | from .input_validation import assert_array_float, assert_array_within_interval 7 | 8 | class Reparameterize: 9 | """ 10 | Class that reparameterises the model using change-of-variables parameter transformations. 11 | 12 | Parameters 13 | ---------- 14 | n_dim : ``int`` 15 | Dimensionality of sampling problem 16 | bounds : ``np.ndarray`` or ``list`` or ``None`` 17 | Parameter bounds 18 | periodic : ``list`` 19 | List of indices corresponding to parameters with periodic boundary conditions 20 | reflective : ``list`` 21 | List of indices corresponding to parameters with reflective boundary conditions 22 | transform : ``str`` 23 | Type of transform to use for bounded parameters. Options are ``"probit"`` 24 | (default) and ``"logit"``. 25 | scale : ``bool`` 26 | Rescale parameters to zero mean and unit variance (default is true) 27 | diagonal : ``bool`` 28 | Use diagonal transformation (i.e. ignore covariance) (default is true) 29 | 30 | Examples 31 | -------- 32 | >>> import numpy as np 33 | >>> from pocomc.reparameterize import Reparameterize 34 | >>> bounds = np.array([[0, 1], [0, 1]]) 35 | >>> reparam = Reparameterize(2, bounds) 36 | >>> x = np.array([[0.5, 0.5], [0.5, 0.5]]) 37 | >>> reparam.forward(x) 38 | array([[0., 0.], 39 | [0., 0.]]) 40 | >>> u = np.array([[0, 0], [0, 0]]) 41 | >>> reparam.inverse(u) 42 | (array([[0.5, 0.5], 43 | [0.5, 0.5]]), array([0., 0.])) 44 | """ 45 | def __init__(self, 46 | n_dim: int, 47 | bounds: Union[np.ndarray, list] = None, 48 | periodic: List[int] = None, 49 | reflective: List[int] = None, 50 | transform: str = "probit", 51 | scale: bool = True, 52 | diagonal: bool = True): 53 | 54 | self.ndim = n_dim 55 | 56 | if bounds is None: 57 | bounds = np.full((self.ndim, 2), np.inf) 58 | elif len(bounds) == 2 and not np.shape(bounds) == (2, 2): 59 | bounds = np.tile(np.array(bounds, dtype=np.float32).reshape(2, 1), self.ndim).T 60 | assert_array_float(bounds) 61 | 62 | self.low = bounds.T[0] 63 | self.high = bounds.T[1] 64 | 65 | self.periodic = periodic 66 | self.reflective = reflective 67 | 68 | if transform not in ["logit", "probit"]: 69 | raise ValueError("Please provide a valid transformation function (e.g. logit or probit)") 70 | else: 71 | self.transform = transform 72 | 73 | self.mu = None 74 | self.sigma = None 75 | self.cov = None 76 | self.L = None 77 | self.L_inv = None 78 | self.log_det_L = None 79 | self.scale = scale 80 | self.diagonal = diagonal 81 | 82 | self._create_masks() 83 | 84 | def apply_boundary_conditions_x(self, x: np.ndarray): 85 | """ 86 | Apply boundary conditions (i.e. periodic or reflective) to input ``x``. 87 | The first kind include phase parameters that might be periodic 88 | e.g. on a range ``[0,2*np.pi]``. The latter can arise in cases 89 | where parameters are ratios where ``a/b`` and ``b/a`` are equivalent. 
90 | 91 | Parameters 92 | ---------- 93 | x : np.ndarray 94 | Input array 95 | 96 | Returns 97 | ------- 98 | Transformed input 99 | """ 100 | if (self.periodic is None) and (self.reflective is None): 101 | return x 102 | elif self.periodic is None: 103 | return self._apply_reflective_boundary_conditions_x(x) 104 | elif self.reflective is None: 105 | return self._apply_periodic_boundary_conditions_x(x) 106 | else: 107 | return self._apply_reflective_boundary_conditions_x(self._apply_periodic_boundary_conditions_x(x)) 108 | 109 | def _apply_periodic_boundary_conditions_x(self, x: np.ndarray): 110 | """ 111 | Apply periodic boundary conditions to input ``x``. 112 | This can be useful for phase parameters that might be periodic 113 | e.g. on a range ``[0,2*np.pi]`` 114 | 115 | Parameters 116 | ---------- 117 | x : np.ndarray 118 | Input array 119 | 120 | Returns 121 | ------- 122 | Transformed input. 123 | """ 124 | if self.periodic is not None: 125 | x = x.copy() 126 | for i in self.periodic: 127 | for j in range(len(x)): 128 | while x[j, i] > self.high[i]: 129 | x[j, i] = self.low[i] + x[j, i] - self.high[i] 130 | while x[j, i] < self.low[i]: 131 | x[j, i] = self.high[i] + x[j, i] - self.low[i] 132 | return x 133 | 134 | def _apply_reflective_boundary_conditions_x(self, x: np.ndarray): 135 | """ 136 | Apply reflective boundary conditions to input ``x``. This can arise in cases 137 | where parameters are ratios where ``a/b`` and ``b/a`` are equivalent. 138 | 139 | Parameters 140 | ---------- 141 | x : np.ndarray 142 | Input array 143 | 144 | Returns 145 | ------- 146 | Transformed input. 147 | """ 148 | if self.reflective is not None: 149 | x = x.copy() 150 | for i in self.reflective: 151 | for j in range(len(x)): 152 | while x[j, i] > self.high[i]: 153 | x[j, i] = self.high[i] - x[j, i] + self.high[i] 154 | while x[j, i] < self.low[i]: 155 | x[j, i] = self.low[i] + self.low[i] - x[j, i] 156 | 157 | return x 158 | 159 | def fit(self, x: np.ndarray): 160 | """ 161 | Learn mean and standard deviation using for rescaling. 162 | 163 | Parameters 164 | ---------- 165 | x : np.ndarray 166 | Input data used for training. 167 | """ 168 | assert_array_within_interval(x, self.low, self.high) 169 | 170 | u = self._forward(x) 171 | self.mu = np.mean(u, axis=0) 172 | if self.diagonal: 173 | self.sigma = np.std(u, axis=0) 174 | else: 175 | self.cov = np.cov(u.T) 176 | self.L = np.linalg.cholesky(self.cov) 177 | self.L_inv = np.linalg.inv(self.L) 178 | self.log_det_L = np.linalg.slogdet(self.L)[1] 179 | 180 | def forward(self, x: np.ndarray, check_input=True): 181 | """ 182 | Forward transformation (both logit/probit for bounds and affine for all parameters). 183 | 184 | Parameters 185 | ---------- 186 | x : np.ndarray 187 | Input data 188 | check_input : bool 189 | Check if input is within bounds (default: True) 190 | Returns 191 | ------- 192 | u : np.ndarray 193 | Transformed input data 194 | """ 195 | if check_input: 196 | assert_array_within_interval(x, self.low, self.high) 197 | 198 | u = self._forward(x) 199 | if self.scale: 200 | u = self._forward_affine(u) 201 | 202 | return u 203 | 204 | def inverse(self, u: np.ndarray): 205 | """ 206 | Inverse transformation (both logit^-1/probit^-1 for bounds and affine for all parameters). 207 | 208 | Parameters 209 | ---------- 210 | u : np.ndarray 211 | Input data 212 | Returns 213 | ------- 214 | x : np.ndarray 215 | Transformed input data 216 | log_det_J : np.array 217 | Logarithm of determinant of Jacobian matrix transformation. 
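
Examples
--------
A round-trip sketch with illustrative bounds (``scale=False`` so that no prior
call to ``fit`` is needed):

>>> import numpy as np
>>> from pocomc.scaler import Reparameterize
>>> repar = Reparameterize(2, bounds=np.array([[0., 1.], [0., 1.]]), scale=False)
>>> u = repar.forward(np.array([[0.2, 0.8]]))
>>> x, log_det_J = repar.inverse(u)
>>> np.allclose(x, [[0.2, 0.8]])
True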
218 | """ 219 | if self.scale: 220 | x, log_det_J = self._inverse_affine(u) 221 | x, log_det_J_prime = self._inverse(x) 222 | log_det_J += log_det_J_prime 223 | else: 224 | x, log_det_J = self._inverse(u) 225 | 226 | return x, log_det_J 227 | 228 | def _forward(self, x: np.ndarray): 229 | """ 230 | Forward transformation (only logit/probit for bounds). 231 | 232 | Parameters 233 | ---------- 234 | x : np.ndarray 235 | Input data 236 | Returns 237 | ------- 238 | u : np.ndarray 239 | Transformed input data 240 | """ 241 | u = np.empty(x.shape) 242 | u[:, self.mask_none] = self._forward_none(x) 243 | u[:, self.mask_left] = self._forward_left(x) 244 | u[:, self.mask_right] = self._forward_right(x) 245 | u[:, self.mask_both] = self._forward_both(x) 246 | 247 | return u 248 | 249 | def _inverse(self, u: np.ndarray): 250 | """ 251 | Inverse transformation (only logit^-1/probit^-1 for bounds). 252 | 253 | Parameters 254 | ---------- 255 | u : np.ndarray 256 | Input data 257 | Returns 258 | ------- 259 | x : np.ndarray 260 | Transformed input data 261 | log_det_J : np.array 262 | Logarithm of determinant of Jacobian matrix transformation. 263 | """ 264 | x = np.empty(u.shape) 265 | J = np.empty(u.shape) 266 | 267 | x[:, self.mask_none], J[:, self.mask_none] = self._inverse_none(u) 268 | x[:, self.mask_left], J[:, self.mask_left] = self._inverse_left(u) 269 | x[:, self.mask_right], J[:, self.mask_right] = self._inverse_right(u) 270 | x[:, self.mask_both], J[:, self.mask_both] = self._inverse_both(u) 271 | 272 | log_det_J = np.sum(J, axis=1) 273 | 274 | return x, log_det_J 275 | 276 | def _forward_affine(self, x: np.ndarray): 277 | """ 278 | Forward affine transformation. 279 | 280 | Parameters 281 | ---------- 282 | x : np.ndarray 283 | Input data 284 | Returns 285 | ------- 286 | Transformed input data 287 | """ 288 | if self.diagonal: 289 | return (x - self.mu) / self.sigma 290 | else: 291 | return np.array([np.dot(self.L_inv, xi - self.mu) for xi in x]) 292 | 293 | def _inverse_affine(self, u: np.ndarray): 294 | """ 295 | Inverse affine transformation. 296 | 297 | Parameters 298 | ---------- 299 | u : np.ndarray 300 | Input data 301 | Returns 302 | ------- 303 | x : np.ndarray 304 | Transformed input data 305 | J : np.ndarray 306 | Diagonal of Jacobian matrix. 307 | """ 308 | if self.diagonal: 309 | log_det_J = np.sum(np.log(self.sigma)) 310 | return self.mu + self.sigma * u, log_det_J * np.ones(len(u)) 311 | else: 312 | x = self.mu + np.array([np.dot(self.L, ui) for ui in u]) 313 | return x, self.log_det_L * np.ones(len(u)) 314 | 315 | def _forward_left(self, x: np.ndarray): 316 | """ 317 | Forward transformation for bounded parameters (only low). 318 | 319 | Parameters 320 | ---------- 321 | x : np.ndarray 322 | Input data 323 | Returns 324 | ------- 325 | Transformed input data 326 | """ 327 | return np.log(x[:, self.mask_left] - self.low[self.mask_left]) 328 | 329 | def _inverse_left(self, u: np.ndarray): 330 | """ 331 | Inverse transformation for bounded parameters (only low). 332 | 333 | Parameters 334 | ---------- 335 | u : np.ndarray 336 | Input data 337 | Returns 338 | ------- 339 | x : np.ndarray 340 | Transformed input data 341 | J : np.array 342 | Diagonal of Jacobian matrix. 343 | """ 344 | p = np.exp(u[:, self.mask_left]) 345 | 346 | return np.exp(u[:, self.mask_left]) + self.low[self.mask_left], u[:, self.mask_left] 347 | 348 | def _forward_right(self, x: np.ndarray): 349 | """ 350 | Forward transformation for bounded parameters (only high). 
351 | 352 | Parameters 353 | ---------- 354 | x : np.ndarray 355 | Input data 356 | Returns 357 | ------- 358 | Transformed input data 359 | """ 360 | return np.log(self.high[self.mask_right] - x[:, self.mask_right]) 361 | 362 | def _inverse_right(self, u: np.ndarray): 363 | """ 364 | Inverse transformation for bounded parameters (only high). 365 | 366 | Parameters 367 | ---------- 368 | u : np.ndarray 369 | Input data 370 | Returns 371 | ------- 372 | x : np.ndarray 373 | Transformed input data 374 | J : np.array 375 | Diagonal of Jacobian matrix. 376 | """ 377 | 378 | return self.high[self.mask_right] - np.exp(u[:, self.mask_right]), u[:, self.mask_right] 379 | 380 | def _forward_both(self, x: np.ndarray): 381 | """ 382 | Forward transformation for bounded parameters (both low and high). 383 | 384 | Parameters 385 | ---------- 386 | x : np.ndarray 387 | Input data 388 | Returns 389 | ------- 390 | Transformed input data 391 | """ 392 | p = (x[:, self.mask_both] - self.low[self.mask_both]) / (self.high[self.mask_both] - self.low[self.mask_both]) 393 | np.clip(p, 1e-13, 1.0 - 1e-13) 394 | 395 | if self.transform == "logit": 396 | u = np.log(p / (1.0 - p)) 397 | elif self.transform == "probit": 398 | u = np.sqrt(2.0) * erfinv(2.0 * p - 1.0) 399 | 400 | return u 401 | 402 | def _inverse_both(self, u: np.ndarray): 403 | """ 404 | Inverse transformation for bounded parameters (both low and high). 405 | 406 | Parameters 407 | ---------- 408 | u : np.ndarray 409 | Input data 410 | Returns 411 | ------- 412 | x : np.ndarray 413 | Transformed input data 414 | J : np.array 415 | Diagonal of Jacobian matrix. 416 | """ 417 | if self.transform == "logit": 418 | p = np.exp(-np.logaddexp(0, -u[:, self.mask_both])) 419 | x = p * (self.high[self.mask_both] - self.low[self.mask_both]) + self.low[self.mask_both] 420 | J = np.log(self.high[self.mask_both] - self.low[self.mask_both]) + np.log(p) + np.log(1.0 - p) 421 | elif self.transform == "probit": 422 | p = ( erf(u[:, self.mask_both] / np.sqrt(2.0)) + 1.0 ) / 2.0 423 | x = p * (self.high[self.mask_both] - self.low[self.mask_both]) + self.low[self.mask_both] 424 | J = np.log(self.high[self.mask_both] - self.low[self.mask_both]) + (-u[:, self.mask_both]**2.0 / 2.0) - np.log(np.sqrt(2.0 * np.pi)) 425 | return x, J 426 | 427 | def _forward_none(self, x:np.ndarray): 428 | """ 429 | Forward transformation for unbounded parameters (this does nothing). 430 | 431 | Parameters 432 | ---------- 433 | x : np.ndarray 434 | Input data 435 | Returns 436 | ------- 437 | u : np.ndarray 438 | Transformed input data 439 | """ 440 | return x[:, self.mask_none] 441 | 442 | def _inverse_none(self, u:np.ndarray): 443 | """ 444 | Inverse transformation for unbounded parameters (this does nothing). 445 | 446 | Parameters 447 | ---------- 448 | u : np.ndarray 449 | Input data 450 | Returns 451 | ------- 452 | x : np.ndarray 453 | Transformed input data 454 | log_det_J : np.array 455 | Logarithm of determinant of Jacobian matrix transformation. 
456 | """ 457 | return u[:, self.mask_none], np.zeros(u.shape)[:, self.mask_none] 458 | 459 | def _create_masks(self): 460 | """ 461 | Create parameter masks for bounded parameters 462 | """ 463 | 464 | self.mask_left = np.zeros(self.ndim, dtype=bool) 465 | self.mask_right = np.zeros(self.ndim, dtype=bool) 466 | self.mask_both = np.zeros(self.ndim, dtype=bool) 467 | self.mask_none = np.zeros(self.ndim, dtype=bool) 468 | 469 | # TODO: Do this more elegantly, it's a shame 470 | for i in range(self.ndim): 471 | if not np.isfinite(self.low[i]) and not np.isfinite(self.high[i]): 472 | self.mask_none[i] = True 473 | self.mask_left[i] = False 474 | self.mask_right[i] = False 475 | self.mask_both[i] = False 476 | elif not np.isfinite(self.low[i]) and np.isfinite(self.high[i]): 477 | self.mask_none[i] = False 478 | self.mask_left[i] = False 479 | self.mask_right[i] = True 480 | self.mask_both[i] = False 481 | elif np.isfinite(self.low[i]) and not np.isfinite(self.high[i]): 482 | self.mask_none[i] = False 483 | self.mask_left[i] = True 484 | self.mask_right[i] = False 485 | self.mask_both[i] = False 486 | else: 487 | self.mask_none[i] = False 488 | self.mask_left[i] = False 489 | self.mask_right[i] = False 490 | self.mask_both[i] = True -------------------------------------------------------------------------------- /pocomc/student.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy import optimize 3 | from scipy import special 4 | 5 | def fit_mvstud(data, tolerance=1e-6, max_iter=100): 6 | """ 7 | Fit a multivariate Student's t distribution to data using the EM algorithm. 8 | 9 | Parameters 10 | ---------- 11 | data : ndarray 12 | An array of shape (dim, n) containing n samples of dimension dim. 13 | tolerance : float, optional 14 | The tolerance for convergence. The default is 1e-6. 15 | max_iter : int, optional 16 | The maximum number of iterations. The default is 100. 17 | 18 | Returns 19 | ------- 20 | mu : ndarray 21 | The mean of the distribution. 22 | Sigma : ndarray 23 | The covariance matrix of the distribution. 24 | nu : float 25 | The degrees of freedom of the distribution. 
26 | 27 | Examples 28 | -------- 29 | >>> import numpy as np 30 | >>> from pocomc.student import fit_mvstud 31 | >>> data = np.random.randn(2, 100) 32 | >>> mu, Sigma, nu = fit_mvstud(data) 33 | >>> mu 34 | array([ 0.00323705, -0.05405479]) 35 | >>> Sigma 36 | array([[ 1.00524016, -0.02020086], 37 | [-0.02020086, 0.99111344]]) 38 | >>> nu 39 | 20.000000000000004 40 | """ 41 | def opt_nu(delta_iobs, nu): 42 | def func0(nu): 43 | w_iobs = (nu + dim) / (nu + delta_iobs) 44 | f = -special.psi(nu/2) + np.log(nu/2) + np.sum(np.log(w_iobs))/n - np.sum(w_iobs)/n + 1 + special.psi((nu+dim)/2) - np.log((nu+dim)/2) 45 | return f 46 | 47 | if func0(1e300) >= 0: 48 | nu = np.inf 49 | else: 50 | nu = optimize.bisect(func0, 1e-300, 1e300) 51 | return nu 52 | 53 | data = data.T 54 | (dim,n) = data.shape 55 | mu = np.array([np.median(data,1)]).T 56 | Sigma = np.cov(data)*(n-1)/n + (1/n)*np.diag(np.var(data, axis=1)) 57 | nu = 20 58 | 59 | last_nu = 0 60 | i = 0 61 | while np.abs(last_nu - nu) > tolerance and i < max_iter: 62 | i += 1 63 | diffs = data - mu 64 | delta_iobs = np.sum(diffs * np.linalg.solve(Sigma,diffs), 0) 65 | 66 | # update nu 67 | last_nu = nu 68 | nu = opt_nu(delta_iobs, nu) 69 | if nu == np.inf: 70 | return mu.T[0], Sigma, nu 71 | 72 | # update Sigma 73 | w_iobs = (nu + dim) / (nu + delta_iobs) 74 | Sigma = np.dot(w_iobs*diffs, diffs.T) / n 75 | 76 | # update mu 77 | mu = np.sum(w_iobs * data, 1) / sum(w_iobs) 78 | mu = np.array([mu]).T 79 | 80 | if i == max_iter: 81 | print("Warning: EM algorithm did not converge.") 82 | print("Last nu: ", last_nu) 83 | print("Current nu: ", nu) 84 | 85 | return mu.T[0], Sigma, nu 86 | -------------------------------------------------------------------------------- /pocomc/threading.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | def configure_threads(pytorch_threads=None): 4 | """Configure the number of threads available. 5 | 6 | This is necessary when using PyTorch on the CPU as by default it will use 7 | all available threads. 8 | 9 | Notes 10 | ----- 11 | Uses ``torch.set_num_threads``. If pytorch threads is None but other 12 | arguments are specified then the value is inferred from them. 13 | 14 | Parameters 15 | ---------- 16 | pytorch_threads: int, optional 17 | Maximum number of threads for PyTorch on CPU. If None, pytorch will 18 | use all available threads. 19 | """ 20 | if pytorch_threads: 21 | torch.set_num_threads(pytorch_threads) -------------------------------------------------------------------------------- /pocomc/tools.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import math 3 | import torch 4 | from tqdm import tqdm 5 | import warnings 6 | 7 | SQRTEPS = math.sqrt(float(np.finfo(np.float64).eps)) 8 | 9 | 10 | def trim_weights(samples, weights, ess=0.99, bins=1000): 11 | """ 12 | Trim samples and weights to a given effective sample size. 13 | 14 | Parameters 15 | ---------- 16 | samples : ``np.ndarray`` 17 | Samples. 18 | weights : ``np.ndarray`` 19 | Weights. 20 | ess : ``float`` 21 | Effective sample size threshold. 22 | bins : ``int`` 23 | Number of bins to use for trimming. 24 | 25 | Returns 26 | ------- 27 | samples_trimmed : ``np.ndarray`` 28 | Trimmed samples. 29 | weights_trimmed : ``np.ndarray`` 30 | Trimmed weights. 
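Notes
-----
For normalized weights the effective sample size is ESS = 1 / sum_i w_i**2.
Starting from the highest percentile of the weight distribution, the
trimming threshold is progressively lowered until the retained samples
preserve at least a fraction ``ess`` of the original ESS; those samples
and their re-normalized weights are then returned.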
31 | """ 32 | 33 | # normalize weights 34 | weights /= np.sum(weights) 35 | # compute untrimmed ess 36 | ess_total = 1.0 / np.sum(weights**2.0) 37 | # define percentile grid 38 | percentiles = np.linspace(0, 99, bins) 39 | 40 | i = bins - 1 41 | while True: 42 | p = percentiles[i] 43 | # compute weight threshold 44 | threshold = np.percentile(weights, p) 45 | mask = weights >= threshold 46 | weights_trimmed = weights[mask] 47 | weights_trimmed /= np.sum(weights_trimmed) 48 | ess_trimmed = 1.0 / np.sum(weights_trimmed**2.0) 49 | if ess_trimmed / ess_total >= ess: 50 | break 51 | i -= 1 52 | 53 | return samples[mask], weights_trimmed 54 | 55 | 56 | def effective_sample_size(weights): 57 | """ 58 | Compute effective sample size (ESS). 59 | 60 | Parameters 61 | ---------- 62 | weights : ``np.ndarray`` 63 | Weights. 64 | 65 | Returns 66 | ------- 67 | ess : ``float`` 68 | Effective sample size. 69 | """ 70 | weights /= np.sum(weights) 71 | return 1.0 / np.sum(weights**2.0) 72 | 73 | 74 | def unique_sample_size(weights, k=None): 75 | """ 76 | Compute unique sample size (ESS). 77 | 78 | Parameters 79 | ---------- 80 | weights : ``np.ndarray`` 81 | Weights. 82 | k : ``int`` 83 | Number of resampled samples. 84 | 85 | Returns 86 | ------- 87 | uss : ``float`` 88 | Unique sample size. 89 | """ 90 | if k is None: 91 | k = len(weights) 92 | weights /= np.sum(weights) 93 | return np.sum(1.0 - (1.0 - weights)**k) 94 | 95 | 96 | def compute_ess(logw: np.ndarray): 97 | r""" 98 | Compute effective sample size (per centage). 99 | 100 | Parameters 101 | ---------- 102 | logw : ``np.ndarray`` 103 | Log-weights. 104 | Returns 105 | ------- 106 | ess : float 107 | Effective sample size divided by actual number 108 | of particles (between 0 and 1) 109 | """ 110 | logw_max = np.max(logw) 111 | logw_normed = logw - logw_max 112 | 113 | weights = np.exp(logw_normed) / np.sum(np.exp(logw_normed)) 114 | return 1.0 / np.sum(weights * weights) / len(weights) 115 | 116 | 117 | def increment_logz(logw: np.ndarray): 118 | r""" 119 | Compute log evidence increment factor. 120 | 121 | Parameters 122 | ---------- 123 | logw : ``np.ndarray`` 124 | Log-weights. 125 | Returns 126 | ------- 127 | ess : float 128 | logZ increment. 129 | """ 130 | logw_max = np.max(logw) 131 | logw_normed = logw - logw_max 132 | 133 | return logw_max + np.logaddexp.reduce(logw_normed) 134 | 135 | 136 | def systematic_resample(size: np.ndarray, 137 | weights: np.ndarray, 138 | random_state: int = None): 139 | """ 140 | Resample a new set of points from the weighted set of inputs 141 | such that they all have equal weight. 142 | 143 | Parameters 144 | ---------- 145 | size : `int` 146 | Number of samples to draw. 147 | weights : `~numpy.ndarray` with shape (nsamples,) 148 | Corresponding weight of each sample. 149 | random_state : `int`, optional 150 | Random seed. 151 | 152 | Returns 153 | ------- 154 | indeces : `~numpy.ndarray` with shape (nsamples,) 155 | Indices of the resampled array. 156 | 157 | Examples 158 | -------- 159 | >>> x = np.array([[1., 1.], [2., 2.], [3., 3.], [4., 4.]]) 160 | >>> w = np.array([0.6, 0.2, 0.15, 0.05]) 161 | >>> systematic_resample(4, w) 162 | array([0, 0, 0, 2]) 163 | 164 | Notes 165 | ----- 166 | Implements the systematic resampling method. 167 | """ 168 | 169 | if random_state is not None: 170 | np.random.seed(random_state) 171 | 172 | if abs(np.sum(weights) - 1.) 
> SQRTEPS: 173 | weights = np.array(weights) / np.sum(weights) 174 | 175 | positions = (np.random.random() + np.arange(size)) / size 176 | 177 | j = 0 178 | cumulative_sum = weights[0] 179 | indeces = np.empty(size, dtype=int) 180 | for i in range(size): 181 | while positions[i] > cumulative_sum: 182 | j += 1 183 | cumulative_sum += weights[j] 184 | indeces[i] = j 185 | 186 | return indeces 187 | 188 | 189 | class ProgressBar: 190 | """ 191 | Progress bar class. 192 | 193 | Parameters 194 | ---------- 195 | show : `bool` 196 | Whether or not to show a progress bar. Default is ``True``. 197 | """ 198 | def __init__(self, show: bool = True, initial=0): 199 | self.progress_bar = tqdm(desc='Iter', disable=not show, initial=initial) 200 | self.info = dict() 201 | 202 | def update_stats(self, info): 203 | """ 204 | Update shown stats. 205 | 206 | Parameters 207 | ---------- 208 | info : dict 209 | Dictionary with stats to show. 210 | """ 211 | self.info = {**self.info, **info} 212 | self.progress_bar.set_postfix(ordered_dict=self.info) 213 | 214 | def update_iter(self): 215 | """ 216 | Update iteration counter. 217 | """ 218 | self.progress_bar.update(1) 219 | 220 | def close(self): 221 | """ 222 | Close progress bar. 223 | """ 224 | self.progress_bar.close() 225 | 226 | 227 | class FunctionWrapper(object): 228 | r""" 229 | Make the log-likelihood or log-prior function pickleable 230 | when ``args`` or ``kwargs`` are also included. 231 | 232 | Parameters 233 | ---------- 234 | f : callable 235 | Log probability function. 236 | args : list 237 | Extra positional arguments to be passed to f. 238 | kwargs : dict 239 | Extra keyword arguments to be passed to f. 240 | """ 241 | def __init__(self, f, args, kwargs): 242 | self.f = f 243 | self.args = [] if args is None else args 244 | self.kwargs = {} if kwargs is None else kwargs 245 | 246 | def __call__(self, x): 247 | """ 248 | Evaluate log-likelihood or log-prior function. 249 | 250 | Parameters 251 | ---------- 252 | x : ``np.ndarray`` 253 | Input position array. 254 | 255 | Returns 256 | ------- 257 | f : float or ``np.ndarray`` 258 | f(x) 259 | """ 260 | return self.f(x, *self.args, **self.kwargs) 261 | 262 | 263 | def torch_to_numpy(x: torch.Tensor) -> np.ndarray: 264 | """ 265 | Cast torch tensor to numpy. 266 | 267 | Parameters 268 | ---------- 269 | x : torch.Tensor 270 | Input tensor. 271 | 272 | Returns 273 | ------- 274 | Numpy array corresponding to the input tensor. 275 | """ 276 | return x.detach().numpy() 277 | 278 | 279 | def numpy_to_torch(x: np.ndarray) -> torch.Tensor: 280 | """ 281 | Cast numpy array to torch tensor. 282 | 283 | Parameters 284 | ---------- 285 | x : np.ndarray 286 | Input array. 287 | 288 | Returns 289 | ------- 290 | Torch tensor corresponding to the input array. 291 | """ 292 | return torch.tensor(x, dtype=torch.float32) 293 | 294 | 295 | def torch_double_to_float(x: torch.Tensor, warn: bool = True): 296 | """ 297 | Cast double precision (Float64) torch tensor to single precision (Float32). 298 | 299 | Parameters 300 | ---------- 301 | x: torch.Tensor 302 | Input tensor. 303 | warn: bool 304 | If True, warn the user about the typecast. 305 | 306 | Returns 307 | ------- 308 | Single precision (Float32) torch tensor. 309 | """ 310 | if x.dtype == torch.float64 and warn: 311 | warnings.warn(f"Float64 data is currently unsupported, casting to Float32. 
Output will also have type Float32.") 312 | return x.float() 313 | elif x.dtype == torch.float32: 314 | return x 315 | else: 316 | raise ValueError(f"Unsupported datatype for input data: {x.dtype}") 317 | 318 | class flow_numpy_wrapper: 319 | """ 320 | Wrapper class for numpy flows. 321 | 322 | Parameters 323 | ---------- 324 | flow : Flow object 325 | Flow object that implements forward and inverse 326 | transformations. 327 | 328 | Returns 329 | ------- 330 | Flow object 331 | """ 332 | def __init__(self, flow): 333 | self.flow = flow 334 | 335 | @torch.no_grad() 336 | def forward(self, v): 337 | v = numpy_to_torch(v) 338 | theta, logdetj = self.flow.forward(v) 339 | theta = torch_to_numpy(theta) 340 | logdetj = - torch_to_numpy(logdetj) 341 | return theta, logdetj 342 | 343 | @torch.no_grad() 344 | def inverse(self, theta): 345 | theta = numpy_to_torch(theta) 346 | v, logdetj = self.flow.inverse(theta) 347 | v = torch_to_numpy(v) 348 | logdetj = torch_to_numpy(logdetj) 349 | return v, logdetj -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.20.0 2 | torch>=1.12.0 3 | zuko>=1.1.0 4 | tqdm>=4.60.0 5 | scipy>=1.4.0 6 | dill>=0.3.8 7 | multiprocess>=0.70.15 -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = pocomc 3 | version = attr: pocomc._version.version 4 | author = Minas Karamanis 5 | author_email = minaskar@gmail.com 6 | url = https://github.com/minaskar/pocomc 7 | description = Preconditioned Monte Carlo 8 | long_description = file: README.md 9 | long_description_content_type = text/markdown 10 | license = GPLv3 11 | license_file = LICENCE 12 | platform = any 13 | classifiers = 14 | Programming Language :: Python :: 3 15 | License :: OSI Approved :: GNU General Public License v3 (GPLv3) 16 | Operating System :: OS Independent 17 | Intended Audience :: Science/Research 18 | Topic :: Scientific/Engineering 19 | Topic :: Scientific/Engineering :: Mathematics 20 | 21 | [options] 22 | zip_safe = false 23 | include_package_data = true 24 | python_requires = >= 3.8 25 | packages = 26 | pocomc 27 | test_suite = tests 28 | setup_requires = 29 | setuptools >=46.4.0 30 | install_requires = 31 | numpy>=1.20.0 32 | torch>=1.12.0 33 | zuko>=1.1.0 34 | tqdm>=4.60.0 35 | scipy>=1.4.0 36 | dill>=0.3.8 37 | multiprocess>=0.70.15 -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | setuptools.setup() 4 | -------------------------------------------------------------------------------- /tests/test_flow.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import torch 3 | from pocomc.flow import Flow 4 | 5 | class FlowTestCase(unittest.TestCase): 6 | @staticmethod 7 | def make_data(): 8 | # Make a dataset to use in tests 9 | torch.manual_seed(0) 10 | n_data = 100 11 | n_dim = 4 12 | x = torch.randn(size=(n_data, n_dim)) * 1.5 13 | return x 14 | 15 | @torch.no_grad() 16 | def test_forward(self): 17 | """ 18 | Test that the forward pass works without raising an error 19 | """ 20 | torch.manual_seed(0) 21 | 22 | data = self.make_data() 23 | flow = Flow(n_dim=data.shape[1], flow='maf3') 24 | z, _ = flow.forward(data) 
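# The forward pass maps the data to latent points and also returns the
# log-determinant of the Jacobian (unused here); the assertions below only
# check that the latent points are finite and keep the input's shape and dtype.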
25 | 26 | self.assertFalse(torch.any(torch.isnan(z))) 27 | self.assertFalse(torch.any(torch.isinf(z))) 28 | self.assertEqual(data.shape, z.shape) 29 | self.assertEqual(data.dtype, z.dtype) 30 | 31 | @torch.no_grad() 32 | def test_inverse(self): 33 | # Test that the inverse pass works without raising an error 34 | torch.manual_seed(0) 35 | 36 | z = self.make_data() 37 | flow = Flow(n_dim=z.shape[1], flow='maf3') 38 | x, _ = flow.inverse(z) 39 | 40 | self.assertFalse(torch.any(torch.isnan(x))) 41 | self.assertFalse(torch.any(torch.isinf(x))) 42 | self.assertEqual(x.shape, z.shape) 43 | self.assertEqual(x.dtype, z.dtype) 44 | 45 | @torch.no_grad() 46 | def test_logprob(self): 47 | # Test that logprob works without raising an error 48 | torch.manual_seed(0) 49 | 50 | x = self.make_data() 51 | flow = Flow(n_dim=x.shape[1], flow='maf3') 52 | 53 | log_prob = flow.log_prob(x) 54 | 55 | self.assertFalse(torch.any(torch.isnan(log_prob))) 56 | self.assertFalse(torch.any(torch.isinf(log_prob))) 57 | self.assertEqual(log_prob.shape, (x.shape[0],)) 58 | self.assertEqual(x.dtype, log_prob.dtype) 59 | 60 | @torch.no_grad() 61 | def test_sample(self): 62 | # Test that sample works without raising an error 63 | torch.manual_seed(0) 64 | 65 | x_tmp = self.make_data() 66 | flow = Flow(n_dim=x_tmp.shape[1], flow='maf3') 67 | x, _ = flow.sample(x_tmp.shape[0]) 68 | 69 | self.assertFalse(torch.any(torch.isnan(x))) 70 | self.assertFalse(torch.any(torch.isinf(x))) 71 | self.assertEqual(x.shape, x_tmp.shape) 72 | self.assertEqual(x.dtype, x_tmp.dtype) 73 | 74 | @torch.no_grad() 75 | def test_reconstruction(self): 76 | # Test that latent points are reconstructed to be close enough to data points 77 | torch.manual_seed(0) 78 | 79 | x = self.make_data() 80 | flow = Flow(n_dim=x.shape[1], flow='maf3') 81 | z, _ = flow.forward(x) 82 | x_reconstructed, _ = flow.inverse(z) 83 | 84 | self.assertFalse(torch.any(torch.isnan(x_reconstructed))) 85 | self.assertFalse(torch.any(torch.isinf(x_reconstructed))) 86 | self.assertEqual(x_reconstructed.shape, x.shape) 87 | self.assertEqual(x_reconstructed.dtype, x.dtype) 88 | self.assertTrue(torch.allclose(x, x_reconstructed, atol=1e-5)) 89 | 90 | @torch.no_grad() 91 | def test_logprob_float32(self): 92 | # Test logprob when input is torch.float 93 | torch.manual_seed(0) 94 | 95 | x = self.make_data() 96 | x = x.float() 97 | flow = Flow(n_dim=x.shape[1], flow='maf3') 98 | log_prob = flow.log_prob(x) 99 | 100 | self.assertFalse(torch.any(torch.isnan(log_prob))) 101 | self.assertFalse(torch.any(torch.isinf(log_prob))) 102 | self.assertEqual(log_prob.shape, (x.shape[0],)) 103 | self.assertEqual(x.dtype, log_prob.dtype) 104 | 105 | @torch.no_grad() 106 | def test_logprob_float64(self): 107 | # Test logprob when input is torch.double 108 | torch.manual_seed(0) 109 | 110 | x = self.make_data() 111 | x = x.double() 112 | flow = Flow(n_dim=x.shape[1], flow='maf3') 113 | with self.assertWarns(UserWarning): 114 | log_prob = flow.log_prob(x) 115 | 116 | self.assertFalse(torch.any(torch.isnan(log_prob))) 117 | self.assertFalse(torch.any(torch.isinf(log_prob))) 118 | self.assertEqual(log_prob.shape, (x.shape[0],)) 119 | self.assertEqual(log_prob.dtype, torch.float32) 120 | 121 | @torch.no_grad() 122 | def test_logprob_single_example(self): 123 | # Test logprob when input is a single data point 124 | torch.manual_seed(0) 125 | 126 | x = self.make_data() 127 | x = x[0].reshape(1, -1) 128 | flow = Flow(n_dim=x.shape[1], flow='maf3') 129 | log_prob = flow.log_prob(x) 130 | 131 | 
self.assertFalse(torch.any(torch.isnan(log_prob))) 132 | self.assertFalse(torch.any(torch.isinf(log_prob))) 133 | self.assertEqual(log_prob.shape, (x.shape[0],)) 134 | self.assertEqual(x.dtype, log_prob.dtype) 135 | 136 | def test_logprob_backward(self): 137 | # Test backpropagation on the negative log likelihood 138 | torch.manual_seed(0) 139 | 140 | x = self.make_data() 141 | flow = Flow(n_dim=x.shape[1], flow='maf3') 142 | log_prob = flow.log_prob(x) 143 | nll = -torch.mean(log_prob) 144 | nll.backward() 145 | 146 | for param in flow.flow.parameters(): 147 | if param.requires_grad: 148 | self.assertIsNotNone(param.grad) 149 | else: 150 | self.assertIsNone(param.grad) 151 | 152 | @torch.no_grad() 153 | def test_logprob_inverse(self): 154 | # Test that the inverse logprob is the negative of the forward logprob 155 | torch.manual_seed(0) 156 | 157 | x = self.make_data() 158 | 159 | flow = Flow(n_dim=x.shape[1], flow='maf3') 160 | z, logprob_forward = flow.forward(x) 161 | _, logprob_inverse = flow.inverse(z) 162 | 163 | #self.assertTrue(torch.allclose(logprob_forward, -logprob_inverse)) 164 | torch.testing.assert_close(logprob_forward, -logprob_inverse) 165 | self.assertEqual(logprob_forward.shape, logprob_inverse.shape) 166 | self.assertEqual(logprob_forward.dtype, logprob_inverse.dtype) 167 | 168 | def test_fit(self): 169 | # Test that fit works without errors and check some basic functions afterwards 170 | torch.manual_seed(0) 171 | 172 | x = self.make_data() 173 | flow = Flow(n_dim=x.shape[1], flow='maf3') 174 | flow.fit(x, epochs=5) 175 | 176 | z, _ = flow.forward(x) 177 | log_prob = flow.log_prob(x) 178 | x_samples, _ = flow.sample(x.shape[0]) 179 | 180 | self.assertFalse(torch.any(torch.isnan(log_prob))) 181 | self.assertFalse(torch.any(torch.isinf(log_prob))) 182 | self.assertEqual(log_prob.shape, (x.shape[0],)) 183 | self.assertEqual(x.dtype, log_prob.dtype) 184 | 185 | self.assertFalse(torch.any(torch.isnan(z))) 186 | self.assertFalse(torch.any(torch.isinf(z))) 187 | self.assertEqual(x.shape, z.shape) 188 | self.assertEqual(x.dtype, z.dtype) 189 | 190 | self.assertFalse(torch.any(torch.isnan(x_samples))) 191 | self.assertFalse(torch.any(torch.isinf(x_samples))) 192 | self.assertEqual(x.shape, x_samples.shape) 193 | self.assertEqual(x.dtype, x_samples.dtype) 194 | 195 | def test_logj(self): 196 | torch.manual_seed(0) 197 | 198 | x = self.make_data() 199 | flow = Flow(n_dim=x.shape[1], flow='maf3') 200 | 201 | z, logj_forward = flow.forward(x) 202 | _, logj_inverse = flow.inverse(z) 203 | 204 | #assert torch.allclose(logj_forward, -logj_inverse, rtol=1e-4) 205 | torch.testing.assert_close(logj_forward, -logj_inverse) 206 | 207 | 208 | if __name__ == '__main__': 209 | unittest.main() 210 | -------------------------------------------------------------------------------- /tests/test_prior.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import numpy as np 4 | from scipy.stats import norm 5 | 6 | from pocomc.prior import Prior 7 | 8 | class PriorTestCase(unittest.TestCase): 9 | 10 | def test_sample(self): 11 | prior = Prior([norm(0, 1), norm(0, 1)]) 12 | x = prior.rvs(10) 13 | self.assertEqual(np.shape(x), (10,2)) 14 | 15 | def test_log_prob(self): 16 | prior = Prior([norm(0, 1), norm(0, 1)]) 17 | x = prior.rvs(10) 18 | log_prob = prior.logpdf(x) 19 | self.assertIsInstance(log_prob, np.ndarray) 20 | 21 | def test_log_prob2(self): 22 | prior = Prior([norm(0, 1), norm(0, 1)]) 23 | x = prior.rvs(10) 24 | log_prob = 
prior.logpdf(x) 25 | self.assertEqual(np.shape(log_prob), (10,)) 26 | 27 | def test_log_prob3(self): 28 | prior = Prior([norm(0, 1), norm(0, 1)]) 29 | x = prior.rvs(10) 30 | log_prob = prior.logpdf(x) 31 | self.assertTrue(np.all(log_prob < 0)) 32 | 33 | def test_log_prob4(self): 34 | prior = Prior([norm(0, 1), norm(0, 1)]) 35 | x = prior.rvs(10) 36 | log_prob = prior.logpdf(x) 37 | self.assertTrue(np.all(np.isfinite(log_prob))) 38 | 39 | def test_bounds(self): 40 | prior = Prior([norm(0, 1), norm(0, 1)]) 41 | bounds = prior.bounds 42 | self.assertEqual(np.shape(bounds), (2,2)) 43 | 44 | def test_bounds2(self): 45 | prior = Prior([norm(0, 1), norm(0, 1)]) 46 | bounds = prior.bounds 47 | self.assertTrue(np.all(bounds[:,0] < bounds[:,1])) 48 | 49 | def test_dim(self): 50 | prior = Prior([norm(0, 1), norm(0, 1)]) 51 | self.assertEqual(prior.dim, 2) 52 | 53 | 54 | -------------------------------------------------------------------------------- /tests/test_sampler.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | 4 | from scipy.stats import norm 5 | 6 | from pocomc.sampler import Sampler 7 | from pocomc.prior import Prior 8 | 9 | class SamplerTestCase(unittest.TestCase): 10 | @staticmethod 11 | def log_likelihood_single(x): 12 | return np.sum(-0.5 * np.log(2 * np.pi) - 0.5 * x ** 2) 13 | 14 | @staticmethod 15 | def log_likelihood_vectorized(x): 16 | # Gaussian log likelihood with mu = 0, sigma = 1 17 | return np.sum(-0.5 * np.log(2 * np.pi) - 0.5 * x ** 2, axis=1) 18 | 19 | def test_run(self): 20 | 21 | n_dim = 2 22 | prior = Prior(n_dim*[norm(0, 1)]) 23 | 24 | sampler = Sampler( 25 | prior=prior, 26 | likelihood=self.log_likelihood_single, 27 | train_config={'epochs': 1}, 28 | random_state=0, 29 | ) 30 | sampler.run() 31 | 32 | def test_run2(self): 33 | 34 | n_dim = 2 35 | prior = Prior(n_dim*[norm(0, 1)]) 36 | 37 | sampler = Sampler( 38 | prior=prior, 39 | likelihood=self.log_likelihood_vectorized, 40 | vectorize=True, 41 | train_config={'epochs': 1}, 42 | random_state=0, 43 | ) 44 | sampler.run() 45 | 46 | 47 | if __name__ == '__main__': 48 | unittest.main() 49 | -------------------------------------------------------------------------------- /tests/test_scaler.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | 4 | from pocomc.scaler import Reparameterize 5 | 6 | 7 | class ReparameterizeTestCase(unittest.TestCase): 8 | @staticmethod 9 | def make_unconstrained_data(): 10 | # Make a dataset to use in tests 11 | np.random.seed(0) 12 | n_data = 100 13 | n_dim = 10 14 | x = np.random.randn(n_data, n_dim) * 3 + 1 15 | 16 | lower_bound = np.nan 17 | upper_bound = np.nan 18 | return x, lower_bound, upper_bound 19 | 20 | @staticmethod 21 | def make_lower_bounded_data(): 22 | # Make a dataset to use in tests 23 | np.random.seed(0) 24 | n_data = 100 25 | n_dim = 10 26 | x = np.random.exponential(scale=1, size=(n_data, n_dim)) 27 | 28 | lower_bound = 0 29 | upper_bound = np.nan 30 | return x, lower_bound, upper_bound 31 | 32 | @staticmethod 33 | def make_upper_bounded_data(): 34 | # Make a dataset to use in tests 35 | np.random.seed(0) 36 | n_data = 100 37 | n_dim = 10 38 | x = -np.random.exponential(scale=1, size=(n_data, n_dim)) 39 | 40 | lower_bound = np.nan 41 | upper_bound = 0 42 | return x, lower_bound, upper_bound 43 | 44 | @staticmethod 45 | def make_lower_and_upper_bounded_data(): 46 | # Make a dataset to use in tests 47 | 
np.random.seed(0) 48 | n_data = 100 49 | n_dim = 10 50 | x = np.random.uniform(low=0, high=1, size=(n_data, n_dim)) 51 | 52 | lower_bound = 0 53 | upper_bound = 1 54 | return x, lower_bound, upper_bound 55 | 56 | def test_unconstrained(self): 57 | # Test that methods work without errors on unconstrained input data 58 | np.random.seed(0) 59 | 60 | x, lb, ub = self.make_unconstrained_data() 61 | r = Reparameterize(n_dim=x.shape[1], bounds=(lb, ub)) 62 | r.fit(x) 63 | 64 | u = r.forward(x) 65 | x_r, _ = r.inverse(u) 66 | 67 | self.assertEqual(x.shape, u.shape) 68 | self.assertEqual(x.dtype, u.dtype) 69 | 70 | self.assertEqual(x_r.shape, u.shape) 71 | self.assertEqual(x_r.dtype, u.dtype) 72 | 73 | self.assertTrue(np.allclose(x, x_r)) 74 | 75 | def test_lower_bounded(self): 76 | # Test that methods work without errors on unconstrained input data 77 | np.random.seed(0) 78 | 79 | x, lb, ub = self.make_lower_bounded_data() 80 | r = Reparameterize(n_dim=x.shape[1], bounds=(lb, ub)) 81 | r.fit(x) 82 | 83 | u = r.forward(x) 84 | x_r, _ = r.inverse(u) 85 | 86 | self.assertEqual(x.shape, u.shape) 87 | self.assertEqual(x.dtype, u.dtype) 88 | 89 | self.assertEqual(x_r.shape, u.shape) 90 | self.assertEqual(x_r.dtype, u.dtype) 91 | 92 | self.assertTrue(np.allclose(x, x_r)) 93 | 94 | def test_upper_bounded(self): 95 | # Test that methods work without errors on unconstrained input data 96 | np.random.seed(0) 97 | 98 | x, lb, ub = self.make_upper_bounded_data() 99 | r = Reparameterize(n_dim=x.shape[1], bounds=(lb, ub)) 100 | r.fit(x) 101 | 102 | u = r.forward(x) 103 | x_r, _ = r.inverse(u) 104 | 105 | self.assertEqual(x.shape, u.shape) 106 | self.assertEqual(x.dtype, u.dtype) 107 | 108 | self.assertEqual(x_r.shape, u.shape) 109 | self.assertEqual(x_r.dtype, u.dtype) 110 | 111 | self.assertTrue(np.allclose(x, x_r)) 112 | 113 | def test_lower_and_upper_bounded(self): 114 | # Test that methods work without errors on unconstrained input data 115 | np.random.seed(0) 116 | 117 | x, lb, ub = self.make_lower_and_upper_bounded_data() 118 | r = Reparameterize(n_dim=x.shape[1], bounds=(lb, ub)) 119 | r.fit(x) 120 | 121 | u = r.forward(x) 122 | x_r, _ = r.inverse(u) 123 | 124 | self.assertEqual(x.shape, u.shape) 125 | self.assertEqual(x.dtype, u.dtype) 126 | 127 | self.assertEqual(x_r.shape, u.shape) 128 | self.assertEqual(x_r.dtype, u.dtype) 129 | 130 | self.assertTrue(np.allclose(x, x_r)) 131 | 132 | def test_out_of_bounds(self): 133 | # Test that providing out-of-bound inputs raises an error 134 | np.random.seed(0) 135 | x, lb, ub = self.make_lower_and_upper_bounded_data() 136 | x[0] = lb - 1 # Artificially make example 0 go outside the bounds 137 | x[1] = ub + 1 # Artificially make example 1 go outside the bounds 138 | 139 | r = Reparameterize(n_dim=x.shape[1], bounds=(lb, ub)) 140 | self.assertRaises(ValueError, r.fit, x) 141 | 142 | 143 | if __name__ == '__main__': 144 | unittest.main() 145 | -------------------------------------------------------------------------------- /tests/test_state.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from pathlib import Path 3 | import numpy as np 4 | from pocomc import Sampler 5 | from pocomc import Prior 6 | 7 | from scipy.stats import norm 8 | 9 | 10 | class SamplerStateTestCase(unittest.TestCase): 11 | @staticmethod 12 | def log_likelihood_vectorized(x): 13 | # Gaussian log likelihood with mu = 0, sigma = 1 14 | return np.sum(-0.5 * np.log(2 * np.pi) - 0.5 * x ** 2, axis=1) 15 | 16 | def test_save(self): 17 | # 
Save PMC state. 18 | prior = Prior([norm(0, 1), norm(0, 1)]) 19 | s = Sampler(prior, self.log_likelihood_vectorized, vectorize=True, train_config=dict(epochs=10), random_state=0) 20 | path = Path('pmc.state') 21 | s.save_state(path) 22 | self.assertTrue(path.exists()) 23 | path.unlink() 24 | self.assertFalse(path.exists()) 25 | 26 | def test_load(self): 27 | # Load PMC state. 28 | prior = Prior([norm(0, 1), norm(0, 1)]) 29 | s = Sampler(prior, self.log_likelihood_vectorized, vectorize=True, train_config=dict(epochs=10), random_state=0) 30 | path = Path('pmc.state') 31 | s.save_state(path) 32 | self.assertTrue(path.exists()) 33 | s.load_state(path) 34 | path.unlink() 35 | self.assertFalse(path.exists()) 36 | 37 | def test_resume(self): 38 | # Run PMC. Then, pick an intermediate state and resume from that state. 39 | np.random.seed(0) 40 | prior = Prior([norm(0, 1), norm(0, 1)]) 41 | s = Sampler(prior, self.log_likelihood_vectorized, vectorize=True, train_config=dict(epochs=10), random_state=0) 42 | s.run(save_every=1) # Save every iteration 43 | 44 | # At this point, we would look at the directory and choose the file we want to load. In this example, we select 45 | # "pmc_1.state". Now we rerun the sampler starting from this path. We will not get the exact same 46 | # results due to RNG. 47 | 48 | self.assertTrue(Path("states/pmc_1.state").exists()) 49 | self.assertTrue(Path("states/pmc_2.state").exists()) 50 | self.assertTrue(Path("states/pmc_3.state").exists()) 51 | 52 | s = Sampler(prior, self.log_likelihood_vectorized, vectorize=True, train_config=dict(epochs=10), random_state=0) 53 | s.run(resume_state_path="states/pmc_1.state") 54 | 55 | # Remove the generated state files 56 | #Path("states/pmc_1.state").unlink() 57 | #Path("states/pmc_2.state").unlink() 58 | #Path("states/pmc_3.state").unlink() 59 | p = Path("states").glob('**/*') 60 | files = [x for x in p if x.is_file()] 61 | for f in files: 62 | f.unlink() 63 | Path("states").rmdir() 64 | 65 | 66 | if __name__ == '__main__': 67 | unittest.main() 68 | -------------------------------------------------------------------------------- /tests/test_tools.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import numpy as np 4 | 5 | from pocomc.tools import compute_ess 6 | 7 | 8 | class ESSTestCase(unittest.TestCase): 9 | def test_ess_single_particle(self): 10 | self.assertEqual(compute_ess(np.array([1.0])), 1.0) 11 | self.assertEqual(compute_ess(np.array([251.0])), 1.0) 12 | self.assertEqual(compute_ess(np.array([-421.0])), 1.0) 13 | self.assertEqual(compute_ess(np.array([-421.125251])), 1.0) 14 | self.assertEqual(compute_ess(np.array([0.0])), 1.0) 15 | 16 | 17 | if __name__ == '__main__': 18 | unittest.main() 19 | --------------------------------------------------------------------------------