├── src └── emcee │ ├── tests │ ├── __init__.py │ ├── unit │ │ ├── __init__.py │ │ ├── test_stretch.py │ │ ├── test_autocorr.py │ │ ├── test_state.py │ │ ├── test_blobs.py │ │ ├── test_ensemble.py │ │ ├── test_backends.py │ │ └── test_sampler.py │ └── integration │ │ ├── __init__.py │ │ ├── test_walk.py │ │ ├── test_de_snooker.py │ │ ├── test_de.py │ │ ├── test_kde.py │ │ ├── test_stretch.py │ │ ├── test_longdouble.py │ │ ├── test_gaussian.py │ │ └── test_proposal.py │ ├── interruptible_pool.py │ ├── model.py │ ├── backends │ ├── __init__.py │ ├── backend.py │ └── hdf.py │ ├── mpi_pool.py │ ├── ptsampler.py │ ├── moves │ ├── __init__.py │ ├── stretch.py │ ├── walk.py │ ├── kde.py │ ├── de_snooker.py │ ├── move.py │ ├── mh.py │ ├── de.py │ ├── red_blue.py │ └── gaussian.py │ ├── __init__.py │ ├── pbar.py │ ├── utils.py │ ├── state.py │ └── autocorr.py ├── MANIFEST.in ├── docs ├── .gitignore ├── _static │ ├── logo.pxm │ ├── logo2.png │ ├── logo2.pxm │ ├── favicon.png │ ├── logo-sidebar.png │ └── line │ │ ├── line-data.png │ │ ├── line-mcmc.png │ │ ├── line-time.png │ │ ├── line-triangle.png │ │ ├── line-least-squares.png │ │ └── line-max-likelihood.png ├── requirements.txt ├── user │ ├── sampler.rst │ ├── autocorr.rst │ ├── faq.rst │ ├── backends.rst │ ├── upgrade.rst │ ├── install.rst │ ├── moves.rst │ └── blobs.rst ├── fix_internal_links.py ├── conf.py ├── tutorials │ ├── tutorial_rst.tpl │ └── parallel.ipynb ├── index.rst └── Makefile ├── document ├── .gitignore ├── plots │ ├── plot_acor.py │ └── oned.py └── Makefile ├── .gitattributes ├── AUTHORS.rst ├── .github ├── dependabot.yml ├── ISSUE_TEMPLATE.md └── workflows │ └── tests.yml ├── joss ├── .gitignore ├── metadata.yaml ├── make_latex.sh ├── paper.bib ├── paper.md └── paper.tex ├── VISION.md ├── .gitignore ├── binder └── environment.yml ├── .readthedocs.yaml ├── tox.ini ├── .pre-commit-config.yaml ├── LICENSE ├── pyproject.toml ├── CONTRIBUTING.md ├── README.rst ├── setup.py ├── CODE_OF_CONDUCT.md └── 
HISTORY.rst /src/emcee/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/emcee/tests/unit/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE *.rst 2 | -------------------------------------------------------------------------------- /src/emcee/tests/integration/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | _build 2 | *.h5 3 | _static/*/*.py 4 | tutorials 5 | -------------------------------------------------------------------------------- /document/.gitignore: -------------------------------------------------------------------------------- 1 | *.aux 2 | *.brf 3 | *.log 4 | *.out 5 | plots/*.h5 6 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | document/* linguist-documentation 2 | Makefile linguist-vendored=true 3 | -------------------------------------------------------------------------------- /docs/_static/logo.pxm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JohannesBuchner/emcee/main/docs/_static/logo.pxm -------------------------------------------------------------------------------- /docs/_static/logo2.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/JohannesBuchner/emcee/main/docs/_static/logo2.png -------------------------------------------------------------------------------- /docs/_static/logo2.pxm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JohannesBuchner/emcee/main/docs/_static/logo2.pxm -------------------------------------------------------------------------------- /docs/_static/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JohannesBuchner/emcee/main/docs/_static/favicon.png -------------------------------------------------------------------------------- /AUTHORS.rst: -------------------------------------------------------------------------------- 1 | The list of contributors can be found `on GitHub `_. 2 | -------------------------------------------------------------------------------- /docs/_static/logo-sidebar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JohannesBuchner/emcee/main/docs/_static/logo-sidebar.png -------------------------------------------------------------------------------- /docs/_static/line/line-data.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JohannesBuchner/emcee/main/docs/_static/line/line-data.png -------------------------------------------------------------------------------- /docs/_static/line/line-mcmc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JohannesBuchner/emcee/main/docs/_static/line/line-mcmc.png -------------------------------------------------------------------------------- /docs/_static/line/line-time.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/JohannesBuchner/emcee/main/docs/_static/line/line-time.png -------------------------------------------------------------------------------- /docs/_static/line/line-triangle.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JohannesBuchner/emcee/main/docs/_static/line/line-triangle.png -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx-book-theme 2 | myst-nb 3 | matplotlib 4 | scipy 5 | h5py 6 | celerite 7 | corner 8 | ipython 9 | -------------------------------------------------------------------------------- /docs/_static/line/line-least-squares.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JohannesBuchner/emcee/main/docs/_static/line/line-least-squares.png -------------------------------------------------------------------------------- /docs/_static/line/line-max-likelihood.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JohannesBuchner/emcee/main/docs/_static/line/line-max-likelihood.png -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "github-actions" 4 | directory: "/" 5 | schedule: 6 | interval: "monthly" 7 | -------------------------------------------------------------------------------- /joss/.gitignore: -------------------------------------------------------------------------------- 1 | paper.aux 2 | paper.bbl 3 | paper.bcf 4 | paper.blg 5 | paper.fdb_latexmk 6 | paper.fls 7 | paper.log 8 | paper.out 9 | paper.pdf 10 | paper.run.xml 11 | *.png 12 | latex.template 13 | *.csl 14 | 
-------------------------------------------------------------------------------- /src/emcee/interruptible_pool.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # The standard library now has an interruptible pool 4 | from multiprocessing.pool import Pool as InterruptiblePool 5 | 6 | __all__ = ["InterruptiblePool"] 7 | -------------------------------------------------------------------------------- /src/emcee/model.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from collections import namedtuple 4 | 5 | __all__ = ["Model"] 6 | 7 | 8 | Model = namedtuple( 9 | "Model", ("log_prob_fn", "compute_log_prob_fn", "map_fn", "random") 10 | ) 11 | -------------------------------------------------------------------------------- /VISION.md: -------------------------------------------------------------------------------- 1 | - Easy to use gradient-free MCMC sampling of black box log probability functions 2 | - Few bells and whistles: no plotting, no modeling, no marginal likelihood calculation, etc. 
3 | - Any sampling algorithm must have published proof of correctness 4 | -------------------------------------------------------------------------------- /document/plots/plot_acor.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | import matplotlib.pyplot as pl 5 | import numpy as np 6 | 7 | # sys.path.prepend(os.path.abspath(os.path.join(__file__, "..", "..", ".."))) 8 | # import emcee 9 | 10 | 11 | def plot_acor(acorfn): 12 | pass 13 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | *.swp 3 | .DS_Store 4 | 5 | *.pyc 6 | *.so 7 | venv* 8 | build/* 9 | 10 | dist 11 | emcee.egg-info 12 | MANIFEST 13 | docs.tar 14 | 15 | *.pdf 16 | 17 | .coverage 18 | .pytest_cache 19 | htmlcov 20 | emcee_version.py 21 | 22 | .tox 23 | env 24 | .eggs 25 | .coverage.* 26 | -------------------------------------------------------------------------------- /binder/environment.yml: -------------------------------------------------------------------------------- 1 | name: emcee 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python 6 | - numpy 7 | - scipy 8 | - h5py 9 | - matplotlib 10 | - corner 11 | - tqdm 12 | - mpi4py 13 | - schwimmbad 14 | - pip 15 | - pip: 16 | - celerite 17 | - autograd 18 | - .. 19 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | build: 4 | os: ubuntu-20.04 5 | apt_packages: 6 | - fonts-liberation 7 | tools: 8 | python: "3.10" 9 | 10 | python: 11 | install: 12 | - requirements: docs/requirements.txt 13 | - method: pip 14 | path: . 
15 | 16 | sphinx: 17 | configuration: docs/conf.py 18 | fail_on_warning: true 19 | builder: dirhtml 20 | -------------------------------------------------------------------------------- /src/emcee/tests/integration/test_walk.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from emcee import moves 4 | 5 | from .test_proposal import _test_normal, _test_uniform 6 | 7 | __all__ = ["test_normal_walk", "test_uniform_walk"] 8 | 9 | 10 | def test_normal_walk(**kwargs): 11 | _test_normal(moves.WalkMove(s=3), **kwargs) 12 | 13 | 14 | def test_uniform_walk(**kwargs): 15 | _test_uniform(moves.WalkMove(s=3), **kwargs) 16 | -------------------------------------------------------------------------------- /docs/user/sampler.rst: -------------------------------------------------------------------------------- 1 | .. _sampler: 2 | 3 | The Ensemble Sampler 4 | ==================== 5 | 6 | Standard usage of ``emcee`` involves instantiating an 7 | :class:`EnsembleSampler`. 8 | 9 | .. autoclass:: emcee.EnsembleSampler 10 | :inherited-members: 11 | 12 | Note that several of the :class:`EnsembleSampler` methods return or consume 13 | :class:`State` objects: 14 | 15 | .. autoclass:: emcee.State 16 | :inherited-members: 17 | -------------------------------------------------------------------------------- /docs/user/autocorr.rst: -------------------------------------------------------------------------------- 1 | .. _autocorr-user: 2 | 3 | Autocorrelation Analysis 4 | ======================== 5 | 6 | A good heuristic for assessing convergence of samplings is the integrated 7 | autocorrelation time. ``emcee`` includes tools for computing this and the 8 | autocorrelation function itself. More details can be found in 9 | :ref:`autocorr`. 10 | 11 | 12 | .. autofunction:: emcee.autocorr.integrated_time 13 | .. 
autofunction:: emcee.autocorr.function_1d 14 | -------------------------------------------------------------------------------- /src/emcee/backends/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from .backend import Backend 4 | from .hdf import HDFBackend, TempHDFBackend 5 | 6 | __all__ = ["Backend", "HDFBackend", "TempHDFBackend", "get_test_backends"] 7 | 8 | 9 | def get_test_backends(): 10 | backends = [Backend] 11 | 12 | try: 13 | import h5py # NOQA 14 | except ImportError: 15 | pass 16 | else: 17 | backends.append(TempHDFBackend) 18 | 19 | return backends 20 | -------------------------------------------------------------------------------- /src/emcee/tests/integration/test_de_snooker.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from emcee import moves 4 | 5 | from .test_proposal import _test_normal, _test_uniform 6 | 7 | __all__ = ["test_normal_de_snooker", "test_uniform_de_snooker"] 8 | 9 | 10 | def test_normal_de_snooker(**kwargs): 11 | kwargs["nsteps"] = 4000 12 | _test_normal(moves.DESnookerMove(), **kwargs) 13 | 14 | 15 | def test_uniform_de_snooker(**kwargs): 16 | _test_uniform(moves.DESnookerMove(), **kwargs) 17 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py{39,310,311,312}{,-extras},lint 3 | 4 | [gh-actions] 5 | python = 6 | 3.9: py39 7 | 3.10: py310 8 | 3.11: py311-extras 9 | 3.12: py312 10 | 11 | [testenv] 12 | deps = coverage[toml] 13 | extras = 14 | tests 15 | extras: extras 16 | commands = 17 | pip freeze 18 | python -m coverage run -m pytest -v {posargs} 19 | 20 | [testenv:lint] 21 | skip_install = true 22 | deps = pre-commit 23 | commands = 24 | pre-commit run --all-files 25 | 
-------------------------------------------------------------------------------- /src/emcee/mpi_pool.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | 4 | try: 5 | from schwimmbad import MPIPool 6 | except ImportError: 7 | 8 | class MPIPool(object): 9 | def __init__(self, *args, **kwargs): 10 | raise ImportError( 11 | "The MPIPool from emcee has been forked to " 12 | "https://github.com/adrn/schwimmbad, " 13 | "please install that package to continue using the MPIPool" 14 | ) 15 | 16 | 17 | __all__ = ["MPIPool"] 18 | -------------------------------------------------------------------------------- /docs/fix_internal_links.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import re 5 | import sys 6 | 7 | if len(sys.argv) <= 1: 8 | sys.exit(0) 9 | 10 | 11 | def subber(m): 12 | return m.group(0).replace("``", "`") 13 | 14 | 15 | prog = re.compile(r":(.+):``(.+)``") 16 | 17 | for fn in sys.argv[1:]: 18 | print("Fixing links in {0}".format(fn)) 19 | with open(fn, "r") as f: 20 | txt = f.read() 21 | txt = prog.sub(subber, txt) 22 | with open(fn, "w") as f: 23 | f.write(txt) 24 | -------------------------------------------------------------------------------- /src/emcee/ptsampler.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | 4 | try: 5 | from ptemcee import Sampler as PTSampler 6 | except ImportError: 7 | 8 | class PTSampler(object): 9 | def __init__(self, *args, **kwargs): 10 | raise ImportError( 11 | "The PTSampler from emcee has been forked to " 12 | "https://github.com/willvousden/ptemcee, " 13 | "please install that package to continue using the PTSampler" 14 | ) 15 | 16 | 17 | __all__ = ["PTSampler"] 18 | -------------------------------------------------------------------------------- /src/emcee/tests/integration/test_de.py: 
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from emcee import moves 4 | 5 | from .test_proposal import _test_normal, _test_uniform 6 | 7 | __all__ = ["test_normal_de", "test_normal_de_no_gamma", "test_uniform_de"] 8 | 9 | 10 | def test_normal_de(**kwargs): 11 | _test_normal(moves.DEMove(), **kwargs) 12 | 13 | 14 | def test_normal_de_no_gamma(**kwargs): 15 | _test_normal(moves.DEMove(gamma0=1.0), **kwargs) 16 | 17 | 18 | def test_uniform_de(**kwargs): 19 | _test_uniform(moves.DEMove(), **kwargs) 20 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v6.0.0 4 | hooks: 5 | - id: trailing-whitespace 6 | - id: end-of-file-fixer 7 | - id: debug-statements 8 | 9 | - repo: https://github.com/PyCQA/isort 10 | rev: "6.0.1" 11 | hooks: 12 | - id: isort 13 | args: [] 14 | additional_dependencies: [toml] 15 | exclude: docs/tutorials 16 | 17 | - repo: https://github.com/psf/black 18 | rev: "25.1.0" 19 | hooks: 20 | - id: black-jupyter 21 | -------------------------------------------------------------------------------- /document/Makefile: -------------------------------------------------------------------------------- 1 | LATEX = pdflatex 2 | BASH = bash -c 3 | ECHO = echo 4 | RM = rm -rf 5 | RM_TMP = ${RM} $(foreach suff, ${TMP_SUFFS}, ${NAME}.${suff}) 6 | 7 | TMP_SUFFS = pdf aux bbl blg log dvi ps eps out 8 | SUFF = pdf 9 | 10 | CHECK_RERUN = grep Rerun $*.log 11 | 12 | NAME = ms 13 | DOC_OUT = ${NAME}.${SUFF} 14 | 15 | default: ${DOC_OUT} 16 | 17 | %.pdf: %.tex 18 | ${LATEX} $< 19 | ( ${CHECK_RERUN} && ${LATEX} $< ) || echo "Done." 20 | ( ${CHECK_RERUN} && ${LATEX} $< ) || echo "Done." 
21 | 22 | clean: 23 | ${RM_TMP} 24 | -------------------------------------------------------------------------------- /src/emcee/moves/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from .de import DEMove 4 | from .de_snooker import DESnookerMove 5 | from .gaussian import GaussianMove 6 | from .kde import KDEMove 7 | from .mh import MHMove 8 | from .move import Move 9 | from .red_blue import RedBlueMove 10 | from .stretch import StretchMove 11 | from .walk import WalkMove 12 | 13 | __all__ = [ 14 | "Move", 15 | "MHMove", 16 | "GaussianMove", 17 | "RedBlueMove", 18 | "StretchMove", 19 | "WalkMove", 20 | "KDEMove", 21 | "DEMove", 22 | "DESnookerMove", 23 | ] 24 | -------------------------------------------------------------------------------- /joss/metadata.yaml: -------------------------------------------------------------------------------- 1 | 2 | repository: https://github.com/dfm/emcee 3 | archive_doi: https://doi.org/10.5281/zenodo.3543502 4 | paper_url: https://doi.org/10.21105/joss.01864 5 | journal_name: Journal of Open Source Software 6 | review_issue_url: https://github.com/openjournals/joss-reviews/issues/1864 7 | issue: 4 8 | volume: 43 9 | page: 1864 10 | logo_path: logo.png 11 | aas_logo_path: aas-logo.png 12 | year: 2019 13 | submitted: 28 October 2019 14 | published: 17 November 2019 15 | formatted_doi: 10.21105/joss.01864 16 | citation_author: Foreman-Mackey 17 | editor_name: Juanjo Bazán 18 | reviewers: 19 | - benjaminrose 20 | - mattpitkin 21 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 6 | 7 | **General information:** 8 | 9 | - emcee version: 10 | - platform: 11 | - installation method (pip/conda/source/other?): 12 | 13 | **Problem description:** 14 | 15 | ### Expected behavior: 16 | 17 | ### Actual behavior: 18 | 19 
| ### What have you tried so far?: 20 | 21 | ### Minimal example: 22 | 23 | 24 | 25 | ```python 26 | import emcee 27 | 28 | # sample code goes here... 29 | ``` 30 | -------------------------------------------------------------------------------- /src/emcee/tests/integration/test_kde.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | try: 4 | import scipy 5 | except ImportError: 6 | scipy = None 7 | import pytest 8 | 9 | from emcee import moves 10 | 11 | from .test_proposal import _test_normal, _test_uniform 12 | 13 | __all__ = ["test_normal_kde", "test_uniform_kde", "test_nsplits_kde"] 14 | 15 | 16 | @pytest.mark.skipif(scipy is None, reason="scipy is not available") 17 | def test_normal_kde(**kwargs): 18 | _test_normal(moves.KDEMove(), **kwargs) 19 | 20 | 21 | @pytest.mark.skipif(scipy is None, reason="scipy is not available") 22 | def test_uniform_kde(**kwargs): 23 | _test_uniform(moves.KDEMove(), **kwargs) 24 | 25 | 26 | @pytest.mark.skipif(scipy is None, reason="scipy is not available") 27 | def test_nsplits_kde(**kwargs): 28 | _test_normal(moves.KDEMove(nsplits=5), **kwargs) 29 | -------------------------------------------------------------------------------- /src/emcee/tests/integration/test_stretch.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import pytest 4 | 5 | from emcee import moves 6 | 7 | from .test_proposal import _test_normal, _test_uniform 8 | 9 | __all__ = [ 10 | "test_normal_stretch", 11 | "test_uniform_stretch", 12 | "test_nsplits_stretch", 13 | ] 14 | 15 | 16 | @pytest.mark.parametrize("blobs", [True, False]) 17 | def test_normal_stretch(blobs, **kwargs): 18 | kwargs["blobs"] = blobs 19 | _test_normal(moves.StretchMove(), **kwargs) 20 | 21 | 22 | def test_uniform_stretch(**kwargs): 23 | _test_uniform(moves.StretchMove(), **kwargs) 24 | 25 | 26 | def test_nsplits_stretch(**kwargs): 27 | 
_test_normal(moves.StretchMove(nsplits=5), **kwargs) 28 | 29 | 30 | def test_randomize_stretch(**kwargs): 31 | _test_normal(moves.StretchMove(randomize_split=True), **kwargs) 32 | -------------------------------------------------------------------------------- /src/emcee/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | __bibtex__ = """ 4 | @article{emcee, 5 | author = {{Foreman-Mackey}, D. and {Hogg}, D.~W. and {Lang}, D. and {Goodman}, J.}, 6 | title = {emcee: The MCMC Hammer}, 7 | journal = {PASP}, 8 | year = 2013, 9 | volume = 125, 10 | pages = {306-312}, 11 | eprint = {1202.3665}, 12 | doi = {10.1086/670067} 13 | } 14 | """ 15 | __uri__ = "https://emcee.readthedocs.io" 16 | __author__ = "Daniel Foreman-Mackey" 17 | __email__ = "foreman.mackey@gmail.com" 18 | __license__ = "MIT" 19 | __description__ = "The Python ensemble sampling toolkit for MCMC" 20 | 21 | 22 | from .emcee_version import __version__ # isort:skip 23 | 24 | from . import autocorr, backends, moves 25 | from .ensemble import EnsembleSampler, walkers_independent 26 | from .state import State 27 | 28 | __all__ = [ 29 | "EnsembleSampler", 30 | "walkers_independent", 31 | "State", 32 | "moves", 33 | "autocorr", 34 | "backends", 35 | "__version__", 36 | ] 37 | -------------------------------------------------------------------------------- /src/emcee/tests/unit/test_stretch.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import warnings 4 | 5 | import numpy as np 6 | import pytest 7 | 8 | from emcee import moves 9 | from emcee.model import Model 10 | from emcee.state import State 11 | 12 | __all__ = ["test_live_dangerously"] 13 | 14 | 15 | def test_live_dangerously(nwalkers=32, nsteps=3000, seed=1234): 16 | warnings.filterwarnings("error") 17 | 18 | # Set up the random number generator. 
19 | np.random.seed(seed) 20 | state = State( 21 | np.random.randn(nwalkers, 2 * nwalkers), 22 | log_prob=np.random.randn(nwalkers), 23 | ) 24 | model = Model(None, lambda x: (np.zeros(len(x)), None), map, np.random) 25 | proposal = moves.StretchMove() 26 | 27 | # Test to make sure that the error is thrown if there aren't enough 28 | # walkers. 29 | with pytest.raises(RuntimeError): 30 | proposal.propose(model, state) 31 | 32 | # Living dangerously... 33 | proposal.live_dangerously = True 34 | proposal.propose(model, state) 35 | -------------------------------------------------------------------------------- /src/emcee/moves/stretch.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | 5 | from .red_blue import RedBlueMove 6 | 7 | __all__ = ["StretchMove"] 8 | 9 | 10 | class StretchMove(RedBlueMove): 11 | """ 12 | A `Goodman & Weare (2010) 13 | `_ "stretch move" with 14 | parallelization as described in `Foreman-Mackey et al. (2013) 15 | `_. 16 | 17 | :param a: (optional) 18 | The stretch scale parameter. (default: ``2.0``) 19 | 20 | """ 21 | 22 | def __init__(self, a=2.0, **kwargs): 23 | self.a = a 24 | super(StretchMove, self).__init__(**kwargs) 25 | 26 | def get_proposal(self, s, c, random): 27 | c = np.concatenate(c, axis=0) 28 | Ns, Nc = len(s), len(c) 29 | ndim = s.shape[1] 30 | zz = ((self.a - 1.0) * random.rand(Ns) + 1) ** 2.0 / self.a 31 | factors = (ndim - 1.0) * np.log(zz) 32 | rint = random.randint(Nc, size=(Ns,)) 33 | return c[rint] - (c[rint] - s) * zz[:, None], factors 34 | -------------------------------------------------------------------------------- /docs/user/faq.rst: -------------------------------------------------------------------------------- 1 | .. _faq: 2 | 3 | FAQ 4 | === 5 | 6 | **The not-so-frequently asked questions that still have useful answers** 7 | 8 | What are "walkers"? 
9 | ------------------- 10 | 11 | Walkers are the members of the ensemble. They are almost like separate 12 | Metropolis-Hastings chains but, of course, the proposal distribution for 13 | a given walker depends on the positions of all the other walkers in the 14 | ensemble. See `Goodman & Weare (2010) 15 | `_ for more details. 16 | 17 | 18 | How should I initialize the walkers? 19 | ------------------------------------ 20 | 21 | The best technique seems to be to start in a small ball around the a priori 22 | preferred position. Don't worry, the walkers quickly branch out and explore 23 | the rest of the space. 24 | 25 | 26 | Parameter limits 27 | ---------------- 28 | 29 | In order to confine the walkers to a finite volume of the parameter space, have 30 | your function return negative infinity outside of the volume corresponding to 31 | the logarithm of 0 prior probability using 32 | 33 | .. code-block:: python 34 | 35 | return -numpy.inf 36 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2010-2021 Daniel Foreman-Mackey & contributors. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /docs/user/backends.rst: -------------------------------------------------------------------------------- 1 | .. _backends: 2 | 3 | Backends 4 | ======== 5 | 6 | Starting with version 3, emcee has an interface for serializing the sampler 7 | output. 8 | This can be useful in any scenario where you want to share the results of 9 | sampling or when sampling with an expensive model because, even if the 10 | sampler crashes, the current state of the chain will always be saved. 11 | 12 | There is currently one backend that can be used to serialize the chain to a 13 | file: :class:`emcee.backends.HDFBackend`. 14 | The methods and options for this backend are documented below. 15 | It can also be used as a reader for existing samplings. 16 | For example, if a chain was saved using the :class:`backends.HDFBackend`, the 17 | results can be accessed as follows: 18 | 19 | .. code-block:: python 20 | 21 | reader = emcee.backends.HDFBackend("chain_filename.h5", read_only=True) 22 | flatchain = reader.get_chain(flat=True) 23 | 24 | The ``read_only`` argument is not required, but it will make sure that you 25 | don't inadvertently overwrite the samples in the file. 26 | 27 | .. autoclass:: emcee.backends.Backend 28 | :inherited-members: 29 | 30 | .. 
autoclass:: emcee.backends.HDFBackend 31 | :inherited-members: 32 | -------------------------------------------------------------------------------- /src/emcee/moves/walk.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | 5 | from .red_blue import RedBlueMove 6 | 7 | __all__ = ["WalkMove"] 8 | 9 | 10 | class WalkMove(RedBlueMove): 11 | """ 12 | A `Goodman & Weare (2010) 13 | `_ "walk move" with 14 | parallelization as described in `Foreman-Mackey et al. (2013) 15 | `_. 16 | 17 | :param s: (optional) 18 | The number of helper walkers to use. By default it will use all the 19 | walkers in the complement. 20 | 21 | """ 22 | 23 | def __init__(self, s=None, **kwargs): 24 | self.s = s 25 | super(WalkMove, self).__init__(**kwargs) 26 | 27 | def get_proposal(self, s, c, random): 28 | c = np.concatenate(c, axis=0) 29 | Ns, Nc = len(s), len(c) 30 | ndim = s.shape[1] 31 | q = np.empty_like(s) 32 | s0 = Nc if self.s is None else self.s 33 | for i in range(Ns): 34 | inds = random.choice(Nc, s0, replace=False) 35 | cov = np.atleast_2d(np.cov(c[inds], rowvar=0)) 36 | q[i] = random.multivariate_normal(s[i], cov) 37 | return q, np.zeros(Ns, dtype=np.float64) 38 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=40.6.0", "wheel", "setuptools_scm"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [tool.black] 6 | line-length = 79 7 | exclude = ''' 8 | /( 9 | \.eggs 10 | | \.git 11 | | \.hg 12 | | \.mypy_cache 13 | | \.tox 14 | | \.venv 15 | | _build 16 | | buck-out 17 | | build 18 | | dist 19 | )/ 20 | ''' 21 | 22 | [tool.isort] 23 | line_length = 79 24 | multi_line_output = 3 25 | include_trailing_comma = true 26 | force_grid_wrap = 0 27 | use_parentheses = true 28 | known_third_party = ["h5py", "matplotlib", 
"numpy", "pkg_resources", "pytest", "setuptools"] 29 | 30 | [tool.coverage.run] 31 | parallel = true 32 | branch = true 33 | source = ["emcee"] 34 | omit = [ 35 | "emcee/interruptible_pool.py", 36 | "emcee/mpi_pool.py", 37 | "emcee/ptsampler.py", 38 | "docs/*", 39 | "tests/*", 40 | "*__init__*" 41 | ] 42 | 43 | [tool.coverage.paths] 44 | source = ["src", "*/site-packages"] 45 | 46 | [tool.coverage.report] 47 | show_missing = true 48 | exclude_lines = [ 49 | "pragma: no cover", 50 | "raise NotImplementedError", 51 | "raise ImportError", 52 | "except ImportError", 53 | "def __len__", 54 | "def __repr__", 55 | "logging.warning", 56 | "deprecation_warning", 57 | "deprecated", 58 | "if tqdm is None" 59 | ] 60 | -------------------------------------------------------------------------------- /joss/make_latex.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | echo "Downloading..." 4 | rm -rf latex.template logo.png aas-logo.png apa.csl 5 | wget -q https://raw.githubusercontent.com/openjournals/whedon/editor-and-reviewers-on-papers/resources/joss/latex.template 6 | wget -q https://raw.githubusercontent.com/openjournals/whedon/editor-and-reviewers-on-papers/resources/joss/logo.png 7 | wget -q https://raw.githubusercontent.com/openjournals/whedon/editor-and-reviewers-on-papers/resources/joss/aas-logo.png 8 | wget -q https://raw.githubusercontent.com/openjournals/whedon/editor-and-reviewers-on-papers/resources/joss/apa.csl 9 | echo "Done" 10 | 11 | pandoc \ 12 | -s paper.md \ 13 | -o paper.tex \ 14 | --template latex.template \ 15 | --csl=apa.csl \ 16 | --bibliography=paper.bib \ 17 | --filter pandoc-citeproc \ 18 | -V repository="https://github.com/dfm/emcee" \ 19 | -V archive_doi="https://doi.org/10.5281/zenodo.3543502" \ 20 | -V review_issue_url="https://github.com/openjournals/joss-reviews/issues/1864" \ 21 | -V editor_url="http://juanjobazan.com" \ 22 | -V graphics="true" \ 23 | 
# -*- coding: utf-8 -*-

import numpy as np

from .red_blue import RedBlueMove

try:
    from scipy.stats import gaussian_kde
except ImportError:
    gaussian_kde = None


__all__ = ["KDEMove"]


class KDEMove(RedBlueMove):
    """A proposal using a KDE of the complementary ensemble

    This is a simplified version of the method used in `kombine
    <https://github.com/bfarr/kombine>`_. If you use this proposal, you
    should use *a lot* of walkers in your ensemble.

    Args:
        bw_method: The bandwidth estimation method. See `the scipy docs
            <https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.gaussian_kde.html>`_
            for allowed values.

    """

    def __init__(self, bw_method=None, **kwargs):
        # scipy is an optional dependency; fail loudly if it is missing.
        if gaussian_kde is None:
            raise ImportError(
                "you need scipy.stats.gaussian_kde to use the KDEMove"
            )
        self.bw_method = bw_method
        super(KDEMove, self).__init__(**kwargs)

    def get_proposal(self, s, c, random):
        # Fit a kernel density estimate to the stacked complementary
        # ensemble, then draw the proposals directly from it.
        complement = np.concatenate(c, axis=0)
        density = gaussian_kde(complement.T, bw_method=self.bw_method)
        proposal = density.resample(len(s), random)
        # The proposal is asymmetric, so the Metropolis factor is the
        # log-density ratio between the current and proposed points.
        log_factor = density.logpdf(s.T) - density.logpdf(proposal)
        return proposal.T, log_factor
"use_edit_page_button": True, 48 | "use_issues_button": True, 49 | "use_repository_button": True, 50 | "use_download_button": True, 51 | } 52 | nb_execution_mode = "off" 53 | nb_execution_timeout = -1 54 | -------------------------------------------------------------------------------- /src/emcee/moves/de_snooker.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | 5 | from .red_blue import RedBlueMove 6 | 7 | __all__ = ["DESnookerMove"] 8 | 9 | 10 | class DESnookerMove(RedBlueMove): 11 | """A snooker proposal using differential evolution. 12 | 13 | Based on `Ter Braak & Vrugt (2008) 14 | `_. 15 | 16 | Credit goes to GitHub user `mdanthony17 `_ 17 | for proposing this as an addition to the original emcee package. 18 | 19 | Args: 20 | gammas (Optional[float]): The mean stretch factor for the proposal 21 | vector. By default, it is :math:`1.7` as recommended by the 22 | reference. 23 | 24 | """ 25 | 26 | def __init__(self, gammas=1.7, **kwargs): 27 | self.gammas = gammas 28 | kwargs["nsplits"] = 4 29 | super(DESnookerMove, self).__init__(**kwargs) 30 | 31 | def get_proposal(self, s, c, random): 32 | Ns = len(s) 33 | Nc = list(map(len, c)) 34 | ndim = s.shape[1] 35 | q = np.empty_like(s) 36 | metropolis = np.empty(Ns, dtype=np.float64) 37 | for i in range(Ns): 38 | w = np.array([c[j][random.randint(Nc[j])] for j in range(3)]) 39 | random.shuffle(w) 40 | z, z1, z2 = w 41 | delta = s[i] - z 42 | norm = np.linalg.norm(delta) 43 | u = delta / norm 44 | q[i] = s[i] + u * self.gammas * (np.dot(u, z1) - np.dot(u, z2)) 45 | metropolis[i] = np.log(np.linalg.norm(q[i] - z)) - np.log(norm) 46 | return q, (ndim - 1.0) * metropolis 47 | -------------------------------------------------------------------------------- /src/emcee/tests/unit/test_autocorr.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 
# -*- coding: utf-8 -*-

import logging

__all__ = ["get_progress_bar"]

logger = logging.getLogger(__name__)

try:
    import tqdm
except ImportError:
    tqdm = None


class _NoOpPBar(object):
    """This class implements the progress bar interface but does nothing"""

    def __enter__(self, *args, **kwargs):
        return self

    def __exit__(self, *args, **kwargs):
        pass

    def update(self, count):
        pass


def get_progress_bar(display, total, **kwargs):
    """Get a progress bar interface with given properties

    If the tqdm library is not installed, this will always return a "progress
    bar" that does nothing.

    Args:
        display (bool or str): Should the bar actually show the progress? Or a
            string to indicate which tqdm bar to use.
        total (int): The total size of the progress bar.
        kwargs (dict): Optional keyword arguments to be passed to the tqdm call.

    """
    # No display requested: hand back the do-nothing implementation.
    if not display:
        return _NoOpPBar()

    # Display requested but tqdm is unavailable: warn once and fall back.
    if tqdm is None:
        logger.warning(
            "You must install the tqdm library to use progress "
            "indicators with emcee"
        )
        return _NoOpPBar()

    # True selects the default bar; a string selects a tqdm variant by name.
    if display is True:
        return tqdm.tqdm(total=total, **kwargs)
    return getattr(tqdm, "tqdm_" + display)(total=total, **kwargs)
# -*- coding: utf-8 -*-

import warnings
from functools import wraps

import numpy as np

__all__ = ["sample_ball", "deprecated", "deprecation_warning"]


def deprecation_warning(msg):
    # stacklevel=2 points the warning at the deprecated call site.
    warnings.warn(msg, category=DeprecationWarning, stacklevel=2)


def deprecated(alternate):
    """Decorator factory that marks a function as deprecated, optionally
    pointing users at ``alternate`` as a replacement."""

    def decorate(func):
        message = f"'{func.__name__}' is deprecated."
        if alternate is not None:
            message += f" Use '{alternate}' instead."

        @wraps(func)
        def wrapped(*args, **kwargs):
            deprecation_warning(message)
            return func(*args, **kwargs)

        return wrapped

    return decorate


@deprecated(None)
def sample_ball(p0, std, size=1):
    """
    Produce a ball of walkers around an initial parameter value.

    :param p0: The initial parameter value.
    :param std: The axis-aligned standard deviation.
    :param size: The number of samples to produce.

    """
    assert len(p0) == len(std)
    # Draw one row at a time so the global RNG stream matches the
    # historical behavior exactly.
    rows = []
    for _ in range(size):
        rows.append(p0 + std * np.random.normal(size=len(p0)))
    return np.vstack(rows)


@deprecated(None)
def sample_ellipsoid(p0, covmat, size=1):
    """
    Produce an ellipsoid of walkers around an initial parameter value,
    according to a covariance matrix.

    :param p0: The initial parameter value.
    :param covmat:
        The covariance matrix. Must be symmetric-positive definite or
        it will raise the exception numpy.linalg.LinAlgError
    :param size: The number of samples to produce.

    """
    return np.random.multivariate_normal(
        np.atleast_1d(p0), np.atleast_2d(covmat), size=size
    )
np.random.seed(0) 34 | ndim, nwalkers = 1, 20 35 | steps = 1000 36 | p0 = sigma * np.random.randn(nwalkers, ndim).astype(np.longdouble) + mjd 37 | assert not all(p0 == mjd) 38 | 39 | with cls(dtype=np.longdouble) as backend: 40 | sampler = emcee.EnsembleSampler( 41 | nwalkers, ndim, log_prob, backend=backend 42 | ) 43 | sampler.run_mcmc(p0, steps) 44 | 45 | samples = sampler.get_chain().reshape((-1,)) 46 | assert samples.dtype == np.longdouble 47 | 48 | assert not np.all(samples == mjd) 49 | assert np.abs(np.mean(samples) - mjd) < 10 * sigma / np.sqrt( 50 | len(samples) 51 | ) 52 | assert 0.1 * sigma < np.std(samples) < 10 * sigma 53 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | ## How to contribute to emcee 2 | 3 | ### Expectations 4 | 5 | emcee is developed and maintained in my spare time and, while I try to be 6 | responsive, I don't always get to every issue immediately. If it has been more 7 | than a week or two, feel free to ping me to try to get my attention. Do not 8 | email me directly; all discussion should happen on [the mailing 9 | list](https://groups.google.com/forum/#!forum/emcee-users). 10 | 11 | ### Did you find a bug? 12 | 13 | **Ensure the bug was not already reported** by searching on GitHub under 14 | [Issues](https://github.com/dfm/emcee/issues). If you're unable to find an 15 | open issue addressing the problem, [open a new 16 | one](https://github.com/dfm/emcee/issues/new). Be sure to include a **title 17 | and clear description**, as much relevant information as possible, and the 18 | simplest possible **code sample** demonstrating the expected behavior that is 19 | not occurring. 20 | 21 | ### Did you write a patch that fixes a bug? 22 | 23 | Open a new GitHub pull request with the patch. Ensure the PR description 24 | clearly describes the problem and solution. 
Include the relevant issue number 25 | if applicable. 26 | 27 | ### Do you intend to add a new feature or change an existing one? 28 | 29 | First, read the [VISION](https://github.com/dfm/emcee/blob/main/VISION.md) 30 | notes and make sure that your feature is consistent with those. In particular, 31 | modifications of the core algorithm or additions of new algorithms are 32 | unlikely to be approved. If your feature seems to be consistent, suggest it on 33 | the [emcee-users mailing 34 | list](https://groups.google.com/forum/#!forum/emcee-users) for some discussion 35 | before opening a pull request. 36 | 37 | ### Do you have questions about the code or about MCMC in general? 38 | 39 | **Do not open an issue.** Ask any questions on the [emcee-users mailing 40 | list](https://groups.google.com/forum/#!forum/emcee-users). 41 | 42 | Thanks! 43 | 44 | [Dan](https://github.com/dfm) 45 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | emcee 2 | ===== 3 | 4 | **The Python ensemble sampling toolkit for affine-invariant MCMC** 5 | 6 | .. image:: https://img.shields.io/badge/GitHub-dfm%2Femcee-blue.svg?style=flat 7 | :target: https://github.com/dfm/emcee 8 | .. image:: https://github.com/dfm/emcee/workflows/Tests/badge.svg 9 | :target: https://github.com/dfm/emcee/actions?query=workflow%3ATests 10 | .. image:: http://img.shields.io/badge/license-MIT-blue.svg?style=flat 11 | :target: https://github.com/dfm/emcee/blob/main/LICENSE 12 | .. image:: http://img.shields.io/badge/arXiv-1202.3665-orange.svg?style=flat 13 | :target: https://arxiv.org/abs/1202.3665 14 | .. image:: https://coveralls.io/repos/github/dfm/emcee/badge.svg?branch=main&style=flat&v=2 15 | :target: https://coveralls.io/github/dfm/emcee?branch=main 16 | .. 
image:: https://readthedocs.org/projects/emcee/badge/?version=latest 17 | :target: http://emcee.readthedocs.io/en/latest/?badge=latest 18 | 19 | 20 | emcee is a stable, well tested Python implementation of the affine-invariant 21 | ensemble sampler for Markov chain Monte Carlo (MCMC) 22 | proposed by 23 | `Goodman & Weare (2010) `_. 24 | The code is open source and has 25 | already been used in several published projects in the Astrophysics 26 | literature. 27 | 28 | Documentation 29 | ------------- 30 | 31 | Read the docs at `emcee.readthedocs.io `_. 32 | 33 | Attribution 34 | ----------- 35 | 36 | Please cite `Foreman-Mackey, Hogg, Lang & Goodman (2012) 37 | `_ if you find this code useful in your 38 | research. The BibTeX entry for the paper is:: 39 | 40 | @article{emcee, 41 | author = {{Foreman-Mackey}, D. and {Hogg}, D.~W. and {Lang}, D. and {Goodman}, J.}, 42 | title = {emcee: The MCMC Hammer}, 43 | journal = {PASP}, 44 | year = 2013, 45 | volume = 125, 46 | pages = {306-312}, 47 | eprint = {1202.3665}, 48 | doi = {10.1086/670067} 49 | } 50 | 51 | License 52 | ------- 53 | 54 | Copyright 2010-2021 Dan Foreman-Mackey and contributors. 55 | 56 | emcee is free software made available under the MIT License. For details see 57 | the LICENSE file. 58 | -------------------------------------------------------------------------------- /docs/user/upgrade.rst: -------------------------------------------------------------------------------- 1 | .. _upgrade: 2 | 3 | Upgrading From Pre-3.0 Versions 4 | =============================== 5 | 6 | The version 3 release of emcee is the biggest update in years. 7 | That being said, we've made every attempt to maintain backwards compatibility 8 | while still offering new features. 9 | The main new features include: 10 | 11 | 1. A :ref:`moves-user` interface that allows the use of a variety of ensemble 12 | proposals, 13 | 14 | 2. A more self consistent and user-friendly :ref:`blobs` interface, 15 | 16 | 3. 
A :ref:`backends` interface that simplifies the process of serializing the 17 | sampling results, and 18 | 19 | 4. The long requested progress bar (implemented using `tqdm 20 | `_) so that users can watch the grass grow 21 | while the sampler does its thing (this is as simple as installing tqdm and 22 | setting ``progress=True`` in :class:`EnsembleSampler`). 23 | 24 | To improve the stability and supportability of emcee, we also removed some 25 | features. 26 | The main removals are as follows: 27 | 28 | 1. The ``threads`` keyword argument has been removed in favor of the ``pool`` 29 | interface (see the :ref:`parallel` tutorial for more information). 30 | The old interface had issues with memory consumption and hanging processes 31 | when the sampler object wasn't explicitly deleted. 32 | The ``pool`` interface has been supported since the first release of emcee 33 | and existing code should be easy to update following the :ref:`parallel` 34 | tutorial. 35 | 36 | 2. The ``MPIPool`` has been removed and forked to the `schwimmbad 37 | `_ project. 38 | There was a longstanding issue with memory leaks and random crashes of the 39 | emcee implementation of the ``MPIPool`` that have been fixed in schwimmbad. 40 | schwimmbad also supports several other ``pool`` interfaces that can be used 41 | for parallel sampling. 42 | See the :ref:`parallel` tutorial for more details. 43 | 44 | 3. The ``PTSampler`` has been removed and forked to the `ptemcee 45 | `_ project. 46 | The existing implementation had been gathering dust and there aren't enough 47 | resources to maintain the sampler within the emcee project. 
# -*- coding: utf-8 -*-

import numpy as np
import pytest

from emcee import EnsembleSampler
from emcee.state import State


def check_rstate(a, b):
    # Compare two numpy RandomState tuples, skipping element 0 (the
    # generator-name string), which np.allclose cannot handle.
    assert all(np.allclose(a_, b_) for a_, b_ in zip(a[1:], b[1:]))


def test_back_compat(seed=1234):
    """A State should unpack like the old (coords, log_prob, [blobs], rstate)
    tuples returned by pre-3.0 samplers."""
    np.random.seed(seed)
    coords = np.random.randn(16, 3)
    log_prob = np.random.randn(len(coords))
    blobs = np.random.randn(len(coords))
    rstate = np.random.get_state()

    # With blobs: four-element unpacking in (coords, log_prob, rstate, blobs)
    # order.
    state = State(coords, log_prob, blobs, rstate)
    c, l, r, b = state
    assert np.allclose(coords, c)
    assert np.allclose(log_prob, l)
    assert np.allclose(blobs, b)
    check_rstate(rstate, r)

    # Without blobs: three-element unpacking.
    state = State(coords, log_prob, None, rstate)
    c, l, r = state
    assert np.allclose(coords, c)
    assert np.allclose(log_prob, l)
    check_rstate(rstate, r)


def test_overwrite(seed=1234):
    """Running the sampler must not mutate the caller's initial positions."""
    np.random.seed(seed)

    def ll(x):
        return -0.5 * np.sum(x**2)

    nwalkers = 64
    p0 = np.random.normal(size=(nwalkers, 1))
    init = np.copy(p0)

    sampler = EnsembleSampler(nwalkers, 1, ll)
    sampler.run_mcmc(p0, 10)
    # p0 should be byte-identical to the saved copy after sampling.
    assert np.allclose(init, p0)


def test_indexing(seed=1234):
    """Integer and negative indexing into a State must match its attributes
    and raise IndexError past the end."""
    np.random.seed(seed)
    coords = np.random.randn(16, 3)
    log_prob = np.random.randn(len(coords))
    blobs = np.random.randn(len(coords))
    rstate = np.random.get_state()

    # With blobs the state has four indexable slots.
    state = State(coords, log_prob, blobs, rstate)
    np.testing.assert_allclose(state[0], state.coords)
    np.testing.assert_allclose(state[1], state.log_prob)
    check_rstate(state[2], state.random_state)
    np.testing.assert_allclose(state[3], state.blobs)
    np.testing.assert_allclose(state[-1], state.blobs)
    with pytest.raises(IndexError):
        state[4]

    # Without blobs only three slots exist and -1 is the random state.
    state = State(coords, log_prob, random_state=rstate)
    np.testing.assert_allclose(state[0], state.coords)
    np.testing.assert_allclose(state[1], state.log_prob)
    check_rstate(state[2], state.random_state)
    check_rstate(state[-1], state.random_state)
    with pytest.raises(IndexError):
        state[3]
52 | if mode == "vector": 53 | _test_normal( 54 | moves.GaussianMove( 55 | np.diag(0.5 * np.ones(ndim)), factor=factor, mode=mode 56 | ), 57 | ndim=ndim, 58 | **kwargs, 59 | ) 60 | with pytest.raises(ValueError): 61 | _test_normal( 62 | moves.GaussianMove(np.diag(0.5 * np.ones(ndim - 1))), 63 | ndim=ndim, 64 | **kwargs, 65 | ) 66 | else: 67 | with pytest.raises(ValueError): 68 | _test_normal( 69 | moves.GaussianMove( 70 | np.diag(0.5 * np.ones(ndim)), factor=factor, mode=mode 71 | ), 72 | ndim=ndim, 73 | **kwargs, 74 | ) 75 | 76 | 77 | @pytest.mark.parametrize("mode,factor", product(["vector"], [None, 2.0, 5.0])) 78 | def test_uniform_gaussian(mode, factor, **kwargs): 79 | _test_uniform(moves.GaussianMove(0.5, factor=factor, mode=mode), **kwargs) 80 | -------------------------------------------------------------------------------- /docs/tutorials/tutorial_rst.tpl: -------------------------------------------------------------------------------- 1 | {%- extends 'display_priority.tpl' -%} 2 | 3 | {% block header %} 4 | .. module:: emcee 5 | 6 | **Note:** This tutorial was generated from an IPython notebook that can be 7 | downloaded `here <../../_static/notebooks/{{ resources.metadata.name }}.ipynb>`_. 8 | 9 | .. _{{resources.metadata.name}}: 10 | {% endblock %} 11 | 12 | {% block in_prompt %} 13 | {% endblock in_prompt %} 14 | 15 | {% block output_prompt %} 16 | {% endblock output_prompt %} 17 | 18 | {% block input %} 19 | {%- if cell.source.strip() and not cell.source.startswith("%") -%} 20 | .. code:: python 21 | 22 | {{ cell.source | indent}} 23 | {% endif -%} 24 | {% endblock input %} 25 | 26 | {% block error %} 27 | :: 28 | 29 | {{ super() }} 30 | {% endblock error %} 31 | 32 | {% block traceback_line %} 33 | {{ line | indent | strip_ansi }} 34 | {% endblock traceback_line %} 35 | 36 | {% block execute_result %} 37 | {% block data_priority scoped %} 38 | {{ super() }} 39 | {% endblock %} 40 | {% endblock execute_result %} 41 | 42 | {% block stream %} 43 | .. 
parsed-literal:: 44 | 45 | {{ output.text | indent }} 46 | {% endblock stream %} 47 | 48 | {% block data_svg %} 49 | .. image:: {{ output.metadata.filenames['image/svg+xml'] | urlencode }} 50 | {% endblock data_svg %} 51 | 52 | {% block data_png %} 53 | .. image:: {{ output.metadata.filenames['image/png'] | urlencode }} 54 | {% endblock data_png %} 55 | 56 | {% block data_jpg %} 57 | .. image:: {{ output.metadata.filenames['image/jpeg'] | urlencode }} 58 | {% endblock data_jpg %} 59 | 60 | {% block data_latex %} 61 | .. math:: 62 | 63 | {{ output.data['text/latex'] | strip_dollars | indent }} 64 | {% endblock data_latex %} 65 | 66 | {% block data_text scoped %} 67 | .. parsed-literal:: 68 | 69 | {{ output.data['text/plain'] | indent }} 70 | {% endblock data_text %} 71 | 72 | {% block data_html scoped %} 73 | .. raw:: html 74 | 75 | {{ output.data['text/html'] | indent }} 76 | {% endblock data_html %} 77 | 78 | {% block markdowncell scoped %} 79 | {{ cell.source | markdown2rst }} 80 | {% endblock markdowncell %} 81 | 82 | {%- block rawcell scoped -%} 83 | {%- if cell.metadata.get('raw_mimetype', '').lower() in resources.get('raw_mimetypes', ['']) %} 84 | {{cell.source}} 85 | {% endif -%} 86 | {%- endblock rawcell -%} 87 | 88 | {% block headingcell scoped %} 89 | {{ ("#" * cell.level + cell.source) | replace('\n', ' ') | markdown2rst }} 90 | {% endblock headingcell %} 91 | 92 | {% block unknowncell scoped %} 93 | unknown type {{cell.type}} 94 | {% endblock unknowncell %} 95 | -------------------------------------------------------------------------------- /document/plots/oned.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import time 4 | from multiprocessing import Pool 5 | 6 | import h5py 7 | import matplotlib.pyplot as pl 8 | import numpy as np 9 | 10 | import emcee 11 | 12 | sys.path.append(os.path.abspath(os.path.join(__file__, "..", "..", ".."))) 13 | 14 | # import acor 15 | 16 | 17 | def 
lnprobfn(p, icov): 18 | return -0.5 * np.dot(p, np.dot(icov, p)) 19 | 20 | 21 | def random_cov(ndim, dof=1): 22 | v = np.random.randn(ndim * (ndim + dof)).reshape((ndim + dof, ndim)) 23 | return sum([np.outer(v[i], v[i]) for i in range(ndim + dof)]) / ( 24 | ndim + dof 25 | ) 26 | 27 | 28 | _rngs = {} 29 | 30 | 31 | def _worker(args): 32 | i, outfn, nsteps = args 33 | 34 | pid = os.getpid() 35 | _random = _rngs.get( 36 | pid, np.random.RandomState(int(int(pid) + time.time())) 37 | ) 38 | _rngs[pid] = _random 39 | 40 | ndim = int(np.ceil(2 ** (7 * _random.rand()))) 41 | nwalkers = 2 * ndim + 2 42 | # nwalkers += nwalkers % 2 43 | print(ndim, nwalkers) 44 | 45 | cov = random_cov(ndim) 46 | icov = np.linalg.inv(cov) 47 | 48 | ens_samp = emcee.EnsembleSampler(nwalkers, ndim, lnprobfn, args=[icov]) 49 | ens_samp.random_state = _random.get_state() 50 | pos, lnprob, state = ens_samp.run_mcmc( 51 | np.random.randn(nwalkers * ndim).reshape([nwalkers, ndim]), nsteps 52 | ) 53 | 54 | proposal = np.diag(cov.diagonal()) 55 | mh_samp = emcee.MHSampler(proposal, ndim, lnprobfn, args=[icov]) 56 | mh_samp.random_state = state 57 | mh_samp.run_mcmc(np.random.randn(ndim), nsteps) 58 | 59 | f = h5py.File(outfn) 60 | f["data"][i, :] = np.array( 61 | [ndim, np.mean(ens_samp.acor), np.mean(mh_samp.acor)] 62 | ) 63 | f.close() 64 | 65 | 66 | def oned(): 67 | nsteps = 10000 68 | niter = 10 69 | nthreads = 2 70 | 71 | outfn = os.path.join(os.path.split(__file__)[0], "gauss_scaling.h5") 72 | print(outfn) 73 | f = h5py.File(outfn, "w") 74 | f.create_dataset("data", (niter, 3), "f") 75 | f.close() 76 | 77 | pool = Pool(nthreads) 78 | pool.map(_worker, [(i, outfn, nsteps) for i in range(niter)]) 79 | 80 | f = h5py.File(outfn) 81 | data = f["data"][...] 
Since emcee is a pure Python module, it should be pretty easy to install.
All you'll need is `numpy <https://numpy.org/>`_.
_source: 46 | 47 | From source 48 | ----------- 49 | 50 | emcee is developed on `GitHub `_ so if you feel 51 | like hacking or if you like all the most recent shininess, you can clone the 52 | source repository and install from there 53 | 54 | .. code-block:: bash 55 | 56 | python -m pip install -U pip 57 | python -m pip install -U setuptools setuptools_scm pep517 58 | git clone https://github.com/dfm/emcee.git 59 | cd emcee 60 | python -m pip install -e . 61 | 62 | 63 | Test the installation 64 | --------------------- 65 | 66 | To make sure that the installation went alright, you can execute some unit and 67 | integration tests. 68 | To do this, you'll need the source (see :ref:`source` above) and 69 | `py.test `_. 70 | You'll execute the tests by running the following command in the root 71 | directory of the source code: 72 | 73 | .. code-block:: bash 74 | 75 | python -m pip install -U pytest h5py 76 | python -m pytest -v src/emcee/tests 77 | 78 | This might take a few minutes but you shouldn't get any errors if all went 79 | as planned. 80 | -------------------------------------------------------------------------------- /src/emcee/moves/mh.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | 5 | from ..state import State 6 | from .move import Move 7 | 8 | __all__ = ["MHMove"] 9 | 10 | 11 | class MHMove(Move): 12 | r"""A general Metropolis-Hastings proposal 13 | 14 | Concrete implementations can be made by providing a ``proposal_function`` 15 | argument that implements the proposal as described below. 16 | For standard Gaussian Metropolis moves, :class:`moves.GaussianMove` can be 17 | used. 18 | 19 | Args: 20 | proposal_function: The proposal function. It should take 2 arguments: a 21 | numpy-compatible random number generator and a ``(K, ndim)`` list 22 | of coordinate vectors. 
    def propose(self, model, state):
        """Use the move to generate a proposal and compute the acceptance

        Args:
            model: The model wrapper for this step; ``propose`` uses its
                ``compute_log_prob_fn`` callable (log-probabilities and blobs
                for a set of coordinates) and its ``random`` number generator.
            state (State): The current state of the ensemble, providing the
                walker ``coords`` and their ``log_prob`` values.

        Returns:
            A tuple ``(state, accepted)`` where ``state`` is the updated
            :class:`State` and ``accepted`` is a boolean array flagging the
            walkers whose proposed positions were accepted.

        """
        # Check to make sure that the dimensions match.
        nwalkers, ndim = state.coords.shape
        if self.ndim is not None and self.ndim != ndim:
            raise ValueError("Dimension mismatch in proposal")

        # Get the move-specific proposal.
        q, factors = self.get_proposal(state.coords, model.random)

        # Compute the lnprobs of the proposed position.
        new_log_probs, new_blobs = model.compute_log_prob_fn(q)

        # Vectorized Metropolis-Hastings accept/reject: `factors` carries the
        # log-ratio of proposal densities so asymmetric proposals stay correct.
        lnpdiff = new_log_probs - state.log_prob + factors
        accepted = np.log(model.random.rand(nwalkers)) < lnpdiff

        # Update the parameters
        new_state = State(q, log_prob=new_log_probs, blobs=new_blobs)
        state = self.update(state, new_state, accepted)

        return state, accepted
class State(object):
    """The state of the ensemble during an MCMC run

    For backwards compatibility, unpacking a ``State`` yields ``coords,
    log_prob, (blobs), random_state``, where ``blobs`` only appears when it
    exists and is not ``None``.

    Args:
        coords (ndarray[nwalkers, ndim]): The current positions of the
            walkers in the parameter space.
        log_prob (ndarray[nwalkers, ndim], Optional): Log posterior
            probabilities for the walkers at positions given by ``coords``.
        blobs (Optional): The metadata "blobs" associated with the current
            position. The value is only returned if lnpostfn returns blobs
            too.
        random_state (Optional): The current state of the random number
            generator.
    """

    __slots__ = "coords", "log_prob", "blobs", "random_state"

    def __init__(
        self, coords, log_prob=None, blobs=None, random_state=None, copy=False
    ):
        # Either deep-copy every stored field or pass it through untouched.
        clone = deepcopy if copy else (lambda value: value)

        # Copy-constructor path: ``coords`` is itself a State-like object,
        # so mirror all four of its fields and ignore the other arguments.
        if hasattr(coords, "coords"):
            self.coords = clone(coords.coords)
            self.log_prob = clone(coords.log_prob)
            self.blobs = clone(coords.blobs)
            self.random_state = clone(coords.random_state)
        else:
            self.coords = clone(np.atleast_2d(coords))
            self.log_prob = clone(log_prob)
            self.blobs = clone(blobs)
            self.random_state = clone(random_state)

    def __len__(self):
        # Three fields unpack by default; four when blobs are present.
        return 3 if self.blobs is None else 4

    def __repr__(self):
        return "State({0}, log_prob={1}, blobs={2}, random_state={3})".format(
            self.coords, self.log_prob, self.blobs, self.random_state
        )

    def __iter__(self):
        fields = [self.coords, self.log_prob, self.random_state]
        if self.blobs is not None:
            fields.append(self.blobs)
        return iter(tuple(fields))

    def __getitem__(self, index):
        # Wrap negative indices exactly like the tuple protocol; repeated
        # wrapping matches the original recursive lookup for very negative
        # values.
        while index < 0:
            index += len(self)
        if index == 0:
            return self.coords
        if index == 1:
            return self.log_prob
        if index == 2:
            return self.random_state
        if index == 3 and self.blobs is not None:
            return self.blobs
        raise IndexError("Invalid index '{0}'".format(index))
13 | 14 | This `Differential evolution proposal 15 | `_ is 16 | implemented following `Nelson et al. (2013) 17 | `_. 18 | 19 | Args: 20 | sigma (float): The standard deviation of the Gaussian used to stretch 21 | the proposal vector. 22 | gamma0 (Optional[float]): The mean stretch factor for the proposal 23 | vector. By default, it is :math:`2.38 / \sqrt{2\,\mathrm{ndim}}` 24 | as recommended by the two references. 25 | 26 | """ 27 | 28 | def __init__(self, sigma=1.0e-5, gamma0=None, **kwargs): 29 | self.sigma = sigma 30 | self.gamma0 = gamma0 31 | super().__init__(**kwargs) 32 | 33 | def setup(self, coords): 34 | self.g0 = self.gamma0 35 | if self.g0 is None: 36 | # Pure MAGIC: 37 | ndim = coords.shape[1] 38 | self.g0 = 2.38 / np.sqrt(2 * ndim) 39 | 40 | def get_proposal(self, s, c, random): 41 | c = np.concatenate(c, axis=0) 42 | ns, ndim = s.shape 43 | nc = c.shape[0] 44 | 45 | # Get the pair indices 46 | pairs = _get_nondiagonal_pairs(nc) 47 | 48 | # Sample from the pairs 49 | indices = random.choice(pairs.shape[0], size=ns, replace=True) 50 | pairs = pairs[indices] 51 | 52 | # Compute diff vectors 53 | diffs = np.diff(c[pairs], axis=1).squeeze(axis=1) # (ns, ndim) 54 | 55 | # Sample a gamma value for each walker following Nelson et al. (2013) 56 | gamma = self.g0 * (1 + self.sigma * random.randn(ns, 1)) # (ns, 1) 57 | 58 | # In this way, sigma is the standard deviation of the distribution of gamma, 59 | # instead of the standard deviation of the distribution of the proposal as proposed by Ter Braak (2006). 60 | # Otherwise, sigma should be tuned for each dimension, which confronts the idea of affine-invariance. 
61 | 62 | q = s + gamma * diffs 63 | 64 | return q, np.zeros(ns, dtype=np.float64) 65 | 66 | 67 | @lru_cache(maxsize=1) 68 | def _get_nondiagonal_pairs(n: int) -> np.ndarray: 69 | """Get the indices of a square matrix with size n, excluding the diagonal.""" 70 | rows, cols = np.tril_indices(n, -1) # -1 to exclude diagonal 71 | 72 | # Combine rows-cols and cols-rows pairs 73 | pairs = np.column_stack( 74 | [np.concatenate([rows, cols]), np.concatenate([cols, rows])] 75 | ) 76 | 77 | return pairs 78 | -------------------------------------------------------------------------------- /docs/user/moves.rst: -------------------------------------------------------------------------------- 1 | .. _moves-user: 2 | 3 | Moves 4 | ===== 5 | 6 | emcee was originally built on the "stretch move" ensemble method from `Goodman 7 | & Weare (2010) `_, but 8 | starting with version 3, emcee nows allows proposals generated from a mixture 9 | of "moves". 10 | This can be used to get a more efficient sampler for models where the stretch 11 | move is not well suited, such as high dimensional or multi-modal probability 12 | surfaces. 13 | 14 | A "move" is an algorithm for updating the coordinates of walkers in an 15 | ensemble sampler based on the current set of coordinates in a manner that 16 | satisfies detailed balance. 17 | In most cases, the update for each walker is based on the coordinates in some 18 | other set of walkers, the complementary ensemble. 19 | 20 | These moves have been designed to update the ensemble in parallel following 21 | the prescription from `Foreman-Mackey et al. (2013) 22 | `_. 23 | This means that computationally expensive models can take advantage of 24 | multiple CPUs to accelerate sampling (see the :ref:`parallel` tutorial for 25 | more information). 26 | 27 | The moves are selected using the ``moves`` keyword for the 28 | :class:`EnsembleSampler` and the mixture can optionally be a weighted mixture 29 | of moves. 
30 | During sampling, at each step, a move is randomly selected from the mixture 31 | and used as the proposal. 32 | 33 | The default move is still the :class:`moves.StretchMove`, but the others are 34 | described below. 35 | Many standard ensemble moves are available with parallelization provided by 36 | the :class:`moves.RedBlueMove` abstract base class that implements the 37 | parallelization method described by `Foreman-Mackey et al. (2013) 38 | `_. 39 | In addition to these moves, there is also a framework for building 40 | Metropolis–Hastings proposals that update the walkers using independent 41 | proposals. 42 | :class:`moves.MHMove` is the base class for this type of move and a concrete 43 | implementation of a Gaussian Metropolis proposal is found in 44 | :class:`moves.GaussianMove`. 45 | 46 | .. note:: The :ref:`moves` tutorial shows a concrete example of how to use 47 | this interface. 48 | 49 | Ensemble moves 50 | -------------- 51 | 52 | .. autoclass:: emcee.moves.RedBlueMove 53 | :members: 54 | 55 | .. autoclass:: emcee.moves.StretchMove 56 | :members: 57 | 58 | .. autoclass:: emcee.moves.WalkMove 59 | :members: 60 | 61 | .. autoclass:: emcee.moves.KDEMove 62 | :members: 63 | 64 | .. autoclass:: emcee.moves.DEMove 65 | :members: 66 | 67 | .. autoclass:: emcee.moves.DESnookerMove 68 | :members: 69 | 70 | Metropolis–Hastings moves 71 | ------------------------- 72 | 73 | .. autoclass:: emcee.moves.MHMove 74 | :members: 75 | 76 | .. 
def uniform_log_prob(params):
    """Log-density of a uniform distribution on the unit hypercube:
    0 inside [0, 1] in every dimension, -inf outside."""
    # Keep the original any(>1)/any(<0) form (rather than the De Morgan
    # all(...) equivalent) so NaN inputs behave identically.
    out_of_bounds = np.any(params > 1) or np.any(params < 0)
    return -np.inf if out_of_bounds else 0.0
def _test_uniform(proposal, nwalkers=32, nsteps=2000, seed=1234):
    """Run ``proposal`` from uniform starting positions in 1D and check the
    acceptance fraction; if scipy is available, also check that the thinned
    chain is *not* uniform (a K-S statistic well above zero).

    NOTE(review): this samples ``normal_log_prob`` even though
    ``uniform_log_prob`` is defined above; the trailing ``ks > 0.1``
    assertion (chain should "fail" the uniform K-S test) suggests this is
    deliberate, but it is worth confirming against upstream emcee.
    """
    # Set up the random number generator.
    np.random.seed(seed)

    # Initialize the ensemble and proposal.
    coords = np.random.rand(nwalkers, 1)

    sampler = emcee.EnsembleSampler(
        nwalkers, 1, normal_log_prob, moves=proposal
    )
    sampler.run_mcmc(coords, nsteps)

    # Check the acceptance fraction.
    acc = sampler.acceptance_fraction
    assert np.all(
        (acc < 0.9) * (acc > 0.1)
    ), "Invalid acceptance fraction\n{0}".format(acc)

    if stats is not None:
        # Check that the resulting chain "fails" the K-S test.
        # Shuffle first so the every-100th thinning below is not correlated
        # with chain order.
        samps = sampler.get_chain(flat=True)
        np.random.shuffle(samps)
        ks, _ = stats.kstest(samps[::100, 0], "uniform")
        assert ks > 0.1, "The K-S test failed"
class VariableLogProb:
    """Log-probability stub whose blob length is controlled by the mutable
    attribute ``i``; tests bump ``i`` between calls to provoke blob-shape
    mismatches in the backends."""

    def __init__(self):
        # Initial blob length; external test code mutates this directly.
        self.i = 3

    def __call__(self, *args):
        blob = np.zeros(self.i)
        return 0, blob
6 | 7 | ## Our Standards 8 | 9 | Examples of behavior that contributes to creating a positive environment include: 10 | 11 | * Using welcoming and inclusive language 12 | * Being respectful of differing viewpoints and experiences 13 | * Gracefully accepting constructive criticism 14 | * Focusing on what is best for the community 15 | * Showing empathy towards other community members 16 | 17 | Examples of unacceptable behavior by participants include: 18 | 19 | * The use of sexualized language or imagery and unwelcome sexual attention or advances 20 | * Trolling, insulting/derogatory comments, and personal or political attacks 21 | * Public or private harassment 22 | * Publishing others' private information, such as a physical or electronic address, without explicit permission 23 | * Other conduct which could reasonably be considered inappropriate in a professional setting 24 | 25 | ## Our Responsibilities 26 | 27 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. 28 | 29 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. 30 | 31 | ## Scope 32 | 33 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. 
34 | 35 | ## Enforcement 36 | 37 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at foreman.mackey@gmail.com. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. 38 | 39 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. 40 | 41 | ## Attribution 42 | 43 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version] 44 | 45 | [homepage]: http://contributor-covenant.org 46 | [version]: http://contributor-covenant.org/version/1/4/ 47 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | tags: 8 | - "*" 9 | paths-ignore: 10 | - "joss/**" 11 | - "docs/**" 12 | pull_request: 13 | 14 | jobs: 15 | tests: 16 | runs-on: ${{ matrix.os }} 17 | strategy: 18 | matrix: 19 | python-version: ["3.9", "3.10", "3.11", "3.12"] 20 | os: ["ubuntu-latest"] 21 | include: 22 | - python-version: "3.9" 23 | os: "macos-latest" 24 | - python-version: "3.9" 25 | os: "windows-latest" 26 | 27 | steps: 28 | - name: Checkout 29 | uses: actions/checkout@v4 30 | with: 31 | fetch-depth: 0 32 | - name: Setup Python 33 | uses: actions/setup-python@v5 34 | with: 35 | python-version: ${{ matrix.python-version }} 36 | - name: Install dependencies 37 | run: | 38 | python -m pip install -U pip 39 | python -m pip install -U coveralls coverage[toml] tox 
tox-gh-actions 40 | - name: Run tests 41 | run: python -m tox 42 | - name: Combine and upload coverage 43 | run: | 44 | python -m coverage combine 45 | python -m coverage xml -i 46 | python -m coveralls --service=github 47 | env: 48 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 49 | COVERALLS_PARALLEL: true 50 | COVERALLS_FLAG_NAME: ${{ matrix.python-version }}-${{ matrix.os }} 51 | 52 | coverage: 53 | needs: tests 54 | runs-on: ubuntu-latest 55 | steps: 56 | - name: Setup Python 57 | uses: actions/setup-python@v5 58 | with: 59 | python-version: "3.9" 60 | - name: Finish coverage collection 61 | run: | 62 | python -m pip install -U pip 63 | python -m pip install -U coveralls 64 | python -m coveralls --finish 65 | env: 66 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 67 | 68 | lint: 69 | runs-on: ubuntu-latest 70 | steps: 71 | - uses: actions/checkout@v4 72 | with: 73 | fetch-depth: 0 74 | - name: Setup Python 75 | uses: actions/setup-python@v5 76 | with: 77 | python-version: "3.9" 78 | - name: Install dependencies 79 | run: | 80 | python -m pip install -U pip 81 | python -m pip install tox 82 | - name: Lint the code 83 | run: python -m tox -e lint 84 | 85 | build: 86 | runs-on: ubuntu-latest 87 | steps: 88 | - uses: actions/checkout@v4 89 | with: 90 | fetch-depth: 0 91 | - uses: actions/setup-python@v5 92 | name: Install Python 93 | with: 94 | python-version: "3.9" 95 | - name: Build sdist and wheel 96 | run: | 97 | python -m pip install -U pip 98 | python -m pip install -U build 99 | python -m build . 
100 | - uses: actions/upload-artifact@v4 101 | with: 102 | path: dist/* 103 | 104 | upload_pypi: 105 | needs: [tests, lint, build] 106 | runs-on: ubuntu-latest 107 | if: startsWith(github.ref, 'refs/tags/') 108 | steps: 109 | - uses: actions/download-artifact@v4 110 | with: 111 | name: artifact 112 | path: dist 113 | 114 | - uses: pypa/gh-action-pypi-publish@v1.12.4 115 | with: 116 | user: __token__ 117 | password: ${{ secrets.pypi_password }} 118 | -------------------------------------------------------------------------------- /joss/paper.bib: -------------------------------------------------------------------------------- 1 | %% This BibTeX bibliography file was created using BibDesk. 2 | %% http://bibdesk.sourceforge.net/ 3 | 4 | %% Created for Dan Foreman-Mackey at 2019-10-17 16:15:45 -0400 5 | 6 | 7 | %% Saved with string encoding Unicode (UTF-8) 8 | 9 | 10 | 11 | @article{Farr:2015, 12 | Author = {{Farr}, B. and {Farr}, W.~M.}, 13 | Date-Added = {2019-10-17 16:15:45 -0400}, 14 | Date-Modified = {2019-10-17 16:15:45 -0400}, 15 | Note = {in prep}, 16 | Title = {kombine: a kernel-density-based, embarrassingly parallel ensemble sampler}, 17 | Year = 2015, 18 | Url = {https://github.com/bfarr/kombine}} 19 | 20 | @article{Ter-Braak:2008, 21 | Author = {{ter Braak}, Cajo J.~F. 
and Vrugt, Jasper A}, 22 | Date-Added = {2019-10-17 16:07:14 -0400}, 23 | Date-Modified = {2019-10-17 16:15:43 -0400}, 24 | Journal = {Statistics and Computing}, 25 | Number = {4}, 26 | Pages = {435--446}, 27 | Publisher = {Springer}, 28 | Title = {{Differential evolution Markov chain with snooker updater and fewer chains}}, 29 | Volume = {18}, 30 | Year = {2008}, 31 | Doi = {10.1007/s11222-008-9104-9}} 32 | 33 | @article{Ter-Braak:2006, 34 | Author = {{ter Braak}, Cajo J.~F.}, 35 | Date-Added = {2019-10-17 16:06:50 -0400}, 36 | Date-Modified = {2019-10-17 16:15:43 -0400}, 37 | Journal = {Statistics and Computing}, 38 | Number = {3}, 39 | Pages = {239--249}, 40 | Publisher = {Springer}, 41 | Title = {{A Markov Chain Monte Carlo version of the genetic algorithm Differential Evolution: easy Bayesian computing for real parameter spaces}}, 42 | Volume = {16}, 43 | Year = {2006}, 44 | Doi = {10.1007/s11222-006-8769-1}} 45 | 46 | @article{Speagle:2019, 47 | Adsnote = {Provided by the SAO/NASA Astrophysics Data System}, 48 | Adsurl = {https://ui.adsabs.harvard.edu/abs/2019arXiv190402180S}, 49 | Archiveprefix = {arXiv}, 50 | Author = {{Speagle}, Joshua S}, 51 | Date-Added = {2019-10-17 15:43:02 -0400}, 52 | Date-Modified = {2019-10-17 15:43:03 -0400}, 53 | Eid = {arXiv:1904.02180}, 54 | Eprint = {1904.02180}, 55 | Journal = {arXiv e-prints}, 56 | Keywords = {Astrophysics - Instrumentation and Methods for Astrophysics, Statistics - Computation}, 57 | Month = {Apr}, 58 | Pages = {arXiv:1904.02180}, 59 | Primaryclass = {astro-ph.IM}, 60 | Title = {{dynesty: A Dynamic Nested Sampling Package for Estimating Bayesian Posteriors and Evidences}}, 61 | Year = {2019}} 62 | 63 | @article{Goodman:2010, 64 | Author = {Goodman, Jonathan and Weare, Jonathan}, 65 | Date-Added = {2019-10-17 14:38:39 -0400}, 66 | Date-Modified = {2019-10-17 14:38:40 -0400}, 67 | Journal = {Communications in applied mathematics and computational science}, 68 | Number = {1}, 69 | Pages = {65--80}, 70 | 
Publisher = {Mathematical Sciences Publishers}, 71 | Title = {Ensemble samplers with affine invariance}, 72 | Volume = {5}, 73 | Year = {2010}, 74 | Doi = {10.2140/camcos.2010.5.65}} 75 | 76 | @article{Foreman-Mackey:2013, 77 | Adsnote = {Provided by the SAO/NASA Astrophysics Data System}, 78 | Adsurl = {https://ui.adsabs.harvard.edu/abs/2013PASP..125..306F}, 79 | Archiveprefix = {arXiv}, 80 | Author = {{Foreman-Mackey}, Daniel and {Hogg}, David W. and {Lang}, Dustin and {Goodman}, Jonathan}, 81 | Date-Added = {2019-10-17 14:36:34 -0400}, 82 | Date-Modified = {2019-10-17 14:36:36 -0400}, 83 | Doi = {10.1086/670067}, 84 | Eprint = {1202.3665}, 85 | Journal = {Publications of the Astronomical Society of the Pacific}, 86 | Keywords = {Astrophysics - Instrumentation and Methods for Astrophysics, Physics - Computational Physics, Statistics - Computation}, 87 | Month = {Mar}, 88 | Number = {925}, 89 | Pages = {306}, 90 | Primaryclass = {astro-ph.IM}, 91 | Title = {{emcee: The MCMC Hammer}}, 92 | Volume = {125}, 93 | Year = {2013}, 94 | Bdsk-Url-1 = {https://doi.org/10.1086/670067}} 95 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | emcee 2 | ===== 3 | 4 | **emcee** is an MIT licensed pure-Python implementation of Goodman & Weare's 5 | `Affine Invariant Markov chain Monte Carlo (MCMC) Ensemble sampler 6 | `_ and these pages will 7 | show you how to use it. 8 | 9 | This documentation won't teach you too much about MCMC but there are a lot 10 | of resources available for that (try `this one 11 | `_). 12 | We also `published a paper `_ explaining 13 | the emcee algorithm and implementation in detail. 14 | 15 | emcee has been used in quite a few projects in the astrophysical literature and 16 | it is being actively developed on `GitHub `_. 17 | 18 | .. 
image:: https://img.shields.io/badge/GitHub-dfm%2Femcee-blue.svg?style=flat 19 | :target: https://github.com/dfm/emcee 20 | .. image:: https://github.com/dfm/emcee/workflows/Tests/badge.svg 21 | :target: https://github.com/dfm/emcee/actions?query=workflow%3ATests 22 | .. image:: https://img.shields.io/badge/license-MIT-blue.svg?style=flat 23 | :target: https://github.com/dfm/emcee/blob/main/LICENSE 24 | .. image:: https://img.shields.io/badge/arXiv-1202.3665-orange.svg?style=flat 25 | :target: https://arxiv.org/abs/1202.3665 26 | .. image:: https://coveralls.io/repos/github/dfm/emcee/badge.svg?branch=main&style=flat 27 | :target: https://coveralls.io/github/dfm/emcee?branch=main 28 | 29 | 30 | Basic Usage 31 | ----------- 32 | 33 | If you wanted to draw samples from a 5 dimensional Gaussian, you would do 34 | something like: 35 | 36 | .. code-block:: python 37 | 38 | import numpy as np 39 | import emcee 40 | 41 | def log_prob(x, ivar): 42 | return -0.5 * np.sum(ivar * x ** 2) 43 | 44 | ndim, nwalkers = 5, 100 45 | ivar = 1. / np.random.rand(ndim) 46 | p0 = np.random.randn(nwalkers, ndim) 47 | 48 | sampler = emcee.EnsembleSampler(nwalkers, ndim, log_prob, args=[ivar]) 49 | sampler.run_mcmc(p0, 10000) 50 | 51 | A more complete example is available in the :ref:`quickstart` tutorial. 52 | 53 | 54 | How to Use This Guide 55 | --------------------- 56 | 57 | To start, you're probably going to need to follow the :ref:`install` guide to 58 | get emcee installed on your computer. 59 | After you finish that, you can probably learn most of what you need from the 60 | tutorials listed below (you might want to start with 61 | :ref:`quickstart` and go from there). 62 | If you need more details about specific functionality, the User Guide below 63 | should have what you need. 64 | 65 | We welcome bug reports, patches, feature requests, and other comments via `the GitHub 66 | issue tracker `_, but you should check out the 67 | `contribution guidelines `_ 68 | first. 
69 | If you have a question about the use of emcee, please post it to `the users list 70 | `_ instead of the issue tracker. 71 | 72 | 73 | .. toctree:: 74 | :maxdepth: 2 75 | :caption: User Guide 76 | 77 | user/install 78 | user/sampler 79 | user/moves 80 | user/blobs 81 | user/backends 82 | user/autocorr 83 | user/upgrade 84 | user/faq 85 | 86 | .. toctree:: 87 | :maxdepth: 1 88 | :caption: Tutorials 89 | 90 | tutorials/quickstart 91 | tutorials/line 92 | tutorials/parallel 93 | tutorials/autocorr 94 | tutorials/monitor 95 | tutorials/moves 96 | 97 | 98 | License & Attribution 99 | --------------------- 100 | 101 | Copyright 2010-2021 Dan Foreman-Mackey and `contributors `_. 102 | 103 | emcee is free software made available under the MIT License. For details 104 | see the ``LICENSE``. 105 | 106 | If you make use of emcee in your work, please cite our paper 107 | (`arXiv `_, 108 | `ADS `_, 109 | `BibTeX `_). 110 | 111 | 112 | Changelog 113 | --------- 114 | 115 | .. include:: ../HISTORY.rst 116 | -------------------------------------------------------------------------------- /HISTORY.rst: -------------------------------------------------------------------------------- 1 | .. 
:changelog: 2 | 3 | 3.1.2 (2022-05-10) 4 | ++++++++++++++++++ 5 | 6 | - Removed ``numpy`` from ``setup_requires`` `#427 `_ 7 | - Made the sampler state indexable `#425 `_ 8 | 9 | 10 | 3.1.1 (2021-08-23) 11 | ++++++++++++++++++ 12 | 13 | - Added support for a progress bar description `#401 `_ 14 | 15 | 16 | 3.1.0 (2021-06-25) 17 | ++++++++++++++++++ 18 | 19 | - Added preliminary support for named parameters `#386 `_ 20 | - Improved handling of blob dtypes `#363 `_ 21 | - Fixed various small bugs and documentation issues 22 | 23 | 24 | 3.0.2 (2019-11-15) 25 | ++++++++++++++++++ 26 | 27 | - Added tutorial for moves interface 28 | - Added information about contributions to documentation 29 | - Improved documentation for installation and testing 30 | - Fixed dtype issues and instability in linear dependence test 31 | - Final release for `JOSS `_ submission 32 | 33 | 34 | 3.0.1 (2019-10-28) 35 | ++++++++++++++++++ 36 | 37 | - Added support for long double dtypes 38 | - Prepared manuscript to submit to `JOSS `_ 39 | - Improved packaging and release infrastructure 40 | - Fixed bug in initial linear dependence test 41 | 42 | 43 | 3.0.0 (2019-09-30) 44 | ++++++++++++++++++ 45 | 46 | - Added progress bars using `tqdm `_. 47 | - Added HDF5 backend using `h5py `_. 48 | - Added new ``Move`` interface for more flexible specification of proposals. 49 | - Improved autocorrelation time estimation algorithm. 50 | - Switched documentation to using Jupyter notebooks for tutorials. 51 | - More details can be found `on the docs `_. 52 | 53 | 2.2.0 (2016-07-12) 54 | ++++++++++++++++++ 55 | 56 | - Improved autocorrelation time computation. 57 | - Numpy compatibility issues. 58 | - Fixed deprecated integer division behavior in PTSampler. 59 | 60 | 61 | 2.1.0 (2014-05-22) 62 | ++++++++++++++++++ 63 | 64 | - Removing dependence on ``acor`` extension. 65 | - Added arguments to ``PTSampler`` function. 66 | - Added automatic load-balancing for MPI runs. 
67 | - Added custom load-balancing for MPI and multiprocessing. 68 | - New default multiprocessing pool that supports ``^C``. 69 | 70 | 71 | 2.0.0 (2013-11-17) 72 | ++++++++++++++++++ 73 | 74 | - **Re-licensed under the MIT license!** 75 | - Clearer less verbose documentation. 76 | - Added checks for parameters becoming infinite or NaN. 77 | - Added checks for log-probability becoming NaN. 78 | - Improved parallelization and various other tweaks in ``PTSampler``. 79 | 80 | 81 | 1.2.0 (2013-01-30) 82 | ++++++++++++++++++ 83 | 84 | - Added a parallel tempering sampler ``PTSampler``. 85 | - Added instructions and utilities for using ``emcee`` with ``MPI``. 86 | - Added ``flatlnprobability`` property to the ``EnsembleSampler`` object 87 | to be consistent with the ``flatchain`` property. 88 | - Updated document for publication in PASP. 89 | - Various bug fixes. 90 | 91 | 92 | 1.1.3 (2012-11-22) 93 | ++++++++++++++++++ 94 | 95 | - Made the packaging system more robust even when numpy is not installed. 96 | 97 | 98 | 1.1.2 (2012-08-06) 99 | ++++++++++++++++++ 100 | 101 | - Another bug fix related to metadata blobs: the shape of the final ``blobs`` 102 | object was incorrect and all of the entries would generally be identical 103 | because we needed to copy the list that was appended at each step. Thanks 104 | goes to Jacqueline Chen (MIT) for catching this problem. 105 | 106 | 107 | 1.1.1 (2012-07-30) 108 | ++++++++++++++++++ 109 | 110 | - Fixed bug related to metadata blobs. The sample function was yielding 111 | the ``blobs`` object even when it wasn't expected. 112 | 113 | 114 | 1.1.0 (2012-07-28) 115 | ++++++++++++++++++ 116 | 117 | - Allow the ``lnprobfn`` to return arbitrary "blobs" of data as well as the 118 | log-probability. 119 | - Python 3 compatible (thanks Alex Conley)! 120 | - Various speed ups and clean ups in the core code base. 121 | - New documentation with better examples and more discussion. 
122 | 123 | 124 | 1.0.1 (2012-03-31) 125 | ++++++++++++++++++ 126 | 127 | - Fixed transpose bug in the usage of ``acor`` in ``EnsembleSampler``. 128 | 129 | 130 | 1.0.0 (2012-02-15) 131 | ++++++++++++++++++ 132 | 133 | - Initial release. 134 | -------------------------------------------------------------------------------- /src/emcee/moves/red_blue.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | 5 | from ..state import State 6 | from .move import Move 7 | 8 | __all__ = ["RedBlueMove"] 9 | 10 | 11 | class RedBlueMove(Move): 12 | """ 13 | An abstract red-blue ensemble move with parallelization as described in 14 | `Foreman-Mackey et al. (2013) `_. 15 | 16 | Args: 17 | nsplits (Optional[int]): The number of sub-ensembles to use. Each 18 | sub-ensemble is updated in parallel using the other sets as the 19 | complementary ensemble. The default value is ``2`` and you 20 | probably won't need to change that. 21 | 22 | randomize_split (Optional[bool]): Randomly shuffle walkers between 23 | sub-ensembles. The same number of walkers will be assigned to 24 | each sub-ensemble on each iteration. By default, this is ``True``. 25 | 26 | live_dangerously (Optional[bool]): By default, an update will fail with 27 | a ``RuntimeError`` if the number of walkers is smaller than twice 28 | the dimension of the problem because the walkers would then be 29 | stuck on a low dimensional subspace. This can be avoided by 30 | switching between the stretch move and, for example, a 31 | Metropolis-Hastings step. If you want to do this and suppress the 32 | error, set ``live_dangerously = True``. Thanks goes (once again) 33 | to @dstndstn for this wonderful terminology. 
34 | 35 | """ 36 | 37 | def __init__( 38 | self, nsplits=2, randomize_split=True, live_dangerously=False 39 | ): 40 | self.nsplits = int(nsplits) 41 | self.live_dangerously = live_dangerously 42 | self.randomize_split = randomize_split 43 | 44 | def setup(self, coords): 45 | pass 46 | 47 | def get_proposal(self, sample, complement, random): 48 | raise NotImplementedError( 49 | "The proposal must be implemented by " "subclasses" 50 | ) 51 | 52 | def propose(self, model, state): 53 | """Use the move to generate a proposal and compute the acceptance 54 | 55 | Args: 56 | coords: The initial coordinates of the walkers. 57 | log_probs: The initial log probabilities of the walkers. 58 | log_prob_fn: A function that computes the log probabilities for a 59 | subset of walkers. 60 | random: A numpy-compatible random number state. 61 | 62 | """ 63 | # Check that the dimensions are compatible. 64 | nwalkers, ndim = state.coords.shape 65 | if nwalkers < 2 * ndim and not self.live_dangerously: 66 | raise RuntimeError( 67 | "It is unadvisable to use a red-blue move " 68 | "with fewer walkers than twice the number of " 69 | "dimensions." 70 | ) 71 | 72 | # Run any move-specific setup. 73 | self.setup(state.coords) 74 | 75 | # Split the ensemble in half and iterate over these two halves. 76 | accepted = np.zeros(nwalkers, dtype=bool) 77 | all_inds = np.arange(nwalkers) 78 | inds = all_inds % self.nsplits 79 | if self.randomize_split: 80 | model.random.shuffle(inds) 81 | for split in range(self.nsplits): 82 | S1 = inds == split 83 | 84 | # Get the two halves of the ensemble. 85 | sets = [state.coords[inds == j] for j in range(self.nsplits)] 86 | s = sets[split] 87 | c = sets[:split] + sets[split + 1 :] 88 | 89 | # Get the move-specific proposal. 90 | q, factors = self.get_proposal(s, c, model.random) 91 | 92 | # Compute the lnprobs of the proposed position. 93 | new_log_probs, new_blobs = model.compute_log_prob_fn(q) 94 | 95 | # Loop over the walkers and update them accordingly. 
96 | for i, (j, f, nlp) in enumerate( 97 | zip(all_inds[S1], factors, new_log_probs) 98 | ): 99 | lnpdiff = f + nlp - state.log_prob[j] 100 | if lnpdiff > np.log(model.random.rand()): 101 | accepted[j] = True 102 | 103 | new_state = State(q, log_prob=new_log_probs, blobs=new_blobs) 104 | state = self.update(state, new_state, accepted, S1) 105 | 106 | return state, accepted 107 | -------------------------------------------------------------------------------- /src/emcee/moves/gaussian.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | 5 | from .mh import MHMove 6 | 7 | __all__ = ["GaussianMove"] 8 | 9 | 10 | class GaussianMove(MHMove): 11 | """A Metropolis step with a Gaussian proposal function. 12 | 13 | Args: 14 | cov: The covariance of the proposal function. This can be a scalar, 15 | vector, or matrix and the proposal will be assumed isotropic, 16 | axis-aligned, or general respectively. 17 | mode (Optional): Select the method used for updating parameters. This 18 | can be one of ``"vector"``, ``"random"``, or ``"sequential"``. The 19 | ``"vector"`` mode updates all dimensions simultaneously, 20 | ``"random"`` randomly selects a dimension and only updates that 21 | one, and ``"sequential"`` loops over dimensions and updates each 22 | one in turn. 23 | factor (Optional[float]): If provided the proposal will be made with a 24 | standard deviation uniformly selected from the range 25 | ``exp(U(-log(factor), log(factor))) * cov``. This is invalid for 26 | the ``"vector"`` mode. 27 | 28 | Raises: 29 | ValueError: If the proposal dimensions are invalid or if any of any of 30 | the other arguments are inconsistent. 31 | 32 | """ 33 | 34 | def __init__(self, cov, mode="vector", factor=None): 35 | # Parse the proposal type. 36 | try: 37 | float(cov) 38 | 39 | except TypeError: 40 | cov = np.atleast_1d(cov) 41 | if len(cov.shape) == 1: 42 | # A diagonal proposal was given. 
43 | ndim = len(cov) 44 | proposal = _diagonal_proposal(np.sqrt(cov), factor, mode) 45 | 46 | elif len(cov.shape) == 2 and cov.shape[0] == cov.shape[1]: 47 | # The full, square covariance matrix was given. 48 | ndim = cov.shape[0] 49 | proposal = _proposal(cov, factor, mode) 50 | 51 | else: 52 | raise ValueError("Invalid proposal scale dimensions") 53 | 54 | else: 55 | # This was a scalar proposal. 56 | ndim = None 57 | proposal = _isotropic_proposal(np.sqrt(cov), factor, mode) 58 | 59 | super(GaussianMove, self).__init__(proposal, ndim=ndim) 60 | 61 | 62 | class _isotropic_proposal(object): 63 | allowed_modes = ["vector", "random", "sequential"] 64 | 65 | def __init__(self, scale, factor, mode): 66 | self.index = 0 67 | self.scale = scale 68 | if factor is None: 69 | self._log_factor = None 70 | else: 71 | if factor < 1.0: 72 | raise ValueError("'factor' must be >= 1.0") 73 | self._log_factor = np.log(factor) 74 | 75 | if mode not in self.allowed_modes: 76 | raise ValueError( 77 | ( 78 | "'{0}' is not a recognized mode. 
" 79 | "Please select from: {1}" 80 | ).format(mode, self.allowed_modes) 81 | ) 82 | self.mode = mode 83 | 84 | def get_factor(self, rng): 85 | if self._log_factor is None: 86 | return 1.0 87 | return np.exp(rng.uniform(-self._log_factor, self._log_factor)) 88 | 89 | def get_updated_vector(self, rng, x0): 90 | return x0 + self.get_factor(rng) * self.scale * rng.randn(*(x0.shape)) 91 | 92 | def __call__(self, x0, rng): 93 | nw, nd = x0.shape 94 | xnew = self.get_updated_vector(rng, x0) 95 | if self.mode == "random": 96 | m = (range(nw), rng.randint(x0.shape[-1], size=nw)) 97 | elif self.mode == "sequential": 98 | m = (range(nw), self.index % nd + np.zeros(nw, dtype=int)) 99 | self.index = (self.index + 1) % nd 100 | else: 101 | return xnew, np.zeros(nw) 102 | x = np.array(x0) 103 | x[m] = xnew[m] 104 | return x, np.zeros(nw) 105 | 106 | 107 | class _diagonal_proposal(_isotropic_proposal): 108 | def get_updated_vector(self, rng, x0): 109 | return x0 + self.get_factor(rng) * self.scale * rng.randn(*(x0.shape)) 110 | 111 | 112 | class _proposal(_isotropic_proposal): 113 | allowed_modes = ["vector"] 114 | 115 | def get_updated_vector(self, rng, x0): 116 | return x0 + self.get_factor(rng) * rng.multivariate_normal( 117 | np.zeros(len(self.scale)), self.scale 118 | ) 119 | -------------------------------------------------------------------------------- /src/emcee/autocorr.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import logging 4 | 5 | import numpy as np 6 | 7 | __all__ = ["function_1d", "integrated_time", "AutocorrError"] 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | 12 | def next_pow_two(n): 13 | """Returns the next power of two greater than or equal to `n`""" 14 | i = 1 15 | while i < n: 16 | i = i << 1 17 | return i 18 | 19 | 20 | def function_1d(x): 21 | """Estimate the normalized autocorrelation function of a 1-D series 22 | 23 | Args: 24 | x: The series as a 1-D numpy array. 
25 | 26 | Returns: 27 | array: The autocorrelation function of the time series. 28 | 29 | """ 30 | x = np.atleast_1d(x) 31 | if len(x.shape) != 1: 32 | raise ValueError("invalid dimensions for 1D autocorrelation function") 33 | n = next_pow_two(len(x)) 34 | 35 | # Compute the FFT and then (from that) the auto-correlation function 36 | f = np.fft.fft(x - np.mean(x), n=2 * n) 37 | acf = np.fft.ifft(f * np.conjugate(f))[: len(x)].real 38 | acf /= acf[0] 39 | return acf 40 | 41 | 42 | def auto_window(taus, c): 43 | m = np.arange(len(taus)) < c * taus 44 | if np.any(m): 45 | return np.argmin(m) 46 | return len(taus) - 1 47 | 48 | 49 | def integrated_time(x, c=5, tol=50, quiet=False, has_walkers=True): 50 | """Estimate the integrated autocorrelation time of a time series. 51 | 52 | This estimate uses the iterative procedure described on page 16 of 53 | `Sokal's notes `_ to 54 | determine a reasonable window size. 55 | 56 | Args: 57 | x (numpy.ndarray): The time series. If 2-dimensional, the array 58 | dimesions are interpreted as ``(n_step, n_walker)`` unless 59 | ``has_walkers==False``, in which case they are interpreted as 60 | ``(n_step, n_param)``. If 3-dimensional, the dimensions are 61 | interperted as ``(n_step, n_walker, n_param)``. 62 | c (Optional[float]): The step size for the window search. (default: 63 | ``5``) 64 | tol (Optional[float]): The minimum number of autocorrelation times 65 | needed to trust the estimate. (default: ``50``) 66 | quiet (Optional[bool]): This argument controls the behavior when the 67 | chain is too short. If ``True``, give a warning instead of raising 68 | an :class:`AutocorrError`. (default: ``False``) 69 | has_walkers (Optional[bool]): Whether the last axis should be 70 | interpreted as walkers or parameters if ``x`` has 2 dimensions. 71 | (default: ``True``) 72 | 73 | Returns: 74 | float or array: An estimate of the integrated autocorrelation time of 75 | the time series ``x``. 
Raises: 78 | AutocorrError:
131 | 132 | """ 133 | 134 | def __init__(self, tau, *args, **kwargs): 135 | self.tau = tau 136 | super(AutocorrError, self).__init__(*args, **kwargs) 137 | -------------------------------------------------------------------------------- /joss/paper.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: 'emcee v3: A Python ensemble sampling toolkit for affine-invariant MCMC' 3 | tags: 4 | - Python 5 | - astronomy 6 | authors: 7 | - name: Daniel Foreman-Mackey 8 | orcid: 0000-0003-0872-7098 9 | affiliation: 1 10 | - name: Will M. Farr 11 | orcid: 0000-0003-1540-8562 12 | affiliation: "1, 2" 13 | - name: Manodeep Sinha 14 | orcid: 0000-0002-4845-1228 15 | affiliation: "3, 4" 16 | - name: Anne M. Archibald 17 | orcid: 0000-0003-0638-3340 18 | affiliation: 5 19 | - name: David W. Hogg 20 | orcid: 0000-0003-2866-9403 21 | affiliation: "1, 6" 22 | - name: Jeremy S. Sanders 23 | orcid: 0000-0003-2189-4501 24 | affiliation: 7 25 | - name: Joe Zuntz 26 | orcid: 0000-0001-9789-9646 27 | affiliation: 8 28 | - name: Peter K. G. Williams 29 | orcid: 0000-0003-3734-3587 30 | affiliation: "9, 10" 31 | - name: Andrew R. J. 
Nelson 32 | orcid: 0000-0002-4548-3558 33 | affiliation: 11 34 | - name: Miguel de Val-Borro 35 | orcid: 0000-0002-0455-9384 36 | affiliation: 12 37 | - name: Tobias Erhardt 38 | orcid: 0000-0002-6683-6746 39 | affiliation: 13 40 | - name: Ilya Pashchenko 41 | orcid: 0000-0002-9404-7023 42 | affiliation: 14 43 | - name: Oriol Abril Pla 44 | orcid: 0000-0002-1847-9481 45 | affiliation: 15 46 | affiliations: 47 | - name: Center for Computational Astrophysics, Flatiron Institute 48 | index: 1 49 | - name: Department of Physics and Astronomy, Stony Brook University, United States 50 | index: 2 51 | - name: Centre for Astrophysics & Supercomputing, Swinburne University of Technology, Australia 52 | index: 3 53 | - name: ARC Centre of Excellence for All Sky Astrophysics in 3 Dimensions (ASTRO 3D) 54 | index: 4 55 | - name: University of Newcastle 56 | index: 5 57 | - name: Center for Cosmology and Particle Physics, Department of Physics, New York University 58 | index: 6 59 | - name: Max Planck Institute for Extraterrestrial Physics 60 | index: 7 61 | - name: Institute for Astronomy, University of Edinburgh, Edinburgh, EH9 3HJ, UK 62 | index: 8 63 | - name: "Center for Astrophysics | Harvard & Smithsonian" 64 | index: 9 65 | - name: American Astronomical Society 66 | index: 10 67 | - name: Australian Nuclear Science and Technology Organisation, NSW, Australia 68 | index: 11 69 | - name: Planetary Science Institute, 1700 East Fort Lowell Rd., Suite 106, Tucson, AZ 85719, USA 70 | index: 12 71 | - name: Climate and Environmental Physics and Oeschger Center for Climate Change Research, University of Bern, Bern, Switzerland 72 | index: 13 73 | - name: P.N. 
Lebedev Physical Institute of the Russian Academy of Sciences, Moscow, Russia 74 | index: 14 75 | - name: Universitat Pompeu Fabra, Barcelona 76 | index: 15 77 | 78 | date: 17 October 2019 79 | bibliography: paper.bib 80 | --- 81 | 82 | # Summary 83 | 84 | ``emcee`` is a Python library implementing a class of affine-invariant ensemble samplers for Markov chain Monte Carlo (MCMC). 85 | This package has been widely applied to probabilistic modeling problems in astrophysics where it was originally published [@Foreman-Mackey:2013], with some applications in other fields. 86 | When it was first released in 2012, the interface implemented in ``emcee`` was fundamentally different from the MCMC libraries that were popular at the time, such as ``PyMC``, because it was specifically designed to work with "black box" models instead of structured graphical models. 87 | This has been a popular interface for applications in astrophysics because it is often non-trivial to implement realistic physics within the modeling frameworks required by other libraries. 88 | Since ``emcee``'s release, other libraries have been developed with similar interfaces, such as ``dynesty`` [@Speagle:2019]. 89 | The version 3.0 release of ``emcee`` is the first major release of the library in about 6 years and it includes a full re-write of the computational backend, several commonly requested features, and a set of new "move" implementations. 90 | 91 | This new release includes both small quality of life improvements—like a progress bar using [``tqdm``](https://tqdm.github.io)—and larger features. 92 | For example, the new ``backends`` interface implements real time serialization of sampling results. 93 | By default ``emcee`` saves its results in memory (as in the original implementation), but it now also includes a ``HDFBackend`` class that serializes the chain to disk using [h5py](https://www.h5py.org). 
94 | 95 | The most important new feature included in the version 3.0 release of ``emcee`` is the new ``moves`` interface. 96 | Originally, ``emcee`` implemented the affine-invariant "stretch move" proposed by @Goodman:2010, but there are other ensemble proposals that can get better performance for certain applications. 97 | ``emcee`` now includes implementations of several other ensemble moves and an interface for defining custom proposals. 98 | The implemented moves include: 99 | 100 | - The "stretch move" proposed by @Goodman:2010, 101 | - The "differential evolution" and "differential evolution snooker update" moves [@Ter-Braak:2006; @Ter-Braak:2008], and 102 | - A "kernel density proposal" based on the implementation in [the ``kombine`` library](https://github.com/bfarr/kombine) [@Farr:2015]. 103 | 104 | ``emcee`` has been widely used and the original paper has been highly cited, but there have been many contributions from members of the community. 105 | This paper is meant to highlight these contributions and provide citation credit to the academic contributors. 106 | A full up-to-date list of contributors can always be found [on GitHub](https://github.com/dfm/emcee/graphs/contributors). 107 | 108 | # References 109 | -------------------------------------------------------------------------------- /docs/user/blobs.rst: -------------------------------------------------------------------------------- 1 | .. _blobs: 2 | 3 | Blobs 4 | ===== 5 | 6 | Way back in version 1.1 of emcee, the concept of blobs was introduced. 7 | This allows a user to track arbitrary metadata associated with every sample in 8 | the chain. 9 | The interface to access these blobs was previously a little clunky because it 10 | was stored as a list of lists of blobs. 11 | In version 3, this interface has been updated to use NumPy arrays instead and 12 | the sampler will do type inference to save the simplest possible 13 | representation of the blobs. 
14 | 15 | Anything that your ``log_prob`` function returns in addition to the log 16 | probability is assumed to be a blob and is tracked as part of blobs. 17 | Put another way, if ``log_prob`` returns more than one thing, all the things 18 | after the first (which is assumed to be the log probability) are assumed to be 19 | blobs. 20 | If ``log_prob`` returns ``-np.inf`` for the log probability, the blobs are not 21 | inspected or tracked so can be anything (but the correct number of arguments 22 | must still be returned). 23 | 24 | Using blobs to track the value of the prior 25 | ------------------------------------------- 26 | 27 | A common pattern is to save the value of the log prior at every step in the 28 | chain. 29 | To do this, your ``log_prob`` function should return your blobs (in this case log prior) as well as the log probability when called. 30 | This can be implemented something like: 31 | 32 | .. code-block:: python 33 | 34 | import emcee 35 | import numpy as np 36 | 37 | def log_prior(params): 38 | return -0.5 * np.sum(params**2) 39 | 40 | def log_like(params): 41 | return -0.5 * np.sum((params / 0.1)**2) 42 | 43 | def log_prob(params): 44 | lp = log_prior(params) 45 | if not np.isfinite(lp): 46 | # log prior is not finite, return -np.inf for log probability 47 | # and None for log prior as it won't be used anyway (but we 48 | # must use the correct number of return values) 49 | return -np.inf, None 50 | ll = log_like(params) 51 | if not np.isfinite(ll): 52 | # log likelihood is not finite, return -np.inf for log 53 | # probability and None for log prior (again, this value isn't 54 | # used but we have to have the correct number of return values) 55 | return -np.inf, None 56 | 57 | # return log probability (sum of log prior and log likelihood) 58 | # and log prior. Log prior will be saved as part of the blobs. 
59 | return lp + ll, lp 60 | 61 | coords = np.random.randn(32, 3) 62 | nwalkers, ndim = coords.shape 63 | sampler = emcee.EnsembleSampler(nwalkers, ndim, log_prob) 64 | sampler.run_mcmc(coords, 100) 65 | 66 | log_prior_samps = sampler.get_blobs() 67 | flat_log_prior_samps = sampler.get_blobs(flat=True) 68 | 69 | print(log_prior_samps.shape) # (100, 32) 70 | print(flat_log_prior_samps.shape) # (3200,) 71 | 72 | As shown above, after running this, the "blobs" stored by the sampler will be 73 | a ``(nsteps, nwalkers)`` NumPy array with the value of the log prior at every 74 | sample. 75 | 76 | Named blobs & custom dtypes 77 | --------------------------- 78 | 79 | If you want to save multiple pieces of metadata, it can be useful to name 80 | them. 81 | To implement this, we use the ``blobs_dtype`` argument in 82 | :class:`EnsembleSampler`. 83 | Using this is also helpful to specify types. 84 | If you don't provide ``blobs_dtype``, the dtype of the extra args is automatically guessed the first time ``log_prob`` is called. 85 | 86 | For example, let's say that, for some reason, we wanted to save the mean of 87 | the parameters as well as the log prior. 88 | To do this, we would update the above example as follows: 89 | 90 | .. 
code-block:: python 91 | 92 | def log_prob(params): 93 | lp = log_prior(params) 94 | if not np.isfinite(lp): 95 | # As above, log prior is not finite, so return -np.inf for log 96 | # probability and None for everything else (these values aren't 97 | # used, but the number of return values must be correct) 98 | return -np.inf, None, None 99 | ll = log_like(params) 100 | if not np.isfinite(ll): 101 | # Log likelihood is not finite so return -np.inf for log 102 | # probabilitiy and None for everything else (maintaining the 103 | # correct number of return values) 104 | return -np.inf, None, None 105 | 106 | # All values are finite, so return desired blobs (in this case: log 107 | # probability, log prior and mean of parameters) 108 | return lp + ll, lp, np.mean(params) 109 | 110 | coords = np.random.randn(32, 3) 111 | nwalkers, ndim = coords.shape 112 | 113 | # Here are the important lines for defining the blobs_dtype 114 | dtype = [("log_prior", float), ("mean", float)] 115 | sampler = emcee.EnsembleSampler(nwalkers, ndim, log_prob, 116 | blobs_dtype=dtype) 117 | 118 | sampler.run_mcmc(coords, 100) 119 | 120 | blobs = sampler.get_blobs() 121 | log_prior_samps = blobs["log_prior"] 122 | mean_samps = blobs["mean"] 123 | print(log_prior_samps.shape) # (100, 32) 124 | print(mean_samps.shape) # (100, 32) 125 | 126 | flat_blobs = sampler.get_blobs(flat=True) 127 | flat_log_prior_samps = flat_blobs["log_prior"] 128 | flat_mean_samps = flat_blobs["mean"] 129 | print(flat_log_prior_samps.shape) # (3200,) 130 | print(flat_mean_samps.shape) # (3200,) 131 | 132 | This will print 133 | 134 | .. code-block:: python 135 | 136 | (100, 32) 137 | (100, 32) 138 | (3200,) 139 | (3200,) 140 | 141 | and the ``blobs`` object will be a structured NumPy array with two columns 142 | called ``log_prior`` and ``mean``. 
143 | --------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
# Makefile for Sphinx documentation
#

# You can set these variables from the command line.
SPHINXOPTS =
SPHINXBUILD = sphinx-build
PAPER =
BUILDDIR = _build

# Internal variables.
PAPEROPT_a4 = -D latex_paper_size=a4
PAPEROPT_letter = -D latex_paper_size=letter
ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
# the i18n builder cannot share the environment and doctrees with the others
I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .

.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext

default: dirhtml

# NOTE(review): the "<target>" placeholders in the echo text below appear to
# have been stripped by HTML mangling in this dump; restore from the upstream
# Sphinx-generated Makefile if rebuilding this file.
help:
	@echo "Please use \`make ' where is one of"
	@echo "  html       to make standalone HTML files"
	@echo "  dirhtml    to make HTML files named index.html in directories"
	@echo "  singlehtml to make a single large HTML file"
	@echo "  pickle     to make pickle files"
	@echo "  json       to make JSON files"
	@echo "  htmlhelp   to make HTML files and a HTML help project"
	@echo "  qthelp     to make HTML files and a qthelp project"
	@echo "  devhelp    to make HTML files and a Devhelp project"
	@echo "  epub       to make an epub"
	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
	@echo "  text       to make text files"
	@echo "  man        to make manual pages"
	@echo "  texinfo    to make Texinfo files"
	@echo "  info       to make Texinfo files and run them through makeinfo"
	@echo "  gettext    to make PO message catalogs"
	@echo "  changes    to make an overview of all changed/added/deprecated items"
	@echo "  linkcheck  to check all external links for integrity"
	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"

clean:
	-rm -rf $(BUILDDIR)/*

html:
	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."

# NOTE(review): $(TUTORIALS) is not defined anywhere in this Makefile —
# presumably provided by the environment or empty; confirm before relying on it.
dirhtml: $(TUTORIALS)
	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."

singlehtml:
	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
	@echo
	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."

pickle:
	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
	@echo
	@echo "Build finished; now you can process the pickle files."

json:
	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
	@echo
	@echo "Build finished; now you can process the JSON files."

htmlhelp:
	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
	@echo
	@echo "Build finished; now you can run HTML Help Workshop with the" \
	      ".hhp project file in $(BUILDDIR)/htmlhelp."

qthelp:
	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
	@echo
	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/emcee.qhcp"
	@echo "To view the help file:"
	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/emcee.qhc"

devhelp:
	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
	@echo
	@echo "Build finished."
	@echo "To view the help file:"
	@echo "# mkdir -p $$HOME/.local/share/devhelp/emcee"
	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/emcee"
	@echo "# devhelp"

epub:
	$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
	@echo
	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."

latex:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo
	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
	@echo "Run \`make' in that directory to run these through (pdf)latex" \
	      "(use \`make latexpdf' here to do that automatically)."

latexpdf:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo "Running LaTeX files through pdflatex..."
	$(MAKE) -C $(BUILDDIR)/latex all-pdf
	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."

text:
	$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
	@echo
	@echo "Build finished. The text files are in $(BUILDDIR)/text."

man:
	$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
	@echo
	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."

texinfo:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo
	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
	@echo "Run \`make' in that directory to run these through makeinfo" \
	      "(use \`make info' here to do that automatically)."

info:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo "Running Texinfo files through makeinfo..."
	make -C $(BUILDDIR)/texinfo info
	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."

gettext:
	$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
	@echo
	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."

changes:
	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
	@echo
	@echo "The overview file is in $(BUILDDIR)/changes."

linkcheck:
	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
	@echo
	@echo "Link check complete; look for any errors in the above output " \
	      "or in $(BUILDDIR)/linkcheck/output.txt."

doctest:
	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
	@echo "Testing of doctests in the sources finished, look at the " \
	      "results in $(BUILDDIR)/doctest/output.txt."
--------------------------------------------------------------------------------
/src/emcee/tests/unit/test_ensemble.py:
--------------------------------------------------------------------------------
"""
Unit tests of some functionality in ensemble.py when the parameters are named
"""

import string
from unittest import TestCase

import numpy as np
import pytest

from emcee.ensemble import EnsembleSampler, ndarray_to_list_of_dicts


class TestNP2ListOfDicts(TestCase):
    def test_ndarray_to_list_of_dicts(self):
        # Try different numbers of keys
        for n_keys in [1, 2, 10, 26]:
            keys = list(string.ascii_lowercase[:n_keys])
            key_set = set(keys)
            key_dict = {key: i for i, key in enumerate(keys)}
            # Try different number of walker/procs
            for N in [1, 2, 3, 10, 100]:
                x = np.random.rand(N, n_keys)

                # One dict per row, mapping each named key to its column.
                LOD = ndarray_to_list_of_dicts(x, key_dict)
                assert len(LOD) == N, "need 1 dict per row"
                for i, dct in enumerate(LOD):
                    assert dct.keys() == key_set, "keys are missing"
                    for j, key in enumerate(keys):
                        assert dct[key] == x[i, j], f"wrong value at {(i, j)}"


class TestNamedParameters(TestCase):
    """
    Test that a keyword-based log-probability function can be used
    instead of a positional one.
    """

    # Keyword based lnpdf
    def lnpdf(self, pars) -> np.float64:
        mean = pars["mean"]
        var = pars["var"]
        if var <= 0:
            return -np.inf
        return (
            -0.5 * ((mean - self.x) ** 2 / var + np.log(2 * np.pi * var)).sum()
        )

    def lnpdf_mixture(self, pars) -> np.float64:
        mean1 = pars["mean1"]
        var1 = pars["var1"]
        mean2 = pars["mean2"]
        var2 = pars["var2"]
        if var1 <= 0 or var2 <= 0:
            return -np.inf
        return (
            -0.5
            * (
                (mean1 - self.x) ** 2 / var1
                + np.log(2 * np.pi * var1)
                + (mean2 - self.x - 3) ** 2 / var2
                + np.log(2 * np.pi * var2)
            ).sum()
        )

    def lnpdf_mixture_grouped(self, pars) -> np.float64:
        # Grouped naming: one key may map to several coordinate indices.
        mean1, mean2 = pars["means"]
        var1, var2 = pars["vars"]
        const = pars["constant"]
        if var1 <= 0 or var2 <= 0:
            return -np.inf
        return (
            -0.5
            * (
                (mean1 - self.x) ** 2 / var1
                + np.log(2 * np.pi * var1)
                + (mean2 - self.x - 3) ** 2 / var2
                + np.log(2 * np.pi * var2)
            ).sum()
            + const
        )

    def setUp(self):
        # Draw some data from a unit Gaussian
        self.x = np.random.randn(100)
        self.names = ["mean", "var"]

    def test_named_parameters(self):
        sampler = EnsembleSampler(
            nwalkers=10,
            ndim=len(self.names),
            log_prob_fn=self.lnpdf,
            parameter_names=self.names,
        )
        assert sampler.params_are_named
        assert list(sampler.parameter_names.keys()) == self.names

    def test_asserts(self):
        # ndim name mismatch
        with pytest.raises(AssertionError):
            _ = EnsembleSampler(
                nwalkers=10,
                ndim=len(self.names) - 1,
                log_prob_fn=self.lnpdf,
                parameter_names=self.names,
            )

        # duplicate names
        with pytest.raises(AssertionError):
            _ = EnsembleSampler(
                nwalkers=10,
                ndim=3,
                log_prob_fn=self.lnpdf,
                parameter_names=["a", "b", "a"],
            )

        # vectorize turned on
        with pytest.raises(AssertionError):
            _ = EnsembleSampler(
                nwalkers=10,
                ndim=len(self.names),
                log_prob_fn=self.lnpdf,
                parameter_names=self.names,
                vectorize=True,
            )

    def test_compute_log_prob(self):
        # Try different numbers of walkers
        for N in [4, 8, 10]:
            sampler = EnsembleSampler(
                nwalkers=N,
                ndim=len(self.names),
                log_prob_fn=self.lnpdf,
                parameter_names=self.names,
            )
            coords = np.random.rand(N, len(self.names))
            lnps, _ = sampler.compute_log_prob(coords)
            assert len(lnps) == N
            assert lnps.dtype == np.float64

    def test_compute_log_prob_mixture(self):
        names = ["mean1", "var1", "mean2", "var2"]
        # Try different numbers of walkers
        for N in [8, 10, 20]:
            sampler = EnsembleSampler(
                nwalkers=N,
                ndim=len(names),
                log_prob_fn=self.lnpdf_mixture,
                parameter_names=names,
            )
            coords = np.random.rand(N, len(names))
            lnps, _ = sampler.compute_log_prob(coords)
            assert len(lnps) == N
            assert lnps.dtype == np.float64

    def test_compute_log_prob_mixture_grouped(self):
        names = {"means": [0, 1], "vars": [2, 3], "constant": 4}
        # Try different numbers of walkers
        for N in [8, 10, 20]:
            sampler = EnsembleSampler(
                nwalkers=N,
                ndim=5,
                log_prob_fn=self.lnpdf_mixture_grouped,
                parameter_names=names,
            )
            coords = np.random.rand(N, 5)
            lnps, _ = sampler.compute_log_prob(coords)
            assert len(lnps) == N
            assert lnps.dtype == np.float64

    def test_run_mcmc(self):
        # Sort of an integration test
        n_walkers = 4
        sampler = EnsembleSampler(
            nwalkers=n_walkers,
            ndim=len(self.names),
            log_prob_fn=self.lnpdf,
            parameter_names=self.names,
        )
        guess = np.random.rand(n_walkers, len(self.names))
        n_steps = 50
        results = sampler.run_mcmc(guess, n_steps)
        assert results.coords.shape == (n_walkers, len(self.names))
        chain = sampler.chain
        assert chain.shape == (n_walkers, n_steps, len(self.names))


class TestLnProbFn(TestCase):
    # checks that the log_prob_fn can deal with a variety of 'scalar-likes'
    def lnpdf(self, x):
        v = np.log(np.sqrt(np.pi) * np.exp(-((x / 2.0) ** 2)))
        v = float(v[0])
        assert np.isscalar(v)
        return v

    def lnpdf_arr1(self, x):
        # length-1 1-D array wrapper around the scalar result
        v = self.lnpdf(x)
        return np.array([v])

    def lnpdf_float64(self, x):
        v = self.lnpdf(x)
        return np.float64(v)

    def lnpdf_arr0D(self, x):
        # 0-D array wrapper around the scalar result
        v = self.lnpdf(x)
        return np.array(v)

    def test_deal_with_scalar_likes(self):
        rng = np.random.default_rng()
        fns = [
            self.lnpdf,
            self.lnpdf_arr1,
            self.lnpdf_float64,
            self.lnpdf_arr0D,
        ]
        for fn in fns:
            init = rng.random((50, 1))
            sampler = EnsembleSampler(50, 1, fn)
            _ = sampler.run_mcmc(initial_state=init, nsteps=20)
--------------------------------------------------------------------------------
/src/emcee/backends/backend.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

import numpy as np
from .. import autocorr
from ..state import State

__all__ = ["Backend"]


class Backend(object):
    """A simple default backend that stores the chain in memory"""

    def __init__(self, dtype=None):
        self.initialized = False
        if dtype is None:
            dtype = np.float64
        self.dtype = dtype

    def reset(self, nwalkers, ndim):
        """Clear the state of the chain and empty the backend

        Args:
            nwalkers (int): The size of the ensemble
            ndim (int): The number of dimensions

        """
        self.nwalkers = int(nwalkers)
        self.ndim = int(ndim)
        self.iteration = 0
        self.accepted = np.zeros(self.nwalkers, dtype=self.dtype)
        self.chain = np.empty((0, self.nwalkers, self.ndim), dtype=self.dtype)
        self.log_prob = np.empty((0, self.nwalkers), dtype=self.dtype)
        self.blobs = None
        self.random_state = None
        self.initialized = True

    def has_blobs(self):
        """Returns ``True`` if the model includes blobs"""
        return self.blobs is not None

    def get_value(self, name, flat=False, thin=1, discard=0):
        # Fetch the stored array ``name`` ("chain", "log_prob" or "blobs"),
        # optionally discarding burn-in, thinning and flattening walkers.
        if self.iteration <= 0:
            raise AttributeError(
                "you must run the sampler with "
                "'store == True' before accessing the "
                "results"
            )

        if name == "blobs" and not self.has_blobs():
            return None

        # Start at the first retained sample after burn-in; ``thin - 1`` shifts
        # the origin so the thinned selection lines up with the last stored step.
        v = getattr(self, name)[discard + thin - 1 : self.iteration : thin]
        if flat:
            # Merge the step and walker axes into one leading axis.
            s = list(v.shape[1:])
            s[0] = np.prod(v.shape[:2])
            return v.reshape(s)
        return v

    def get_chain(self, **kwargs):
        """Get the stored chain of MCMC samples

        Args:
            flat (Optional[bool]): Flatten the chain across the ensemble.
                (default: ``False``)
            thin (Optional[int]): Take only every ``thin`` steps from the
                chain. (default: ``1``)
            discard (Optional[int]): Discard the first ``discard`` steps in
                the chain as burn-in. (default: ``0``)

        Returns:
            array[..., nwalkers, ndim]: The MCMC samples.

        """
        return self.get_value("chain", **kwargs)

    def get_blobs(self, **kwargs):
        """Get the chain of blobs for each sample in the chain

        Args:
            flat (Optional[bool]): Flatten the chain across the ensemble.
                (default: ``False``)
            thin (Optional[int]): Take only every ``thin`` steps from the
                chain. (default: ``1``)
            discard (Optional[int]): Discard the first ``discard`` steps in
                the chain as burn-in. (default: ``0``)

        Returns:
            array[..., nwalkers]: The chain of blobs.

        """
        return self.get_value("blobs", **kwargs)

    def get_log_prob(self, **kwargs):
        """Get the chain of log probabilities evaluated at the MCMC samples

        Args:
            flat (Optional[bool]): Flatten the chain across the ensemble.
                (default: ``False``)
            thin (Optional[int]): Take only every ``thin`` steps from the
                chain. (default: ``1``)
            discard (Optional[int]): Discard the first ``discard`` steps in
                the chain as burn-in. (default: ``0``)

        Returns:
            array[..., nwalkers]: The chain of log probabilities.

        """
        return self.get_value("log_prob", **kwargs)

    def get_last_sample(self):
        """Access the most recent sample in the chain"""
        if (not self.initialized) or self.iteration <= 0:
            raise AttributeError(
                "you must run the sampler with "
                "'store == True' before accessing the "
                "results"
            )
        it = self.iteration
        # Discarding all but the final step leaves exactly one sample.
        blobs = self.get_blobs(discard=it - 1)
        if blobs is not None:
            blobs = blobs[0]
        return State(
            self.get_chain(discard=it - 1)[0],
            log_prob=self.get_log_prob(discard=it - 1)[0],
            blobs=blobs,
            random_state=self.random_state,
        )

    def get_autocorr_time(self, discard=0, thin=1, **kwargs):
        """Compute an estimate of the autocorrelation time for each parameter

        Args:
            thin (Optional[int]): Use only every ``thin`` steps from the
                chain. The returned estimate is multiplied by ``thin`` so the
                estimated time is in units of steps, not thinned steps.
                (default: ``1``)
            discard (Optional[int]): Discard the first ``discard`` steps in
                the chain as burn-in. (default: ``0``)

        Other arguments are passed directly to
        :func:`emcee.autocorr.integrated_time`.

        Returns:
            array[ndim]: The integrated autocorrelation time estimate for the
                chain for each parameter.

        """
        x = self.get_chain(discard=discard, thin=thin)
        return thin * autocorr.integrated_time(x, **kwargs)

    @property
    def shape(self):
        """The dimensions of the ensemble ``(nwalkers, ndim)``"""
        return self.nwalkers, self.ndim

    def _check_blobs(self, blobs):
        # Blobs must be used consistently: either every saved step has them
        # or none does.
        has_blobs = self.has_blobs()
        if has_blobs and blobs is None:
            raise ValueError("inconsistent use of blobs")
        if self.iteration > 0 and blobs is not None and not has_blobs:
            raise ValueError("inconsistent use of blobs")

    def grow(self, ngrow, blobs):
        """Expand the storage space by some number of samples

        Args:
            ngrow (int): The number of steps to grow the chain.
            blobs: The current array of blobs. This is used to compute the
                dtype for the blobs array.

        """
        self._check_blobs(blobs)
        # Only allocate the capacity not already covered by previously grown,
        # still-unused slots (len(self.chain) - self.iteration).
        i = ngrow - (len(self.chain) - self.iteration)
        a = np.empty((i, self.nwalkers, self.ndim), dtype=self.dtype)
        self.chain = np.concatenate((self.chain, a), axis=0)
        a = np.empty((i, self.nwalkers), dtype=self.dtype)
        self.log_prob = np.concatenate((self.log_prob, a), axis=0)
        if blobs is not None:
            # Derive the stored blob dtype from the sample blob array.
            dt = np.dtype((blobs.dtype, blobs.shape[1:]))
            a = np.empty((i, self.nwalkers), dtype=dt)
            if self.blobs is None:
                self.blobs = a
            else:
                self.blobs = np.concatenate((self.blobs, a), axis=0)

    def _check(self, state, accepted):
        # Validate a proposed step against the configured ensemble shape
        # before it is written into the storage arrays.
        self._check_blobs(state.blobs)
        nwalkers, ndim = self.shape
        has_blobs = self.has_blobs()
        if state.coords.shape != (nwalkers, ndim):
            raise ValueError(
                "invalid coordinate dimensions; expected {0}".format(
                    (nwalkers, ndim)
                )
            )
        if state.log_prob.shape != (nwalkers,):
            raise ValueError(
                "invalid log probability size; expected {0}".format(nwalkers)
            )
        if state.blobs is not None and not has_blobs:
            raise ValueError("unexpected blobs")
        if state.blobs is None and has_blobs:
            raise ValueError("expected blobs, but none were given")
        if state.blobs is not None and len(state.blobs) != nwalkers:
            raise ValueError(
                "invalid blobs size; expected {0}".format(nwalkers)
            )
        if accepted.shape != (nwalkers,):
            raise ValueError(
                "invalid acceptance size; expected {0}".format(nwalkers)
            )

    def save_step(self, state, accepted):
        """Save a step to the backend

        Args:
            state (State): The :class:`State` of the ensemble.
            accepted (ndarray): An array of boolean flags indicating whether
                or not the proposal for each walker was accepted.

        """
        self._check(state, accepted)

        self.chain[self.iteration, :, :] = state.coords
        self.log_prob[self.iteration, :] = state.log_prob
        if state.blobs is not None:
            self.blobs[self.iteration, :] = state.blobs
        self.accepted += accepted
        self.random_state = state.random_state
        self.iteration += 1

    def __enter__(self):
        return self

    def __exit__(self, exception_type, exception_value, traceback):
        pass
--------------------------------------------------------------------------------
/src/emcee/tests/unit/test_backends.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

import os
from itertools import product
# NOTE(review): ``join`` appears unused in the visible portion of this module;
# confirm against the full file before removing.
from os.path import join

import numpy as np
import pytest

from emcee import EnsembleSampler, State, backends
from emcee.backends.hdf import does_hdf5_support_longdouble

try:
    import h5py
except ImportError:
    h5py = None

__all__ = ["test_backend", "test_reload"]

all_backends = backends.get_test_backends()
other_backends = all_backends[1:]
dtypes = [None, [("log_prior", float), ("mean", int)]]
def normal_log_prob(params):
    # Standard-normal log density (up to a constant).
    return -0.5 * np.sum(params**2)


def normal_log_prob_blobs(params):
    # Same density, plus two constant blobs (a float and an int).
    return normal_log_prob(params), 0.1, int(5)


def run_sampler(
    backend,
    nwalkers=32,
    ndim=3,
    nsteps=25,
    seed=1234,
    thin_by=1,
    dtype=None,
    blobs=True,
    lp=None,
):
    """Run a short EnsembleSampler against ``backend`` and return it.

    Seeding is done via the global NumPy RNG so two calls with the same
    ``seed`` produce identical chains; pass ``seed=None`` to continue from
    the current global RNG state.
    """
    if lp is None:
        lp = normal_log_prob_blobs if blobs else normal_log_prob
    if seed is not None:
        np.random.seed(seed)
    coords = np.random.randn(nwalkers, ndim)
    sampler = EnsembleSampler(
        nwalkers, ndim, lp, backend=backend, blobs_dtype=dtype
    )
    sampler.run_mcmc(coords, nsteps, thin_by=thin_by)
    return sampler


def _custom_allclose(a, b):
    # allclose that also handles structured (record) arrays field by field.
    if a.dtype.fields is None:
        assert np.allclose(a, b)
    else:
        for n in a.dtype.names:
            assert np.allclose(a[n], b[n])


@pytest.mark.skipif(h5py is None, reason="HDF5 not available")
def test_uninit(tmpdir):
    # The HDF backend should create its file lazily on first use.
    fn = str(tmpdir.join("EMCEE_TEST_FILE_DO_NOT_USE.h5"))
    if os.path.exists(fn):
        os.remove(fn)

    with backends.HDFBackend(fn) as be:
        run_sampler(be)

    assert os.path.exists(fn)
    os.remove(fn)


@pytest.mark.parametrize("backend", all_backends)
def test_uninit_errors(backend):
    # Accessing results before any step is stored must raise.
    with backend() as be:
        with pytest.raises(AttributeError):
            be.get_last_sample()

        for k in ["chain", "log_prob", "blobs"]:
            with pytest.raises(AttributeError):
                getattr(be, "get_" + k)()


@pytest.mark.parametrize("backend", all_backends)
def test_blob_usage_errors(backend):
    # Switching blob usage on/off mid-run is an error in both directions.
    with backend() as be:
        run_sampler(be, blobs=True)
        with pytest.raises(ValueError):
            run_sampler(be, blobs=False)

    with backend() as be:
        run_sampler(be, blobs=False)
        with pytest.raises(ValueError):
            run_sampler(be, blobs=True)


@pytest.mark.parametrize(
    "backend,dtype,blobs", product(other_backends, dtypes, [True, False])
)
def test_backend(backend, dtype, blobs):
    # Run a sampler with the default backend.
    sampler1 = run_sampler(backends.Backend(), dtype=dtype, blobs=blobs)

    with backend() as be:
        sampler2 = run_sampler(be, dtype=dtype, blobs=blobs)

        values = ["chain", "log_prob"]
        if blobs:
            values += ["blobs"]
        else:
            assert sampler1.get_blobs() is None
            assert sampler2.get_blobs() is None

        # Check all of the components.
        for k in values:
            a = getattr(sampler1, "get_" + k)()
            b = getattr(sampler2, "get_" + k)()
            _custom_allclose(a, b)

        last1 = sampler1.get_last_sample()
        last2 = sampler2.get_last_sample()
        assert np.allclose(last1.coords, last2.coords)
        assert np.allclose(last1.log_prob, last2.log_prob)
        assert all(
            np.allclose(l1, l2)
            for l1, l2 in zip(last1.random_state[1:], last2.random_state[1:])
        )
        if blobs:
            _custom_allclose(last1.blobs, last2.blobs)
        else:
            assert last1.blobs is None and last2.blobs is None

        a = sampler1.acceptance_fraction
        b = sampler2.acceptance_fraction
        assert np.allclose(a, b), "inconsistent acceptance fraction"


@pytest.mark.parametrize("backend,dtype", product(other_backends, dtypes))
def test_reload(backend, dtype):
    with backend() as backend1:
        run_sampler(backend1, dtype=dtype)

        # Test the state
        state = backend1.random_state
        np.random.set_state(state)

        # Load the file using a new backend object.
        backend2 = backends.HDFBackend(
            backend1.filename, backend1.name, read_only=True
        )

        with pytest.raises(RuntimeError):
            backend2.reset(32, 3)

        assert state[0] == backend2.random_state[0]
        assert all(
            np.allclose(a, b)
            for a, b in zip(state[1:], backend2.random_state[1:])
        )

        # Check all of the components.
        for k in ["chain", "log_prob", "blobs"]:
            a = backend1.get_value(k)
            b = backend2.get_value(k)
            _custom_allclose(a, b)

        last1 = backend1.get_last_sample()
        last2 = backend2.get_last_sample()
        assert np.allclose(last1.coords, last2.coords)
        assert np.allclose(last1.log_prob, last2.log_prob)
        assert all(
            np.allclose(l1, l2)
            for l1, l2 in zip(last1.random_state[1:], last2.random_state[1:])
        )
        _custom_allclose(last1.blobs, last2.blobs)

        a = backend1.accepted
        b = backend2.accepted
        assert np.allclose(a, b), "inconsistent accepted"


@pytest.mark.parametrize("backend,dtype", product(other_backends, dtypes))
def test_restart(backend, dtype):
    # Run a sampler with the default backend.
    b = backends.Backend()
    run_sampler(b, dtype=dtype)
    sampler1 = run_sampler(b, seed=None, dtype=dtype)

    with backend() as be:
        run_sampler(be, dtype=dtype)
        sampler2 = run_sampler(be, seed=None, dtype=dtype)

        # Check all of the components.
        for k in ["chain", "log_prob", "blobs"]:
            a = getattr(sampler1, "get_" + k)()
            b = getattr(sampler2, "get_" + k)()
            _custom_allclose(a, b)

        last1 = sampler1.get_last_sample()
        last2 = sampler2.get_last_sample()
        assert np.allclose(last1.coords, last2.coords)
        assert np.allclose(last1.log_prob, last2.log_prob)
        assert all(
            np.allclose(l1, l2)
            for l1, l2 in zip(last1.random_state[1:], last2.random_state[1:])
        )
        _custom_allclose(last1.blobs, last2.blobs)

        a = sampler1.acceptance_fraction
        b = sampler2.acceptance_fraction
        assert np.allclose(a, b), "inconsistent acceptance fraction"


@pytest.mark.skipif(h5py is None, reason="HDF5 not available")
def test_multi_hdf5():
    # Two named groups in one HDF5 file must stay independent.
    with backends.TempHDFBackend() as backend1:
        run_sampler(backend1)

        backend2 = backends.HDFBackend(backend1.filename, name="mcmc2")
        run_sampler(backend2)
        chain2 = backend2.get_chain()

        with h5py.File(backend1.filename, "r") as f:
            assert set(f.keys()) == {backend1.name, "mcmc2"}

        backend1.reset(10, 2)
        assert np.allclose(backend2.get_chain(), chain2)
        with pytest.raises(AttributeError):
            backend1.get_chain()


@pytest.mark.parametrize("backend", all_backends)
def test_longdouble_preserved(backend):
    if (
        issubclass(backend, backends.TempHDFBackend)
        and not does_hdf5_support_longdouble()
    ):
        pytest.xfail("HDF5 does not support long double on this platform")
    nwalkers = 10
    ndim = 2
    nsteps = 5
    with backend(dtype=np.longdouble) as b:
        b.reset(nwalkers, ndim)
        b.grow(nsteps, None)
        for i in range(nsteps):
            coords = np.zeros((nwalkers, ndim), dtype=np.longdouble)
            coords += i + 1
            coords += np.arange(nwalkers)[:, None]
            # Offset column 1 by a few longdouble-eps units; the columns must
            # stay distinguishable, which would be lost if the backend
            # round-tripped the values at lower precision.
            coords[:, 1] += coords[:, 0] * 2 * np.finfo(np.longdouble).eps
            assert not np.any(coords[:, 1] == coords[:, 0])
            lp = 1 + np.arange(nwalkers) * np.finfo(np.longdouble).eps
            state = State(coords, log_prob=lp, random_state=())
            b.save_step(state, np.ones((nwalkers,), dtype=bool))
            s = b.get_last_sample()
            # check s has adequate precision and equals state
            assert s.coords.dtype == np.longdouble
            assert not np.any(s.coords[:, 1] == s.coords[:, 0])
            assert np.all(s.coords == coords)

            assert s.log_prob.dtype == np.longdouble
            assert np.all(s.log_prob == lp)


@pytest.mark.skipif(h5py is None, reason="HDF5 not available")
def test_hdf5_compression():
    with backends.TempHDFBackend(compression="gzip") as b:
        run_sampler(b, blobs=True)
        # re-open and read
        b.get_chain()
        b.get_blobs()
        b.get_log_prob()
        b.accepted
--------------------------------------------------------------------------------
/src/emcee/backends/hdf.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

from __future__ import division, print_function

import os
from tempfile import NamedTemporaryFile

import numpy as np

from ..
import __version__ 11 | from .backend import Backend 12 | 13 | __all__ = ["HDFBackend", "TempHDFBackend", "does_hdf5_support_longdouble"] 14 | 15 | 16 | try: 17 | import h5py 18 | except ImportError: 19 | h5py = None 20 | 21 | 22 | def does_hdf5_support_longdouble(): 23 | if h5py is None: 24 | return False 25 | with NamedTemporaryFile( 26 | prefix="emcee-temporary-hdf5", suffix=".hdf5", delete=False 27 | ) as f: 28 | f.close() 29 | 30 | with h5py.File(f.name, "w") as hf: 31 | g = hf.create_group("group") 32 | g.create_dataset("data", data=np.ones(1, dtype=np.longdouble)) 33 | if g["data"].dtype != np.longdouble: 34 | return False 35 | with h5py.File(f.name, "r") as hf: 36 | if hf["group"]["data"].dtype != np.longdouble: 37 | return False 38 | return True 39 | 40 | 41 | class HDFBackend(Backend): 42 | """A backend that stores the chain in an HDF5 file using h5py 43 | 44 | .. note:: You must install `h5py `_ to use this 45 | backend. 46 | 47 | Args: 48 | filename (str): The name of the HDF5 file where the chain will be 49 | saved. 50 | name (str; optional): The name of the group where the chain will 51 | be saved. 52 | read_only (bool; optional): If ``True``, the backend will throw a 53 | ``RuntimeError`` if the file is opened with write access. 
54 | 55 | """ 56 | 57 | def __init__( 58 | self, 59 | filename, 60 | name="mcmc", 61 | read_only=False, 62 | dtype=None, 63 | compression=None, 64 | compression_opts=None, 65 | ): 66 | if h5py is None: 67 | raise ImportError("you must install 'h5py' to use the HDFBackend") 68 | self.filename = filename 69 | self.name = name 70 | self.read_only = read_only 71 | self.compression = compression 72 | self.compression_opts = compression_opts 73 | if dtype is None: 74 | self.dtype_set = False 75 | self.dtype = np.float64 76 | else: 77 | self.dtype_set = True 78 | self.dtype = dtype 79 | 80 | @property 81 | def initialized(self): 82 | if not os.path.exists(self.filename): 83 | return False 84 | try: 85 | with self.open() as f: 86 | return self.name in f 87 | except (OSError, IOError): 88 | return False 89 | 90 | def open(self, mode="r"): 91 | if self.read_only and mode != "r": 92 | raise RuntimeError( 93 | "The backend has been loaded in read-only " 94 | "mode. Set `read_only = False` to make " 95 | "changes." 
96 | ) 97 | f = h5py.File(self.filename, mode) 98 | if not self.dtype_set and self.name in f: 99 | g = f[self.name] 100 | if "chain" in g: 101 | self.dtype = g["chain"].dtype 102 | self.dtype_set = True 103 | return f 104 | 105 | def reset(self, nwalkers, ndim): 106 | """Clear the state of the chain and empty the backend 107 | 108 | Args: 109 | nwakers (int): The size of the ensemble 110 | ndim (int): The number of dimensions 111 | 112 | """ 113 | with self.open("a") as f: 114 | if self.name in f: 115 | del f[self.name] 116 | 117 | g = f.create_group(self.name) 118 | g.attrs["version"] = __version__ 119 | g.attrs["nwalkers"] = nwalkers 120 | g.attrs["ndim"] = ndim 121 | g.attrs["has_blobs"] = False 122 | g.attrs["iteration"] = 0 123 | g.create_dataset( 124 | "accepted", 125 | data=np.zeros(nwalkers), 126 | compression=self.compression, 127 | compression_opts=self.compression_opts, 128 | ) 129 | g.create_dataset( 130 | "chain", 131 | (0, nwalkers, ndim), 132 | maxshape=(None, nwalkers, ndim), 133 | dtype=self.dtype, 134 | compression=self.compression, 135 | compression_opts=self.compression_opts, 136 | ) 137 | g.create_dataset( 138 | "log_prob", 139 | (0, nwalkers), 140 | maxshape=(None, nwalkers), 141 | dtype=self.dtype, 142 | compression=self.compression, 143 | compression_opts=self.compression_opts, 144 | ) 145 | 146 | def has_blobs(self): 147 | with self.open() as f: 148 | return f[self.name].attrs["has_blobs"] 149 | 150 | def get_value(self, name, flat=False, thin=1, discard=0): 151 | if not self.initialized: 152 | raise AttributeError( 153 | "You must run the sampler with " 154 | "'store == True' before accessing the " 155 | "results" 156 | ) 157 | with self.open() as f: 158 | g = f[self.name] 159 | iteration = g.attrs["iteration"] 160 | if iteration <= 0: 161 | raise AttributeError( 162 | "You must run the sampler with " 163 | "'store == True' before accessing the " 164 | "results" 165 | ) 166 | 167 | if name == "blobs" and not g.attrs["has_blobs"]: 168 | 
return None 169 | 170 | v = g[name][discard + thin - 1 : self.iteration : thin] 171 | if flat: 172 | s = list(v.shape[1:]) 173 | s[0] = np.prod(v.shape[:2]) 174 | return v.reshape(s) 175 | return v 176 | 177 | @property 178 | def shape(self): 179 | with self.open() as f: 180 | g = f[self.name] 181 | return g.attrs["nwalkers"], g.attrs["ndim"] 182 | 183 | @property 184 | def iteration(self): 185 | with self.open() as f: 186 | return f[self.name].attrs["iteration"] 187 | 188 | @property 189 | def accepted(self): 190 | with self.open() as f: 191 | return f[self.name]["accepted"][...] 192 | 193 | @property 194 | def random_state(self): 195 | with self.open() as f: 196 | elements = [ 197 | v 198 | for k, v in sorted(f[self.name].attrs.items()) 199 | if k.startswith("random_state_") 200 | ] 201 | return elements if len(elements) else None 202 | 203 | def grow(self, ngrow, blobs): 204 | """Expand the storage space by some number of samples 205 | 206 | Args: 207 | ngrow (int): The number of steps to grow the chain. 208 | blobs: The current array of blobs. This is used to compute the 209 | dtype for the blobs array. 
210 | 211 | """ 212 | self._check_blobs(blobs) 213 | 214 | with self.open("a") as f: 215 | g = f[self.name] 216 | ntot = g.attrs["iteration"] + ngrow 217 | g["chain"].resize(ntot, axis=0) 218 | g["log_prob"].resize(ntot, axis=0) 219 | if blobs is not None: 220 | has_blobs = g.attrs["has_blobs"] 221 | if not has_blobs: 222 | nwalkers = g.attrs["nwalkers"] 223 | dt = np.dtype((blobs.dtype, blobs.shape[1:])) 224 | g.create_dataset( 225 | "blobs", 226 | (ntot, nwalkers), 227 | maxshape=(None, nwalkers), 228 | dtype=dt, 229 | compression=self.compression, 230 | compression_opts=self.compression_opts, 231 | ) 232 | else: 233 | g["blobs"].resize(ntot, axis=0) 234 | if g["blobs"].dtype.shape != blobs.shape[1:]: 235 | raise ValueError( 236 | "Existing blobs have shape {} but new blobs " 237 | "requested with shape {}".format( 238 | g["blobs"].dtype.shape, blobs.shape[1:] 239 | ) 240 | ) 241 | g.attrs["has_blobs"] = True 242 | 243 | def save_step(self, state, accepted): 244 | """Save a step to the backend 245 | 246 | Args: 247 | state (State): The :class:`State` of the ensemble. 248 | accepted (ndarray): An array of boolean flags indicating whether 249 | or not the proposal for each walker was accepted. 
250 | 251 | """ 252 | self._check(state, accepted) 253 | 254 | with self.open("a") as f: 255 | g = f[self.name] 256 | iteration = g.attrs["iteration"] 257 | 258 | g["chain"][iteration, :, :] = state.coords 259 | g["log_prob"][iteration, :] = state.log_prob 260 | if state.blobs is not None: 261 | g["blobs"][iteration, :] = state.blobs 262 | g["accepted"][:] += accepted 263 | 264 | for i, v in enumerate(state.random_state): 265 | g.attrs["random_state_{0}".format(i)] = v 266 | 267 | g.attrs["iteration"] = iteration + 1 268 | 269 | 270 | class TempHDFBackend(object): 271 | def __init__(self, dtype=None, compression=None, compression_opts=None): 272 | self.dtype = dtype 273 | self.filename = None 274 | self.compression = compression 275 | self.compression_opts = compression_opts 276 | 277 | def __enter__(self): 278 | f = NamedTemporaryFile( 279 | prefix="emcee-temporary-hdf5", suffix=".hdf5", delete=False 280 | ) 281 | f.close() 282 | self.filename = f.name 283 | return HDFBackend( 284 | f.name, 285 | "test", 286 | dtype=self.dtype, 287 | compression=self.compression, 288 | compression_opts=self.compression_opts, 289 | ) 290 | 291 | def __exit__(self, exception_type, exception_value, traceback): 292 | os.remove(self.filename) 293 | -------------------------------------------------------------------------------- /src/emcee/tests/unit/test_sampler.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import pickle 4 | from itertools import islice, product 5 | 6 | import numpy as np 7 | import pytest 8 | 9 | from emcee import EnsembleSampler, backends, moves, walkers_independent 10 | 11 | __all__ = ["test_shapes", "test_errors", "test_thin", "test_vectorize"] 12 | 13 | all_backends = backends.get_test_backends() 14 | 15 | 16 | def normal_log_prob(params): 17 | return -0.5 * np.sum(params**2) 18 | 19 | 20 | @pytest.mark.parametrize( 21 | "backend, moves", 22 | product( 23 | all_backends, 24 | [ 25 | None, 
# ---- continuation of /src/emcee/tests/unit/test_sampler.py ----
            moves.GaussianMove(0.5),
            [moves.StretchMove(), moves.GaussianMove(0.5)],
            [(moves.StretchMove(), 0.3), (moves.GaussianMove(0.5), 0.1)],
        ],
    ),
)
def test_shapes(backend, moves, nwalkers=32, ndim=3, nsteps=10, seed=1234):
    # Set up the random number generator.
    np.random.seed(seed)

    with backend() as be:
        # Initialize the ensemble, moves and sampler.
        coords = np.random.randn(nwalkers, ndim)
        sampler = EnsembleSampler(
            nwalkers, ndim, normal_log_prob, moves=moves, backend=be
        )

        # Run the sampler.
        sampler.run_mcmc(coords, nsteps)
        chain = sampler.get_chain()
        assert len(chain) == nsteps, "wrong number of steps"

        tau = sampler.get_autocorr_time(quiet=True)
        assert tau.shape == (ndim,)

        # Check the shapes. The legacy `.chain`/`.lnprobability` attributes
        # are walker-major and deprecated; `get_*` are step-major.
        with pytest.warns(DeprecationWarning):
            assert sampler.chain.shape == (
                nwalkers,
                nsteps,
                ndim,
            ), "incorrect coordinate dimensions"
        with pytest.warns(DeprecationWarning):
            assert sampler.lnprobability.shape == (
                nwalkers,
                nsteps,
            ), "incorrect probability dimensions"
        assert sampler.get_chain().shape == (
            nsteps,
            nwalkers,
            ndim,
        ), "incorrect coordinate dimensions"
        assert sampler.get_log_prob().shape == (
            nsteps,
            nwalkers,
        ), "incorrect probability dimensions"

        assert sampler.acceptance_fraction.shape == (
            nwalkers,
        ), "incorrect acceptance fraction dimensions"

        # Check the shape of the flattened coords.
        assert sampler.get_chain(flat=True).shape == (
            nsteps * nwalkers,
            ndim,
        ), "incorrect coordinate dimensions"
        assert sampler.get_log_prob(flat=True).shape == (
            nsteps * nwalkers,
        ), "incorrect probability dimensions"


@pytest.mark.parametrize("backend", all_backends)
def test_errors(backend, nwalkers=32, ndim=3, nsteps=5, seed=1234):
    # Set up the random number generator.
    np.random.seed(seed)

    with backend() as be:
        # Initialize the ensemble, proposal, and sampler.
        coords = np.random.randn(nwalkers, ndim)
        sampler = EnsembleSampler(nwalkers, ndim, normal_log_prob, backend=be)

        # Test for not running.
        with pytest.raises(AttributeError):
            sampler.get_chain()
        with pytest.raises(AttributeError):
            sampler.get_log_prob()

        # What about not storing the chain.
        sampler.run_mcmc(coords, nsteps, store=False)
        with pytest.raises(AttributeError):
            sampler.get_chain()

        # Now what about if we try to continue using the sampler with an
        # ensemble of a different shape.
        sampler.run_mcmc(coords, nsteps, store=False)

        coords2 = np.random.randn(nwalkers, ndim + 1)
        with pytest.raises(ValueError):
            list(sampler.run_mcmc(coords2, nsteps))

        # A ValueError is raised if the initial coords can't let the chain
        # explore all of parameter space (degenerate walkers), unless the
        # check is explicitly skipped or the coords are non-degenerate.
        with pytest.raises(ValueError):
            sampler.run_mcmc(np.ones((nwalkers, ndim)), nsteps)
        sampler.run_mcmc(
            np.ones((nwalkers, ndim)), nsteps, skip_initial_state_check=True
        )
        sampler.run_mcmc(np.random.randn(nwalkers, ndim), nsteps)


def run_sampler(
    backend,
    nwalkers=32,
    ndim=3,
    nsteps=25,
    seed=1234,
    thin=None,
    thin_by=1,
    progress=False,
    store=True,
):
    # Shared helper: run a seeded sampler against `backend` and return it.
    np.random.seed(seed)
    coords = np.random.randn(nwalkers, ndim)
    sampler = EnsembleSampler(nwalkers, ndim, normal_log_prob, backend=backend)
    sampler.run_mcmc(
        coords,
        nsteps,
        thin=thin,
        thin_by=thin_by,
        progress=progress,
        store=store,
    )
    return sampler


@pytest.mark.parametrize("backend", all_backends)
def test_thin(backend):
    # The deprecated `thin` argument must validate input and must agree
    # with slicing an unthinned run and with get_*(thin=...).
    with backend() as be:
        with pytest.raises(ValueError):
            with pytest.warns(DeprecationWarning):
                run_sampler(be, thin=-1)
        with pytest.raises(ValueError):
            with pytest.warns(DeprecationWarning):
                run_sampler(be, thin=0.1)
        thinby = 3
        sampler1 = run_sampler(None)
        with pytest.warns(DeprecationWarning):
            sampler2 = run_sampler(be, thin=thinby)
        for k in ["get_chain", "get_log_prob"]:
            a = getattr(sampler1, k)()[thinby - 1 :: thinby]
            b = getattr(sampler2, k)()
            c = getattr(sampler1, k)(thin=thinby)
            assert np.allclose(a, b), "inconsistent {0}".format(k)
            assert np.allclose(a, c), "inconsistent {0}".format(k)


@pytest.mark.parametrize(
    "backend,progress", product(all_backends, [True, False])
)
def test_thin_by(backend, progress):
    # `thin_by` runs nsteps*thin_by total iterations but stores every
    # thin_by-th one; compare against an equivalent unthinned run.
    with backend() as be:
        with pytest.raises(ValueError):
            run_sampler(be, thin_by=-1)
        with pytest.raises(ValueError):
            run_sampler(be, thin_by=0.1)
        nsteps = 25
        thinby = 3
        sampler1 = run_sampler(None, nsteps=nsteps * thinby, progress=progress)
        sampler2 = run_sampler(
            be, thin_by=thinby, progress=progress, nsteps=nsteps
        )
        for k in ["get_chain", "get_log_prob"]:
            a = getattr(sampler1, k)()[thinby - 1 :: thinby]
            b = getattr(sampler2, k)()
            c = getattr(sampler1, k)(thin=thinby)
            assert np.allclose(a, b), "inconsistent {0}".format(k)
            assert np.allclose(a, c), "inconsistent {0}".format(k)
        assert sampler1.iteration == sampler2.iteration * thinby


@pytest.mark.parametrize("backend", all_backends)
def test_restart(backend):
    # Continuing with initial_state=None requires a previous stored step.
    with backend() as be:
        sampler = run_sampler(be, nsteps=0)
        with pytest.raises(ValueError):
            sampler.run_mcmc(None, 10)

        sampler = run_sampler(be)
        sampler.run_mcmc(None, 10)

    with backend() as be:
        sampler = run_sampler(be, store=False)
        sampler.run_mcmc(None, 10)


def test_vectorize():
    # vectorize=True passes the whole (nwalkers, ndim) ensemble at once.
    def lp_vec(p):
        return -0.5 * np.sum(p**2, axis=1)

    np.random.seed(42)
    nwalkers, ndim = 32, 3
    coords = np.random.randn(nwalkers, ndim)
    sampler = EnsembleSampler(nwalkers, ndim, lp_vec, vectorize=True)
    sampler.run_mcmc(coords, 10)

    assert sampler.get_chain().shape == (10, nwalkers, ndim)


@pytest.mark.parametrize("backend", all_backends)
def test_pickle(backend):
    # A sampler must survive a pickle round trip with its results intact.
    with backend() as be:
        sampler1 = run_sampler(be)
        s = pickle.dumps(sampler1, -1)
        sampler2 = pickle.loads(s)
        for k in ["get_chain", "get_log_prob"]:
            a = getattr(sampler1, k)()
            b = getattr(sampler2, k)()
            assert np.allclose(a, b), "inconsistent {0}".format(k)


@pytest.mark.parametrize("nwalkers, ndim", [(10, 2), (20, 5)])
def test_walkers_dependent_ones(nwalkers, ndim):
    # Identical walkers are linearly dependent.
    assert not walkers_independent(np.ones((nwalkers, ndim)))


@pytest.mark.parametrize("nwalkers, ndim", [(10, 11), (2, 3)])
def test_walkers_dependent_toofew(nwalkers, ndim):
    # Fewer walkers than dimensions + 1 can never span the space.
    assert not walkers_independent(np.random.randn(nwalkers, ndim))


@pytest.mark.parametrize("nwalkers, ndim", [(10, 2), (20, 5)])
def test_walkers_independent_randn(nwalkers, ndim):
    assert walkers_independent(np.random.randn(nwalkers, ndim))


@pytest.mark.parametrize(
    "nwalkers, ndim, offset", [(10, 2, 1e5), (20, 5, 1e10), (30, 10, 1e14)]
)
def test_walkers_independent_randn_offset(nwalkers, ndim, offset):
    # A large common offset must not mask genuine independence.
    assert walkers_independent(
        np.random.randn(nwalkers, ndim) + np.ones((nwalkers, ndim)) * offset
    )


def test_walkers_dependent_big_offset():
    # An offset so large the unit perturbations vanish in float rounding.
    nwalkers, ndim = 30, 10
    offset = 10 / np.finfo(float).eps
    assert not walkers_independent(
        np.random.randn(nwalkers, ndim) + np.ones((nwalkers, ndim)) * offset
    )


def test_walkers_dependent_subtle():
    nwalkers, ndim = 30, 10
    w = np.random.randn(nwalkers, ndim)
    assert walkers_independent(w)
    # random unit vector
    p = np.random.randn(ndim)
    p /= np.sqrt(np.dot(p, p))
    # project away the direction of p
    w -= np.sum(p[None, :] * w, axis=1)[:, None] * p[None, :]
    assert not walkers_independent(w)
    # shift away from the origin
    w += p[None, :]
    assert not walkers_independent(w)


def test_walkers_almost_dependent():
    # Nearly-degenerate (squashed by 1e-8) ensembles must also be rejected.
    nwalkers, ndim = 30, 10
    squash = 1e-8
    w = np.random.randn(nwalkers, ndim)
    assert walkers_independent(w)
    # random unit vector
    p = np.random.randn(ndim)
    p /= np.sqrt(np.dot(p, p))
    # project away the direction of p
    proj = np.sum(p[None, :] * w, axis=1)[:, None] * p[None, :]
    w -= proj
    w += squash * proj
    assert not walkers_independent(w)


def test_walkers_independent_scaled():
    # Some of these scales will overflow if squared, hee hee
    scales = np.array([1, 1e10, 1e100, 1e200, 1e-10, 1e-100, 1e-200])
    ndim = len(scales)
    nwalkers = 5 * ndim
    w = np.random.randn(nwalkers, ndim) * scales[None, :]
    assert walkers_independent(w)


@pytest.mark.parametrize(
    "nwalkers, ndim, offset",
    [
        (10, 2, 1e5),
        (20, 5, 1e10),
        (30, 10, 1e14),
        (40, 15, 0.1 / np.finfo(np.longdouble).eps),
    ],
)
def test_walkers_independent_randn_offset_longdouble(nwalkers, ndim, offset):
    # longdouble offsets beyond float64 range must still be handled.
    assert walkers_independent(
        np.random.randn(nwalkers, ndim)
        + np.ones((nwalkers, ndim), dtype=np.longdouble) * offset
    )


@pytest.mark.parametrize("backend", all_backends)
def test_infinite_iterations_store(backend, nwalkers=32, ndim=3):
    # iterations=None (run forever) is incompatible with store=True.
    with backend() as be:
        coords = np.random.randn(nwalkers, ndim)
        with pytest.raises(ValueError):
            next(
                EnsembleSampler(
                    nwalkers, ndim, normal_log_prob, backend=be
                ).sample(coords, iterations=None, store=True)
            )


@pytest.mark.parametrize("backend", all_backends)
def test_infinite_iterations(backend, nwalkers=32, ndim=3):
    # With store=False, iterations=None yields an endless generator;
    # islice takes 10 steps and stops cleanly.
    with backend() as be:
        coords = np.random.randn(nwalkers, ndim)
        for state in islice(
            EnsembleSampler(
                nwalkers, ndim, normal_log_prob, backend=be
            ).sample(coords, iterations=None, store=False),
            10,
        ):
            pass
# ---- file: /docs/tutorials/parallel.ipynb ----
{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "(parallel)=\n",
        "\n",
        "# Parallelization"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 1,
      "metadata": {
        "tags": [
          "hide-cell"
        ]
      },
      "outputs": [],
      "source": [
        "%config InlineBackend.figure_format = 
\"retina\"\n", 23 | "\n", 24 | "from matplotlib import rcParams\n", 25 | "\n", 26 | "rcParams[\"savefig.dpi\"] = 100\n", 27 | "rcParams[\"figure.dpi\"] = 100\n", 28 | "rcParams[\"font.size\"] = 20\n", 29 | "\n", 30 | "import multiprocessing\n", 31 | "\n", 32 | "multiprocessing.set_start_method(\"fork\")" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | ":::{note}\n", 40 | "Some builds of NumPy (including the version included with Anaconda) will automatically parallelize some operations using something like the MKL linear algebra. This can cause problems when used with the parallelization methods described here so it can be good to turn that off (by setting the environment variable `OMP_NUM_THREADS=1`, for example).\n", 41 | ":::" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 2, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "import os\n", 51 | "\n", 52 | "os.environ[\"OMP_NUM_THREADS\"] = \"1\"" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "With emcee, it's easy to make use of multiple CPUs to speed up slow sampling.\n", 60 | "There will always be some computational overhead introduced by parallelization so it will only be beneficial in the case where the model is expensive, but this is often true for real research problems.\n", 61 | "All parallelization techniques are accessed using the `pool` keyword argument in the :class:`EnsembleSampler` class but, depending on your system and your model, there are a few pool options that you can choose from.\n", 62 | "In general, a `pool` is any Python object with a `map` method that can be used to apply a function to a list of numpy arrays.\n", 63 | "Below, we will discuss a few options." 
64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "In all of the following examples, we'll test the code with the following convoluted model:" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 3, 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [ 79 | "import time\n", 80 | "import numpy as np\n", 81 | "\n", 82 | "\n", 83 | "def log_prob(theta):\n", 84 | " t = time.time() + np.random.uniform(0.005, 0.008)\n", 85 | " while True:\n", 86 | " if time.time() >= t:\n", 87 | " break\n", 88 | " return -0.5 * np.sum(theta**2)" 89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": {}, 94 | "source": [ 95 | "This probability function will randomly sleep for a fraction of a second every time it is called.\n", 96 | "This is meant to emulate a more realistic situation where the model is computationally expensive to compute.\n", 97 | "\n", 98 | "To start, let's sample the usual (serial) way:" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": 4, 104 | "metadata": {}, 105 | "outputs": [ 106 | { 107 | "name": "stderr", 108 | "output_type": "stream", 109 | "text": [ 110 | "100%|██████████| 100/100 [00:21<00:00, 4.71it/s]" 111 | ] 112 | }, 113 | { 114 | "name": "stdout", 115 | "output_type": "stream", 116 | "text": [ 117 | "Serial took 21.5 seconds\n" 118 | ] 119 | }, 120 | { 121 | "name": "stderr", 122 | "output_type": "stream", 123 | "text": [ 124 | "\n" 125 | ] 126 | } 127 | ], 128 | "source": [ 129 | "import emcee\n", 130 | "\n", 131 | "np.random.seed(42)\n", 132 | "initial = np.random.randn(32, 5)\n", 133 | "nwalkers, ndim = initial.shape\n", 134 | "nsteps = 100\n", 135 | "\n", 136 | "sampler = emcee.EnsembleSampler(nwalkers, ndim, log_prob)\n", 137 | "start = time.time()\n", 138 | "sampler.run_mcmc(initial, nsteps, progress=True)\n", 139 | "end = time.time()\n", 140 | "serial_time = end - start\n", 141 | "print(\"Serial took {0:.1f} seconds\".format(serial_time))" 
142 | ] 143 | }, 144 | { 145 | "cell_type": "markdown", 146 | "metadata": {}, 147 | "source": [ 148 | "## Multiprocessing\n", 149 | "\n", 150 | "The simplest method of parallelizing emcee is to use the [multiprocessing module from the standard library](https://docs.python.org/3/library/multiprocessing.html).\n", 151 | "To parallelize the above sampling, you could update the code as follows:" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": 5, 157 | "metadata": {}, 158 | "outputs": [ 159 | { 160 | "name": "stderr", 161 | "output_type": "stream", 162 | "text": [ 163 | "100%|██████████| 100/100 [00:06<00:00, 15.65it/s]" 164 | ] 165 | }, 166 | { 167 | "name": "stdout", 168 | "output_type": "stream", 169 | "text": [ 170 | "Multiprocessing took 6.5 seconds\n", 171 | "3.3 times faster than serial\n" 172 | ] 173 | }, 174 | { 175 | "name": "stderr", 176 | "output_type": "stream", 177 | "text": [ 178 | "\n" 179 | ] 180 | } 181 | ], 182 | "source": [ 183 | "from multiprocessing import Pool\n", 184 | "\n", 185 | "with Pool() as pool:\n", 186 | " sampler = emcee.EnsembleSampler(nwalkers, ndim, log_prob, pool=pool)\n", 187 | " start = time.time()\n", 188 | " sampler.run_mcmc(initial, nsteps, progress=True)\n", 189 | " end = time.time()\n", 190 | " multi_time = end - start\n", 191 | " print(\"Multiprocessing took {0:.1f} seconds\".format(multi_time))\n", 192 | " print(\"{0:.1f} times faster than serial\".format(serial_time / multi_time))" 193 | ] 194 | }, 195 | { 196 | "cell_type": "markdown", 197 | "metadata": {}, 198 | "source": [ 199 | "I have 4 cores on the machine where this is being tested:" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": 6, 205 | "metadata": {}, 206 | "outputs": [ 207 | { 208 | "name": "stdout", 209 | "output_type": "stream", 210 | "text": [ 211 | "4 CPUs\n" 212 | ] 213 | } 214 | ], 215 | "source": [ 216 | "from multiprocessing import cpu_count\n", 217 | "\n", 218 | "ncpu = cpu_count()\n", 219 | 
"print(\"{0} CPUs\".format(ncpu))" 220 | ] 221 | }, 222 | { 223 | "cell_type": "markdown", 224 | "metadata": {}, 225 | "source": [ 226 | "We don't quite get the factor of 4 runtime decrease that you might expect because there is some overhead in the parallelization, but we're getting pretty close with this example and this will get even closer for more expensive models." 227 | ] 228 | }, 229 | { 230 | "cell_type": "markdown", 231 | "metadata": {}, 232 | "source": [ 233 | "## MPI\n", 234 | "\n", 235 | "Multiprocessing can only be used for distributing calculations across processors on one machine.\n", 236 | "If you want to take advantage of a bigger cluster, you'll need to use MPI.\n", 237 | "In that case, you need to execute the code using the `mpiexec` executable, so this demo is slightly more convoluted.\n", 238 | "For this example, we'll write the code to a file called `script.py` and then execute it using MPI, but when you really use the MPI pool, you'll probably just want to edit the script directly.\n", 239 | "To run this example, you'll first need to install [the schwimmbad library](https://github.com/adrn/schwimmbad) because emcee no longer includes its own `MPIPool`." 
240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "execution_count": 7, 245 | "metadata": {}, 246 | "outputs": [ 247 | { 248 | "name": "stdout", 249 | "output_type": "stream", 250 | "text": [ 251 | "MPI took 8.9 seconds\n", 252 | "2.4 times faster than serial\n" 253 | ] 254 | } 255 | ], 256 | "source": [ 257 | "with open(\"script.py\", \"w\") as f:\n", 258 | " f.write(\n", 259 | " \"\"\"\n", 260 | "import sys\n", 261 | "import time\n", 262 | "import emcee\n", 263 | "import numpy as np\n", 264 | "from schwimmbad import MPIPool\n", 265 | "\n", 266 | "def log_prob(theta):\n", 267 | " t = time.time() + np.random.uniform(0.005, 0.008)\n", 268 | " while True:\n", 269 | " if time.time() >= t:\n", 270 | " break\n", 271 | " return -0.5*np.sum(theta**2)\n", 272 | "\n", 273 | "with MPIPool() as pool:\n", 274 | " if not pool.is_master():\n", 275 | " pool.wait()\n", 276 | " sys.exit(0)\n", 277 | "\n", 278 | " np.random.seed(42)\n", 279 | " initial = np.random.randn(32, 5)\n", 280 | " nwalkers, ndim = initial.shape\n", 281 | " nsteps = 100\n", 282 | "\n", 283 | " sampler = emcee.EnsembleSampler(nwalkers, ndim, log_prob, pool=pool)\n", 284 | " start = time.time()\n", 285 | " sampler.run_mcmc(initial, nsteps)\n", 286 | " end = time.time()\n", 287 | " print(end - start)\n", 288 | "\"\"\"\n", 289 | " )\n", 290 | "\n", 291 | "mpi_time = !mpiexec -n {ncpu} python script.py\n", 292 | "mpi_time = float(mpi_time[0])\n", 293 | "print(\"MPI took {0:.1f} seconds\".format(mpi_time))\n", 294 | "print(\"{0:.1f} times faster than serial\".format(serial_time / mpi_time))" 295 | ] 296 | }, 297 | { 298 | "cell_type": "markdown", 299 | "metadata": {}, 300 | "source": [ 301 | "There is often more overhead introduced by MPI than multiprocessing so we get less of a gain this time.\n", 302 | "That being said, MPI is much more flexible and it can be used to scale to huge systems." 
303 | ] 304 | }, 305 | { 306 | "cell_type": "markdown", 307 | "metadata": {}, 308 | "source": [ 309 | "## Pickling, data transfer & arguments\n", 310 | "\n", 311 | "All parallel Python implementations work by spinning up multiple `python` processes with identical environments then and passing information between the processes using `pickle`.\n", 312 | "This means that the probability function [must be picklable](https://docs.python.org/3/library/pickle.html#pickle-picklable).\n", 313 | "\n", 314 | "Some users might hit issues when they use `args` to pass data to their model.\n", 315 | "These args must be pickled and passed every time the model is called.\n", 316 | "This can be a problem if you have a large dataset, as you can see here:" 317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": 8, 322 | "metadata": {}, 323 | "outputs": [ 324 | { 325 | "name": "stderr", 326 | "output_type": "stream", 327 | "text": [ 328 | "100%|██████████| 100/100 [00:21<00:00, 4.70it/s]" 329 | ] 330 | }, 331 | { 332 | "name": "stdout", 333 | "output_type": "stream", 334 | "text": [ 335 | "Serial took 21.5 seconds\n" 336 | ] 337 | }, 338 | { 339 | "name": "stderr", 340 | "output_type": "stream", 341 | "text": [ 342 | "\n" 343 | ] 344 | } 345 | ], 346 | "source": [ 347 | "def log_prob_data(theta, data):\n", 348 | " a = data[0] # Use the data somehow...\n", 349 | " t = time.time() + np.random.uniform(0.005, 0.008)\n", 350 | " while True:\n", 351 | " if time.time() >= t:\n", 352 | " break\n", 353 | " return -0.5 * np.sum(theta**2)\n", 354 | "\n", 355 | "\n", 356 | "data = np.random.randn(5000, 200)\n", 357 | "\n", 358 | "sampler = emcee.EnsembleSampler(nwalkers, ndim, log_prob_data, args=(data,))\n", 359 | "start = time.time()\n", 360 | "sampler.run_mcmc(initial, nsteps, progress=True)\n", 361 | "end = time.time()\n", 362 | "serial_data_time = end - start\n", 363 | "print(\"Serial took {0:.1f} seconds\".format(serial_data_time))" 364 | ] 365 | }, 366 | { 367 | 
"cell_type": "markdown", 368 | "metadata": {}, 369 | "source": [ 370 | "We basically get no change in performance when we include the `data` argument here.\n", 371 | "Now let's try including this naively using multiprocessing:" 372 | ] 373 | }, 374 | { 375 | "cell_type": "code", 376 | "execution_count": 9, 377 | "metadata": {}, 378 | "outputs": [ 379 | { 380 | "name": "stderr", 381 | "output_type": "stream", 382 | "text": [ 383 | "100%|██████████| 100/100 [01:05<00:00, 1.52it/s]" 384 | ] 385 | }, 386 | { 387 | "name": "stdout", 388 | "output_type": "stream", 389 | "text": [ 390 | "Multiprocessing took 66.0 seconds\n", 391 | "0.3 times faster(?) than serial\n" 392 | ] 393 | }, 394 | { 395 | "name": "stderr", 396 | "output_type": "stream", 397 | "text": [ 398 | "\n" 399 | ] 400 | } 401 | ], 402 | "source": [ 403 | "with Pool() as pool:\n", 404 | " sampler = emcee.EnsembleSampler(\n", 405 | " nwalkers, ndim, log_prob_data, pool=pool, args=(data,)\n", 406 | " )\n", 407 | " start = time.time()\n", 408 | " sampler.run_mcmc(initial, nsteps, progress=True)\n", 409 | " end = time.time()\n", 410 | " multi_data_time = end - start\n", 411 | " print(\"Multiprocessing took {0:.1f} seconds\".format(multi_data_time))\n", 412 | " print(\n", 413 | " \"{0:.1f} times faster(?) than serial\".format(\n", 414 | " serial_data_time / multi_data_time\n", 415 | " )\n", 416 | " )" 417 | ] 418 | }, 419 | { 420 | "cell_type": "markdown", 421 | "metadata": {}, 422 | "source": [ 423 | "Brutal.\n", 424 | "\n", 425 | "We can do better than that though.\n", 426 | "It's a bit ugly, but if we just make `data` a global variable and use that variable within the model calculation, then we take no hit at all." 
427 | ] 428 | }, 429 | { 430 | "cell_type": "code", 431 | "execution_count": 10, 432 | "metadata": {}, 433 | "outputs": [ 434 | { 435 | "name": "stderr", 436 | "output_type": "stream", 437 | "text": [ 438 | "100%|██████████| 100/100 [00:06<00:00, 14.60it/s]" 439 | ] 440 | }, 441 | { 442 | "name": "stdout", 443 | "output_type": "stream", 444 | "text": [ 445 | "Multiprocessing took 6.9 seconds\n", 446 | "3.1 times faster than serial\n" 447 | ] 448 | }, 449 | { 450 | "name": "stderr", 451 | "output_type": "stream", 452 | "text": [ 453 | "\n" 454 | ] 455 | } 456 | ], 457 | "source": [ 458 | "def log_prob_data_global(theta):\n", 459 | " a = data[0] # Use the data somehow...\n", 460 | " t = time.time() + np.random.uniform(0.005, 0.008)\n", 461 | " while True:\n", 462 | " if time.time() >= t:\n", 463 | " break\n", 464 | " return -0.5 * np.sum(theta**2)\n", 465 | "\n", 466 | "\n", 467 | "with Pool() as pool:\n", 468 | " sampler = emcee.EnsembleSampler(\n", 469 | " nwalkers, ndim, log_prob_data_global, pool=pool\n", 470 | " )\n", 471 | " start = time.time()\n", 472 | " sampler.run_mcmc(initial, nsteps, progress=True)\n", 473 | " end = time.time()\n", 474 | " multi_data_global_time = end - start\n", 475 | " print(\n", 476 | " \"Multiprocessing took {0:.1f} seconds\".format(multi_data_global_time)\n", 477 | " )\n", 478 | " print(\n", 479 | " \"{0:.1f} times faster than serial\".format(\n", 480 | " serial_data_time / multi_data_global_time\n", 481 | " )\n", 482 | " )" 483 | ] 484 | }, 485 | { 486 | "cell_type": "markdown", 487 | "metadata": {}, 488 | "source": [ 489 | "That's better!\n", 490 | "This works because, in the global variable case, the dataset is only pickled and passed between processes once (when the pool is created) instead of once for every model evaluation." 
491 | ] 492 | } 493 | ], 494 | "metadata": { 495 | "kernelspec": { 496 | "display_name": "Python 3", 497 | "language": "python", 498 | "name": "python3" 499 | }, 500 | "language_info": { 501 | "codemirror_mode": { 502 | "name": "ipython", 503 | "version": 3 504 | }, 505 | "file_extension": ".py", 506 | "mimetype": "text/x-python", 507 | "name": "python", 508 | "nbconvert_exporter": "python", 509 | "pygments_lexer": "ipython3", 510 | "version": "3.9.4" 511 | } 512 | }, 513 | "nbformat": 4, 514 | "nbformat_minor": 4 515 | } 516 | -------------------------------------------------------------------------------- /joss/paper.tex: -------------------------------------------------------------------------------- 1 | \documentclass[10pt,a4paper,onecolumn]{article} 2 | \usepackage{marginnote} 3 | \usepackage{graphicx} 4 | \usepackage{xcolor} 5 | \usepackage{authblk,etoolbox} 6 | \usepackage{titlesec} 7 | \usepackage{calc} 8 | \usepackage{tikz} 9 | \usepackage{hyperref} 10 | \hypersetup{colorlinks,breaklinks=true, 11 | urlcolor=[rgb]{0.0, 0.5, 1.0}, 12 | linkcolor=[rgb]{0.0, 0.5, 1.0}} 13 | \usepackage{caption} 14 | \usepackage{tcolorbox} 15 | \usepackage{amssymb,amsmath} 16 | \usepackage{ifxetex,ifluatex} 17 | \usepackage{seqsplit} 18 | \usepackage{xstring} 19 | 20 | \usepackage{float} 21 | \let\origfigure\figure 22 | \let\endorigfigure\endfigure 23 | \renewenvironment{figure}[1][2] { 24 | \expandafter\origfigure\expandafter[H] 25 | } { 26 | \endorigfigure 27 | } 28 | 29 | \usepackage{fixltx2e} % provides \textsubscript 30 | \usepackage[ 31 | backend=biber, 32 | % style=alphabetic, 33 | % citestyle=numeric 34 | ]{biblatex} 35 | \bibliography{paper.bib} 36 | 37 | % --- Splitting \texttt -------------------------------------------------- 38 | 39 | \let\textttOrig=\texttt 40 | \def\texttt#1{\expandafter\textttOrig{\seqsplit{#1}}} 41 | \renewcommand{\seqinsert}{\ifmmode 42 | \allowbreak 43 | \else\penalty6000\hspace{0pt plus 0.02em}\fi} 44 | 45 | 46 | % --- Pandoc does not 
distinguish between links like [foo](bar) and 47 | % --- [foo](foo) -- a simplistic Markdown model. However, this is 48 | % --- wrong: in links like [foo](foo) the text is the url, and must 49 | % --- be split correspondingly. 50 | % --- Here we detect links \href{foo}{foo}, and also links starting 51 | % --- with https://doi.org, and use path-like splitting (but not 52 | % --- escaping!) with these links. 53 | % --- Another vile thing pandoc does is the different escaping of 54 | % --- foo and bar. This may confound our detection. 55 | % --- This problem we do not try to solve at present, with the exception 56 | % --- of doi-like urls, which we detect correctly. 57 | 58 | 59 | \makeatletter 60 | \let\href@Orig=\href 61 | \def\href@Urllike#1#2{\href@Orig{#1}{\begingroup 62 | \def\Url@String{#2}\Url@FormatString 63 | \endgroup}} 64 | \def\href@Notdoi#1#2{\def\tempa{#1}\def\tempb{#2}% 65 | \ifx\tempa\tempb\relax\href@Urllike{#1}{#2}\else 66 | \href@Orig{#1}{#2}\fi} 67 | \def\href#1#2{% 68 | \IfBeginWith{#1}{https://doi.org}% 69 | {\href@Urllike{#1}{#2}}{\href@Notdoi{#1}{#2}}} 70 | \makeatother 71 | 72 | 73 | % --- Page layout ------------------------------------------------------------- 74 | \usepackage[top=3.5cm, bottom=3cm, right=1.5cm, left=1.0cm, 75 | headheight=2.2cm, reversemp, includemp, marginparwidth=4.5cm]{geometry} 76 | 77 | % --- Default font ------------------------------------------------------------ 78 | \renewcommand\familydefault{\sfdefault} 79 | 80 | % --- Style ------------------------------------------------------------------- 81 | \renewcommand{\bibfont}{\small \sffamily} 82 | \renewcommand{\captionfont}{\small\sffamily} 83 | \renewcommand{\captionlabelfont}{\bfseries} 84 | 85 | % --- Section/SubSection/SubSubSection ---------------------------------------- 86 | \titleformat{\section} 87 | {\normalfont\sffamily\Large\bfseries} 88 | {}{0pt}{} 89 | \titleformat{\subsection} 90 | {\normalfont\sffamily\large\bfseries} 91 | {}{0pt}{} 92 | 
\titleformat{\subsubsection} 93 | {\normalfont\sffamily\bfseries} 94 | {}{0pt}{} 95 | \titleformat*{\paragraph} 96 | {\sffamily\normalsize} 97 | 98 | 99 | % --- Header / Footer --------------------------------------------------------- 100 | \usepackage{fancyhdr} 101 | \pagestyle{fancy} 102 | \fancyhf{} 103 | %\renewcommand{\headrulewidth}{0.50pt} 104 | \renewcommand{\headrulewidth}{0pt} 105 | \fancyhead[L]{\hspace{-0.75cm}\includegraphics[width=5.5cm]{logo.png}} 106 | \fancyhead[C]{} 107 | \fancyhead[R]{} 108 | \renewcommand{\footrulewidth}{0.25pt} 109 | 110 | \fancyfoot[L]{\parbox[t]{0.98\headwidth}{\footnotesize{\sffamily Foreman-Mackey, (2019). emcee v3: A Python ensemble sampling toolkit for affine-invariant MCMC. \textit{Journal of Open Source Software}, 43(4), 1864. \url{https://doi.org/10.21105/joss.01864}}}} 111 | 112 | 113 | \fancyfoot[R]{\sffamily \thepage} 114 | \makeatletter 115 | \let\ps@plain\ps@fancy 116 | \fancyheadoffset[L]{4.5cm} 117 | \fancyfootoffset[L]{4.5cm} 118 | 119 | % --- Macros --------- 120 | 121 | \definecolor{linky}{rgb}{0.0, 0.5, 1.0} 122 | 123 | \newtcolorbox{repobox} 124 | {colback=red, colframe=red!75!black, 125 | boxrule=0.5pt, arc=2pt, left=6pt, right=6pt, top=3pt, bottom=3pt} 126 | 127 | \newcommand{\ExternalLink}{% 128 | \tikz[x=1.2ex, y=1.2ex, baseline=-0.05ex]{% 129 | \begin{scope}[x=1ex, y=1ex] 130 | \clip (-0.1,-0.1) 131 | --++ (-0, 1.2) 132 | --++ (0.6, 0) 133 | --++ (0, -0.6) 134 | --++ (0.6, 0) 135 | --++ (0, -1); 136 | \path[draw, 137 | line width = 0.5, 138 | rounded corners=0.5] 139 | (0,0) rectangle (1,1); 140 | \end{scope} 141 | \path[draw, line width = 0.5] (0.5, 0.5) 142 | -- (1, 1); 143 | \path[draw, line width = 0.5] (0.6, 1) 144 | -- (1, 1) -- (1, 0.6); 145 | } 146 | } 147 | 148 | % --- Title / Authors --------------------------------------------------------- 149 | % patch \maketitle so that it doesn't center 150 | \patchcmd{\@maketitle}{center}{flushleft}{}{} 151 | \patchcmd{\@maketitle}{center}{flushleft}{}{} 
152 | % patch \maketitle so that the font size for the title is normal 153 | \patchcmd{\@maketitle}{\LARGE}{\LARGE\sffamily}{}{} 154 | % patch the patch by authblk so that the author block is flush left 155 | \def\maketitle{{% 156 | \renewenvironment{tabular}[2][] 157 | {\begin{flushleft}} 158 | {\end{flushleft}} 159 | \AB@maketitle}} 160 | \makeatletter 161 | \renewcommand\AB@affilsepx{ \protect\Affilfont} 162 | %\renewcommand\AB@affilnote[1]{{\bfseries #1}\hspace{2pt}} 163 | \renewcommand\AB@affilnote[1]{{\bfseries #1}\hspace{3pt}} 164 | \renewcommand{\affil}[2][]% 165 | {\newaffiltrue\let\AB@blk@and\AB@pand 166 | \if\relax#1\relax\def\AB@note{\AB@thenote}\else\def\AB@note{#1}% 167 | \setcounter{Maxaffil}{0}\fi 168 | \begingroup 169 | \let\href=\href@Orig 170 | \let\texttt=\textttOrig 171 | \let\protect\@unexpandable@protect 172 | \def\thanks{\protect\thanks}\def\footnote{\protect\footnote}% 173 | \@temptokena=\expandafter{\AB@authors}% 174 | {\def\\{\protect\\\protect\Affilfont}\xdef\AB@temp{#2}}% 175 | \xdef\AB@authors{\the\@temptokena\AB@las\AB@au@str 176 | \protect\\[\affilsep]\protect\Affilfont\AB@temp}% 177 | \gdef\AB@las{}\gdef\AB@au@str{}% 178 | {\def\\{, \ignorespaces}\xdef\AB@temp{#2}}% 179 | \@temptokena=\expandafter{\AB@affillist}% 180 | \xdef\AB@affillist{\the\@temptokena \AB@affilsep 181 | \AB@affilnote{\AB@note}\protect\Affilfont\AB@temp}% 182 | \endgroup 183 | \let\AB@affilsep\AB@affilsepx 184 | } 185 | \makeatother 186 | \renewcommand\Authfont{\sffamily\bfseries} 187 | \renewcommand\Affilfont{\sffamily\small\mdseries} 188 | \setlength{\affilsep}{1em} 189 | 190 | 191 | \ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex 192 | \usepackage[T1]{fontenc} 193 | \usepackage[utf8]{inputenc} 194 | 195 | \else % if luatex or xelatex 196 | \ifxetex 197 | \usepackage{mathspec} 198 | \usepackage{fontspec} 199 | 200 | \else 201 | \usepackage{fontspec} 202 | \fi 203 | \defaultfontfeatures{Ligatures=TeX,Scale=MatchLowercase} 204 | 205 | \fi 206 | % use upquote if 
available, for straight quotes in verbatim environments 207 | \IfFileExists{upquote.sty}{\usepackage{upquote}}{} 208 | % use microtype if available 209 | \IfFileExists{microtype.sty}{% 210 | \usepackage{microtype} 211 | \UseMicrotypeSet[protrusion]{basicmath} % disable protrusion for tt fonts 212 | }{} 213 | 214 | \usepackage{hyperref} 215 | \hypersetup{unicode=true, 216 | pdftitle={emcee v3: A Python ensemble sampling toolkit for affine-invariant MCMC}, 217 | pdfborder={0 0 0}, 218 | breaklinks=true} 219 | \urlstyle{same} % don't use monospace font for urls 220 | 221 | % --- We redefined \texttt, but in sections and captions we want the 222 | % --- old definition 223 | \let\addcontentslineOrig=\addcontentsline 224 | \def\addcontentsline#1#2#3{\bgroup 225 | \let\texttt=\textttOrig\addcontentslineOrig{#1}{#2}{#3}\egroup} 226 | \let\markbothOrig\markboth 227 | \def\markboth#1#2{\bgroup 228 | \let\texttt=\textttOrig\markbothOrig{#1}{#2}\egroup} 229 | \let\markrightOrig\markright 230 | \def\markright#1{\bgroup 231 | \let\texttt=\textttOrig\markrightOrig{#1}\egroup} 232 | 233 | 234 | \usepackage{graphicx,grffile} 235 | \makeatletter 236 | \def\maxwidth{\ifdim\Gin@nat@width>\linewidth\linewidth\else\Gin@nat@width\fi} 237 | \def\maxheight{\ifdim\Gin@nat@height>\textheight\textheight\else\Gin@nat@height\fi} 238 | \makeatother 239 | % Scale images if necessary, so that they will not overflow the page 240 | % margins by default, and it is still possible to overwrite the defaults 241 | % using explicit options in \includegraphics[width, height, ...]{} 242 | \setkeys{Gin}{width=\maxwidth,height=\maxheight,keepaspectratio} 243 | \IfFileExists{parskip.sty}{% 244 | \usepackage{parskip} 245 | }{% else 246 | \setlength{\parindent}{0pt} 247 | \setlength{\parskip}{6pt plus 2pt minus 1pt} 248 | } 249 | \setlength{\emergencystretch}{3em} % prevent overfull lines 250 | \providecommand{\tightlist}{% 251 | \setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}} 252 | 
\setcounter{secnumdepth}{0} 253 | % Redefines (sub)paragraphs to behave more like sections 254 | \ifx\paragraph\undefined\else 255 | \let\oldparagraph\paragraph 256 | \renewcommand{\paragraph}[1]{\oldparagraph{#1}\mbox{}} 257 | \fi 258 | \ifx\subparagraph\undefined\else 259 | \let\oldsubparagraph\subparagraph 260 | \renewcommand{\subparagraph}[1]{\oldsubparagraph{#1}\mbox{}} 261 | \fi 262 | 263 | \title{emcee v3: A Python ensemble sampling toolkit for affine-invariant MCMC} 264 | 265 | \author[1]{Daniel Foreman-Mackey} 266 | \author[1, 2]{Will M. Farr} 267 | \author[3, 4]{Manodeep Sinha} 268 | \author[5]{Anne M. Archibald} 269 | \author[1, 6]{David W. Hogg} 270 | \author[7]{Jeremy S. Sanders} 271 | \author[8]{Joe Zuntz} 272 | \author[9, 10]{Peter K. G. Williams} 273 | \author[11]{Andrew R. J. Nelson} 274 | \author[12]{Miguel de Val-Borro} 275 | \author[13]{Tobias Erhardt} 276 | \author[14]{Ilya Pashchenko} 277 | \author[15]{Oriol Abril Pla} 278 | 279 | \affil[1]{Center for Computational Astrophysics, Flatiron Institute} 280 | \affil[2]{Department of Physics and Astronomy, Stony Brook University, United 281 | States} 282 | \affil[3]{Centre for Astrophysics \& Supercomputing, Swinburne University of 283 | Technology, Australia} 284 | \affil[4]{ARC Centre of Excellence for All Sky Astrophysics in 3 Dimensions (ASTRO 285 | 3D)} 286 | \affil[5]{University of Newcastle} 287 | \affil[6]{Center for Cosmology and Particle Physics, Department of Physics, New 288 | York University} 289 | \affil[7]{Max Planck Institute for Extraterrestrial Physics} 290 | \affil[8]{Institute for Astronomy, University of Edinburgh, Edinburgh, EH9 3HJ, UK} 291 | \affil[9]{Center for Astrophysics \textbar{} Harvard \& Smithsonian} 292 | \affil[10]{American Astronomical Society} 293 | \affil[11]{Australian Nuclear Science and Technology Organisation, NSW, Australia} 294 | \affil[12]{Planetary Science Institute, 1700 East Fort Lowell Rd., Suite 106, 295 | Tucson, AZ 85719, USA} 296 | 
\affil[13]{Climate and Environmental Physics and Oeschger Center for Climate Change 297 | Research, University of Bern, Bern, Switzerland} 298 | \affil[14]{P.N. Lebedev Physical Institute of the Russian Academy of Sciences, 299 | Moscow, Russia} 300 | \affil[15]{Universitat Pompeu Fabra, Barcelona} 301 | \date{\vspace{-7ex}} 302 | 303 | \begin{document} 304 | \maketitle 305 | 306 | \marginpar{ 307 | 308 | \begin{flushleft} 309 | %\hrule 310 | \sffamily\small 311 | 312 | {\bfseries DOI:} \href{https://doi.org/10.21105/joss.01864}{\color{linky}{10.21105/joss.01864}} 313 | 314 | \vspace{2mm} 315 | 316 | {\bfseries Software} 317 | \begin{itemize} 318 | \setlength\itemsep{0em} 319 | \item \href{https://github.com/openjournals/joss-reviews/issues/1864}{\color{linky}{Review}} \ExternalLink 320 | \item \href{https://github.com/dfm/emcee}{\color{linky}{Repository}} \ExternalLink 321 | \item \href{https://doi.org/10.5281/zenodo.3543502}{\color{linky}{Archive}} \ExternalLink 322 | \end{itemize} 323 | 324 | \vspace{2mm} 325 | 326 | \par\noindent\hrulefill\par 327 | 328 | \vspace{2mm} 329 | 330 | {\bfseries Editor:} \href{http://juanjobazan.com}{Juanjo Bazán} \ExternalLink \\ 331 | \vspace{1mm} 332 | {\bfseries Reviewers:} 333 | \begin{itemize} 334 | \setlength\itemsep{0em} 335 | \item \href{https://github.com/benjaminrose}{@benjaminrose} 336 | \item \href{https://github.com/mattpitkin}{@mattpitkin} 337 | \end{itemize} 338 | \vspace{2mm} 339 | 340 | {\bfseries Submitted:} 28 October 2019\\ 341 | {\bfseries Published:} 17 November 2019 342 | 343 | \vspace{2mm} 344 | {\bfseries License}\\ 345 | Authors of papers retain copyright and release the work under a Creative Commons Attribution 4.0 International License (\href{http://creativecommons.org/licenses/by/4.0/}{\color{linky}{CC-BY}}). 
346 | 347 | 348 | \end{flushleft} 349 | } 350 | 351 | \hypertarget{summary}{% 352 | \section{Summary}\label{summary}} 353 | 354 | \texttt{emcee} is a Python library implementing a class of 355 | affine-invariant ensemble samplers for Markov chain Monte Carlo (MCMC). 356 | This package has been widely applied to probabilistic modeling problems 357 | in astrophysics where it was originally published (Foreman-Mackey, Hogg, 358 | Lang, \& Goodman, 2013), with some applications in other fields. When it 359 | was first released in 2012, the interface implemented in \texttt{emcee} 360 | was fundamentally different from the MCMC libraries that were popular at 361 | the time, such as \texttt{PyMC}, because it was specifically designed to 362 | work with ``black box'' models instead of structured graphical models. 363 | This has been a popular interface for applications in astrophysics 364 | because it is often non-trivial to implement realistic physics within 365 | the modeling frameworks required by other libraries. Since 366 | \texttt{emcee}'s release, other libraries have been developed with 367 | similar interfaces, such as \texttt{dynesty} (Speagle, 2019). The 368 | version 3.0 release of \texttt{emcee} is the first major release of the 369 | library in about 6 years and it includes a full re-write of the 370 | computational backend, several commonly requested features, and a set of 371 | new ``move'' implementations. 372 | 373 | This new release includes both small quality of life improvements---like 374 | a progress bar using \href{https://tqdm.github.io}{\texttt{tqdm}}---and 375 | larger features. For example, the new \texttt{backends} interface 376 | implements real time serialization of sampling results. By default 377 | \texttt{emcee} saves its results in memory (as in the original 378 | implementation), but it now also includes a \texttt{HDFBackend} class 379 | that serializes the chain to disk using 380 | \href{https://www.h5py.org}{h5py}. 
381 | 382 | The most important new feature included in the version 3.0 release of 383 | \texttt{emcee} is the new \texttt{moves} interface. Originally, 384 | \texttt{emcee} implemented the affine-invariant ``stretch move'' 385 | proposed by Goodman \& Weare (2010), but there are other ensemble 386 | proposals that can get better performance for certain applications. 387 | \texttt{emcee} now includes implementations of several other ensemble 388 | moves and an interface for defining custom proposals. The implemented 389 | moves include: 390 | 391 | \begin{itemize} 392 | \tightlist 393 | \item 394 | The ``stretch move'' proposed by Goodman \& Weare (2010), 395 | \item 396 | The ``differential evolution'' and ``differential evolution snooker 397 | update'' moves (ter Braak, 2006; ter Braak \& Vrugt, 2008), and 398 | \item 399 | A ``kernel density proposal'' based on the implementation in 400 | \href{https://github.com/bfarr/kombine}{the \texttt{kombine} library} 401 | (Farr \& Farr, 2015). 402 | \end{itemize} 403 | 404 | \texttt{emcee} has been widely used and the original paper has been 405 | highly cited, but there have been many contributions from members of the 406 | community. This paper is meant to highlight these contributions and 407 | provide citation credit to the academic contributors. A full up-to-date 408 | list of contributors can always be found 409 | \href{https://github.com/dfm/emcee/graphs/contributors}{on GitHub}. 410 | 411 | \hypertarget{references}{% 412 | \section*{References}\label{references}} 413 | \addcontentsline{toc}{section}{References} 414 | 415 | \hypertarget{refs}{} 416 | \leavevmode\hypertarget{ref-Farr:2015}{}% 417 | Farr, B., \& Farr, W. M. (2015). Kombine: A kernel-density-based, 418 | embarrassingly parallel ensemble sampler. Retrieved from 419 | \url{https://github.com/bfarr/kombine} 420 | 421 | \leavevmode\hypertarget{ref-Foreman-Mackey:2013}{}% 422 | Foreman-Mackey, D., Hogg, D. W., Lang, D., \& Goodman, J. (2013). 
emcee: 423 | The MCMC Hammer. \emph{Publications of the Astronomical Society of the 424 | Pacific}, \emph{125}(925), 306. 425 | doi:\href{https://doi.org/10.1086/670067}{10.1086/670067} 426 | 427 | \leavevmode\hypertarget{ref-Goodman:2010}{}% 428 | Goodman, J., \& Weare, J. (2010). Ensemble samplers with affine 429 | invariance. \emph{Communications in applied mathematics and 430 | computational science}, \emph{5}(1), 65--80. 431 | doi:\href{https://doi.org/10.2140/camcos.2010.5.65}{10.2140/camcos.2010.5.65} 432 | 433 | \leavevmode\hypertarget{ref-Speagle:2019}{}% 434 | Speagle, J. S. (2019). dynesty: A Dynamic Nested Sampling Package for 435 | Estimating Bayesian Posteriors and Evidences. \emph{arXiv e-prints}, 436 | arXiv:1904.02180. Retrieved from \url{http://arxiv.org/abs/1904.02180} 437 | 438 | \leavevmode\hypertarget{ref-Ter-Braak:2006}{}% 439 | ter Braak, C. J. F. (2006). A Markov Chain Monte Carlo version of the 440 | genetic algorithm Differential Evolution: easy Bayesian computing for 441 | real parameter spaces. \emph{Statistics and Computing}, \emph{16}(3), 442 | 239--249. 443 | doi:\href{https://doi.org/10.1007/s11222-006-8769-1}{10.1007/s11222-006-8769-1} 444 | 445 | \leavevmode\hypertarget{ref-Ter-Braak:2008}{}% 446 | ter Braak, C. J. F., \& Vrugt, J. A. (2008). Differential evolution 447 | Markov chain with snooker updater and fewer chains. \emph{Statistics and 448 | Computing}, \emph{18}(4), 435--446. 449 | doi:\href{https://doi.org/10.1007/s11222-008-9104-9}{10.1007/s11222-008-9104-9} 450 | 451 | \end{document} 452 | --------------------------------------------------------------------------------