├── tests ├── __init__.py ├── gpu_tests │ ├── __init__.py │ └── test_gibbs.py ├── design_matrix │ ├── __init__.py │ └── test_sparse_matrix.py ├── regression_tests │ ├── __init__.py │ ├── saved_outputs │ │ ├── cox_hmc_samples.npy │ │ ├── logit_cg_samples.npy │ │ ├── linear_cg_samples.npy │ │ └── logit_cholesky_samples.npy │ └── test_gibb.py ├── helper.py ├── derivative_tester.py ├── test_design_matrix.py ├── manual_tests │ ├── test_stable_distribution.ipynb │ ├── test_global_scale_prior_hyperparam.ipynb │ └── test_stepsize_adapter.ipynb ├── test_likelihood_models.py └── test_prior.py ├── bayesbridge ├── util │ ├── __init__.py │ └── simplify_warnings.py ├── random │ ├── normal │ │ ├── __init__.py │ │ ├── normal.pxd │ │ └── normal.pyx │ ├── uniform │ │ ├── __init__.py │ │ ├── uniform.pxd │ │ └── uniform.pyx │ ├── __init__.py │ ├── polya_gamma │ │ ├── __init__.py │ │ ├── scipy_ndtr.pxd │ │ ├── setup.py │ │ ├── test_polyagamma.ipynb │ │ ├── polya_gamma.pyx │ │ └── scipy_ndtr.c │ ├── tilted_stable │ │ ├── __init__.py │ │ ├── setup.py │ │ ├── test_tilted_stable.ipynb │ │ ├── compare_methods_speed.ipynb │ │ └── tilted_stable.pyx │ └── random.py ├── reg_coef_sampler │ ├── hamiltonian_monte_carlo │ │ ├── __init__.py │ │ ├── util.py │ │ ├── dynamics.py │ │ ├── hmc.py │ │ ├── stepsize_adapter.py │ │ └── nuts.py │ ├── __init__.py │ ├── direct_gaussian_sampler.py │ ├── reg_coef_posterior_summarizer.py │ └── cg_sampler.py ├── __init__.py ├── design_matrix │ ├── __init__.py │ ├── cython_matmal │ │ ├── setup.py │ │ └── binary_matmul.pyx │ ├── mkl_matvec.py │ ├── dense_matrix.py │ ├── abstract_matrix.py │ └── sparse_matrix.py ├── model │ ├── __init__.py │ ├── abstract_model.py │ ├── linear_model.py │ ├── factory.py │ └── logistic_model.py ├── gibbs_util.py └── prior.py ├── docs ├── demo.ipynb ├── requirements.txt ├── index.rst ├── class.rst ├── Makefile └── conf.py ├── util ├── __init__.py └── mcmc_summarizer.py ├── .gitattributes ├── requirements.txt ├── MANIFEST.in ├── pyproject.toml ├── .readthedocs.yml ├── .gitignore ├── .github └── workflows │ └── python-app.yml ├── setup.py ├── README.rst └── simulate_data.py /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /bayesbridge/util/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/demo.ipynb: -------------------------------------------------------------------------------- 1 | ../demo.ipynb -------------------------------------------------------------------------------- /tests/gpu_tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/design_matrix/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /bayesbridge/random/normal/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /bayesbridge/random/uniform/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
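The tree above shows the package layout: the Gibbs sampler and its models live under bayesbridge/, the Cython random-number extensions under bayesbridge/random/, and the test suite under tests/. For orientation, the public entry points re-exported by the __init__.py files listed below can be exercised end to end roughly as in the following sketch. It is illustrative only: the toy data and sampler settings are arbitrary, and the Cython extensions must be built first (e.g. `python setup.py build_ext --inplace`, as in the CI workflow later in this listing).

import numpy as np
from bayesbridge import BayesBridge, RegressionModel, RegressionCoefPrior

# Toy data, for illustration only.
X = np.random.randn(100, 10)
y = X @ np.random.randn(10) + np.random.randn(100)

model = RegressionModel(y, X, family='linear')
prior = RegressionCoefPrior(bridge_exponent=.5)
bridge = BayesBridge(model, prior)
samples, mcmc_info = bridge.gibbs(
    n_burnin=100, n_post_burnin=1000, thin=1, coef_sampler_type='cholesky'
)
coef_samples = samples['coef']  # posterior draws of the regression coefficients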
/tests/regression_tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /util/__init__.py: -------------------------------------------------------------------------------- 1 | from . import mcmc_summarizer -------------------------------------------------------------------------------- /bayesbridge/random/__init__.py: -------------------------------------------------------------------------------- 1 | from .random import BasicRandom -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | bayesbridge_demo.ipynb linguist-documentation 2 | -------------------------------------------------------------------------------- /bayesbridge/reg_coef_sampler/hamiltonian_monte_carlo/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx>=3.0 2 | nbsphinx 3 | numpy 4 | scipy 5 | -------------------------------------------------------------------------------- /bayesbridge/random/polya_gamma/__init__.py: -------------------------------------------------------------------------------- 1 | from .polya_gamma import PolyaGammaDist -------------------------------------------------------------------------------- /bayesbridge/random/tilted_stable/__init__.py: -------------------------------------------------------------------------------- 1 | from .tilted_stable import ExpTiltedStableDist -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | Cython 2 | numpy>=1.19 3 | scipy 4 | pytest 5 | setuptools 6 | wheel 7 | -------------------------------------------------------------------------------- /bayesbridge/reg_coef_sampler/__init__.py: -------------------------------------------------------------------------------- 1 | from .reg_coef_sampler import SparseRegressionCoefficientSampler -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include bayesbridge/random/polya_gamma/scipy_ndtr.c 2 | global-include *.pyx 3 | global-include *.pxd 4 | -------------------------------------------------------------------------------- /bayesbridge/random/polya_gamma/scipy_ndtr.pxd: -------------------------------------------------------------------------------- 1 | cdef extern from "scipy_ndtr.c": 2 | double log_ndtr(double a) 3 | -------------------------------------------------------------------------------- /bayesbridge/random/normal/normal.pxd: -------------------------------------------------------------------------------- 1 | from numpy.random.bit_generator cimport BitGenerator 2 | 3 | cdef double random_normal(BitGenerator) -------------------------------------------------------------------------------- /bayesbridge/random/uniform/uniform.pxd: -------------------------------------------------------------------------------- 1 | from numpy.random.bit_generator cimport BitGenerator 2 | 3 | cdef double random_uniform(BitGenerator) 
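# Hypothetical example of how another Cython module would consume this declaration;
# the module path mirrors the 'bayesbridge.random.uniform.uniform' extension name
# built by setup.py, and the implementation lives in uniform.pyx:
#
#     from bayesbridge.random.uniform.uniform cimport random_uniform
#     cdef double u = random_uniform(bit_generator)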
-------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools", "wheel", "numpy", "scipy", "Cython"] 3 | build-backend = "setuptools.build_meta" 4 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../README.rst 2 | 3 | .. toctree:: 4 | :caption: Documentation 5 | :maxdepth: 1 6 | 7 | demo 8 | class 9 | -------------------------------------------------------------------------------- /tests/regression_tests/saved_outputs/cox_hmc_samples.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/bayes-bridge/HEAD/tests/regression_tests/saved_outputs/cox_hmc_samples.npy -------------------------------------------------------------------------------- /tests/regression_tests/saved_outputs/logit_cg_samples.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/bayes-bridge/HEAD/tests/regression_tests/saved_outputs/logit_cg_samples.npy -------------------------------------------------------------------------------- /tests/regression_tests/saved_outputs/linear_cg_samples.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/bayes-bridge/HEAD/tests/regression_tests/saved_outputs/linear_cg_samples.npy -------------------------------------------------------------------------------- /tests/regression_tests/saved_outputs/logit_cholesky_samples.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/bayes-bridge/HEAD/tests/regression_tests/saved_outputs/logit_cholesky_samples.npy -------------------------------------------------------------------------------- /bayesbridge/__init__.py: -------------------------------------------------------------------------------- 1 | from .bayesbridge import BayesBridge 2 | from .gibbs_util import SamplerOptions 3 | from .prior import RegressionCoefPrior 4 | from .model import RegressionModel -------------------------------------------------------------------------------- /bayesbridge/design_matrix/__init__.py: -------------------------------------------------------------------------------- 1 | from .sparse_matrix import SparseDesignMatrix 2 | from .dense_matrix import DenseDesignMatrix 3 | from .abstract_matrix import AbstractDesignMatrix -------------------------------------------------------------------------------- /bayesbridge/model/__init__.py: -------------------------------------------------------------------------------- 1 | from .factory import RegressionModel 2 | from .linear_model import LinearModel 3 | from .logistic_model import LogisticModel 4 | from .cox_model import CoxModel -------------------------------------------------------------------------------- /bayesbridge/util/simplify_warnings.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | 4 | def simplified_format( 5 | message, category, filename, lineno, line=None): 6 | to_print = '{:s}:{:d}: {:s}: {:s}\n'.format( 7 | filename, lineno, category.__name__, str(message) 8 | ) 9 | return to_print 10 | 11 | warnings.formatwarning = simplified_format 12 | 
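# Illustration of the effect (file name and line number below are hypothetical):
# with this format installed, warnings.warn("predictors appear collinear") prints
# a single line such as
#     some_module.py:42: UserWarning: predictors appear collinear
# instead of the default multi-line output, which also echoes the offending source line.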
-------------------------------------------------------------------------------- /bayesbridge/random/polya_gamma/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup, Extension 2 | from Cython.Build import cythonize 3 | import numpy as np 4 | 5 | ext_modules = [ 6 | Extension( 7 | "polya_gamma", 8 | ["polya_gamma.pyx"], 9 | include_dirs=[np.get_include()] 10 | ) 11 | ] 12 | 13 | setup( 14 | ext_modules = cythonize(ext_modules) 15 | ) 16 | -------------------------------------------------------------------------------- /bayesbridge/random/tilted_stable/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from setuptools.extension import Extension 3 | from Cython.Build import cythonize 4 | import numpy as np 5 | 6 | ext_modules = [ 7 | Extension( 8 | "tilted_stable", 9 | ["tilted_stable.pyx"], 10 | include_dirs=[np.get_include()] 11 | ) 12 | ] 13 | 14 | setup( 15 | ext_modules = cythonize(ext_modules) 16 | ) 17 | -------------------------------------------------------------------------------- /docs/class.rst: -------------------------------------------------------------------------------- 1 | Main Class and Method 2 | ===================== 3 | 4 | BayesBridge and Gibbs Sampler 5 | ----------------------------- 6 | .. automodule:: bayesbridge 7 | 8 | .. autoclass:: BayesBridge(model, prior) 9 | :members: gibbs 10 | 11 | Model and Prior 12 | --------------- 13 | .. automodule:: bayesbridge 14 | 15 | .. autofunction:: RegressionModel 16 | 17 | .. autoclass:: RegressionCoefPrior 18 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | # Required 5 | version: 2 6 | 7 | # Build documentation in the docs/ directory with Sphinx 8 | sphinx: 9 | configuration: docs/conf.py 10 | 11 | # Optionally set the version of Python and requirements required to build your docs 12 | python: 13 | version: 3 14 | install: 15 | - requirements: docs/requirements.txt 16 | -------------------------------------------------------------------------------- /bayesbridge/reg_coef_sampler/hamiltonian_monte_carlo/util.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | 4 | def simplified_format( 5 | message, category, filename, lineno, line=None): 6 | to_print = '{:s}:{:d}: {:s}: {:s}\n'.format( 7 | filename, lineno, category.__name__, str(message) 8 | ) 9 | return to_print 10 | 11 | warnings.formatwarning = simplified_format 12 | 13 | def warn_message_only(message, category=UserWarning): 14 | warnings.warn(message, category, stacklevel=2) 15 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 
12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /bayesbridge/design_matrix/cython_matmal/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup, Extension 2 | from Cython.Build import cythonize 3 | import subprocess 4 | import os 5 | import numpy as np 6 | 7 | # Hack to include the numpy header file. 8 | cmd = 'export CFLAGS="-I ' + np.get_include() + ' $CFLAGS"' 9 | subprocess.run(cmd, shell=True, check=True) 10 | os.environ["CC"] = "clang++ -Xpreprocessor -fopenmp -lomp" # "gcc-6 -fopenmp" 11 | 12 | ext_modules = [ 13 | Extension( 14 | "binary_matmul", 15 | ["binary_matmul.pyx"], 16 | # extra_compile_args=['-Xpreprocessor -fopenmp -lomp'], 17 | # extra_link_args=['-Xpreprocessor -fopenmp -lomp'], 18 | ) 19 | ] 20 | 21 | setup( 22 | ext_modules = cythonize(ext_modules) 23 | ) 24 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | **/__pycache__ 2 | **/.DS_store 3 | **/.pytest_cache 4 | **/.ipynb_checkpoints 5 | 6 | bayesbridge/reg_coef_sampler/hamiltonian_monte_carlo/tests 7 | bayesbridge/random/polya_gamma/polya_gamma.c 8 | bayesbridge/random/polya_gamma/polya_gamma.*.so 9 | bayesbridge/random/polya_gamma/polya_gamma.html 10 | bayesbridge/random/tilted_stable/tilted_stable.c 11 | bayesbridge/random/tilted_stable/tilted_stable.*.so 12 | bayesbridge/random/tilted_stable/tilted_stable.html 13 | bayesbridge/random/normal/normal.c 14 | bayesbridge/random/normal/normal.*.so 15 | bayesbridge/random/normal/normal.html 16 | bayesbridge/random/uniform/uniform.c 17 | bayesbridge/random/uniform/uniform.*.so 18 | bayesbridge/random/uniform/uniform.html 19 | 20 | # C extensions 21 | *.so 22 | 23 | # Distribution / packaging 24 | **/build 25 | .eggs/ 26 | .idea/ 27 | env/ 28 | dist/ 29 | bayesbridge.egg-info/ 30 | 31 | # Environments 32 | venv/ 33 | 34 | # Sphinx documentation 35 | docs/_build/ -------------------------------------------------------------------------------- /bayesbridge/random/normal/normal.pyx: -------------------------------------------------------------------------------- 1 | from cpython.pycapsule cimport PyCapsule_GetPointer 2 | from numpy.random cimport bitgen_t 3 | from numpy.random.bit_generator cimport BitGenerator, bitgen_t 4 | from numpy.random.c_distributions cimport random_standard_normal 5 | 6 | 7 | cdef double random_normal(BitGenerator bit_generator): 8 | """ 9 | Generate a random value from a standard normal distribution. 10 | 11 | Parameters 12 | ---------- 13 | bit_generator : BitGenerator 14 | Numpy BitGenerator object. This object is *not* locked during generation since the 15 | sampling runs on a single thread and performance is much better without locking/releasing. 16 | 17 | Returns 18 | ------- 19 | double 20 | Random number. 
21 | """ 22 | cdef bitgen_t *rng 23 | cdef const char *capsule_name = "BitGenerator" 24 | capsule = bit_generator.capsule 25 | rng = PyCapsule_GetPointer(capsule, capsule_name) 26 | return random_standard_normal(rng) 27 | -------------------------------------------------------------------------------- /bayesbridge/random/uniform/uniform.pyx: -------------------------------------------------------------------------------- 1 | from cpython.pycapsule cimport PyCapsule_IsValid, PyCapsule_GetPointer 2 | from numpy.random cimport bitgen_t 3 | from numpy.random.bit_generator cimport BitGenerator, bitgen_t 4 | from numpy.random.c_distributions cimport random_standard_uniform 5 | 6 | 7 | cdef double random_uniform(BitGenerator bit_generator): 8 | """ 9 | Generate a random value from a uniform(0,1) distribution. 10 | 11 | Parameters 12 | ---------- 13 | bit_generator : BitGenerator 14 | Numpy BitGenerator object. This object is *not* locked during generation since the 15 | sampling runs on a single thread and performance is much better without locking/releasing. 16 | 17 | Returns 18 | ------- 19 | double 20 | Random number. 21 | """ 22 | cdef bitgen_t *rng 23 | cdef const char *capsule_name = "BitGenerator" 24 | capsule = bit_generator.capsule 25 | rng = PyCapsule_GetPointer(capsule, capsule_name) 26 | return random_standard_uniform(rng) 27 | -------------------------------------------------------------------------------- /bayesbridge/model/abstract_model.py: -------------------------------------------------------------------------------- 1 | import abc 2 | 3 | 4 | class AbstractModel(): 5 | 6 | @property 7 | def n_obs(self): 8 | return self.design.shape[0] 9 | 10 | @property 11 | def n_pred(self): 12 | return self.design.shape[1] 13 | 14 | @property 15 | def intercept_added(self): 16 | return self.design.intercept_added 17 | 18 | @abc.abstractmethod 19 | def compute_loglik_and_gradient(self, beta, loglik_only=False): 20 | pass 21 | 22 | @abc.abstractmethod 23 | def compute_hessian(self, beta): 24 | pass 25 | 26 | @abc.abstractmethod 27 | def get_hessian_matvec_operator(self, beta): 28 | pass 29 | 34 | @abc.abstractmethod 35 | def calc_intercept_mle(self): 36 | """ Calculate MLE for intercept assuming other coefficients are zero.
""" 37 | pass 38 | 39 | @staticmethod 40 | @abc.abstractmethod 41 | def simulate_outcome(): 42 | pass -------------------------------------------------------------------------------- /.github/workflows/python-app.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies and run tests with a single version of Python 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: Python application 5 | 6 | on: 7 | push: 8 | branches: [ master, test ] 9 | pull_request: 10 | branches: [ master, test ] 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ${{ matrix.os }} 16 | strategy: 17 | matrix: 18 | os: [ubuntu-latest, macos-latest, windows-latest] 19 | 20 | steps: 21 | - uses: actions/checkout@v2 22 | - name: Set up Python 3.12 23 | uses: actions/setup-python@v2 24 | with: 25 | python-version: 3.12 26 | - name: Install dependencies 27 | run: | 28 | python -m pip install --upgrade pip 29 | pip install -r requirements.txt 30 | - name: Cythonize and install package 31 | run: | 32 | python setup.py build_ext --inplace 33 | python setup.py install 34 | - name: Test with pytest 35 | run: | 36 | pytest tests 37 | -------------------------------------------------------------------------------- /bayesbridge/design_matrix/cython_matmal/binary_matmul.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport numpy as np 3 | import cython 4 | cimport cython 5 | from cython.parallel cimport prange 6 | 7 | ctypedef np.int32_t INT_t 8 | ctypedef np.float_t FLOAT_t 9 | FLOAT = np.float64 10 | 11 | def binary_matmul(X_csr, v): 12 | return c_binary_matmul_parallel(X_csr.indices, X_csr.indptr, v) 13 | 14 | @cython.boundscheck(False) 15 | @cython.wraparound(False) 16 | cdef c_binary_matmul(np.ndarray[INT_t, ndim=1] indices, np.ndarray[INT_t, ndim=1] indptr, np.ndarray[FLOAT_t, ndim=1] v): 17 | cdef int i, k 18 | cdef int m = indptr.shape[0] - 1 19 | cdef FLOAT_t val 20 | cdef np.ndarray[FLOAT_t, ndim=1] Xv = np.zeros(m, dtype=FLOAT) 21 | for i in range(m): 22 | val = 0 23 | for k in range(indptr[i], indptr[i + 1]): 24 | val += v[indices[k]] 25 | Xv[i] = val 26 | return Xv 27 | 28 | @cython.boundscheck(False) 29 | @cython.wraparound(False) 30 | cdef c_binary_matmul_parallel(np.ndarray[INT_t, ndim=1] indices, np.ndarray[INT_t, ndim=1] indptr, np.ndarray[FLOAT_t, ndim=1] v): 31 | cdef int i, k 32 | cdef int m = indptr.shape[0] - 1 33 | cdef FLOAT_t val 34 | cdef np.ndarray[FLOAT_t, ndim=1] Xv = np.zeros(m, dtype=FLOAT) 35 | for i in prange(m, nogil=True): 36 | for k in range(indptr[i], indptr[i + 1]): 37 | Xv[i] += v[indices[k]] 38 | return Xv 39 | -------------------------------------------------------------------------------- /bayesbridge/model/linear_model.py: -------------------------------------------------------------------------------- 1 | from .abstract_model import AbstractModel 2 | import math 3 | import numpy as np 4 | 5 | 6 | class LinearModel(AbstractModel): 7 | 8 | def __init__(self, y, design): 9 | self.y = y 10 | self.design = design 11 | self.name = 'linear' 12 | 13 | def compute_loglik_and_gradient(self, beta, obs_prec, loglik_only=False): 14 | X_beta = self.design.dot(beta) 15 | loglik = ( 16 | len(self.y) * math.log(obs_prec) / 2 17 | - obs_prec * np.sum((self.y - X_beta) ** 2) / 2 18 | ) 19 | if loglik_only: 20 | grad = None 21 | else: 22 | grad = obs_prec * self.design.Tdot(self.y 
- X_beta) 23 | return loglik, grad 24 | 25 | def compute_hessian(self, beta): 26 | pass 27 | 28 | def get_hessian_matvec_operator(self, beta, obs_prec): 29 | hessian_op = lambda v: - obs_prec * self.design.Tdot(self.design.dot(v)) 30 | return hessian_op 31 | 32 | def calc_intercept_mle(self): 33 | return self.y.mean() 34 | 35 | @staticmethod 36 | def simulate_outcome(X, beta, noise_sd, seed=None): 37 | """ 38 | Parameters 39 | ---------- 40 | X : DesignMatrix, numpy/scipy matrix 41 | Only needs to support the `dot()` operation 42 | """ 43 | if seed is not None: 44 | np.random.seed(seed) 45 | y = X.dot(beta) + noise_sd * np.random.randn(X.shape[0]) 46 | return y -------------------------------------------------------------------------------- /bayesbridge/random/random.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from .polya_gamma import PolyaGammaDist 3 | from .tilted_stable import ExpTiltedStableDist 4 | 5 | class BasicRandom(): 6 | """ 7 | Generators of random variables from the basic distributions used in 8 | Bayesian sparse regression. 9 | """ 10 | 11 | def __init__(self, seed=None): 12 | self.np_random = np.random 13 | self.pg = PolyaGammaDist() 14 | self.ts = ExpTiltedStableDist() 15 | self.set_seed(seed) 16 | 17 | def set_seed(self, seed): 18 | self.np_random.seed(seed) 19 | pg_seed = np.random.randint(1, 1 + np.iinfo(np.int32).max) 20 | ts_seed = np.random.randint(1, 1 + np.iinfo(np.int32).max) 21 | self.pg.set_seed(pg_seed) 22 | self.ts.set_seed(ts_seed) 23 | 24 | def get_state(self): 25 | rand_gen_state = { 26 | 'numpy' : self.np_random.get_state(), 27 | 'tilted_stable' : self.ts.get_state(), 28 | 'polya_gamma' : self.pg.get_state() 29 | } 30 | return rand_gen_state 31 | 32 | def set_state(self, rand_gen_state): 33 | self.np_random.set_state(rand_gen_state['numpy']) 34 | self.ts.set_state(rand_gen_state['tilted_stable']) 35 | self.pg.set_state(rand_gen_state['polya_gamma']) 36 | 37 | def polya_gamma(self, shape, tilt): 38 | return self.pg.rand_polyagamma(shape, tilt) 39 | 40 | def tilted_stable(self, char_exponent, tilt): 41 | return self.ts.sample(char_exponent, tilt) 42 | -------------------------------------------------------------------------------- /util/mcmc_summarizer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | from matplotlib.pylab import MaxNLocator 4 | 5 | 6 | def plot_conf_interval( 7 | coef_samples, conf_level=.95, n_coef_to_plot=None, 8 | sort_by_median_val=False, marker_scale=1.0, 9 | ): 10 | tail_prob = (1 - conf_level) / 2 11 | lower, median, upper = [ 12 | np.quantile(coef_samples, q, axis=-1) 13 | for q in [tail_prob, .5, 1 - tail_prob] 14 | ] 15 | 16 | if sort_by_median_val: 17 | sort_ind = np.argsort(median) 18 | else: 19 | sort_ind = np.arange(len(median)) # No sorting 20 | 21 | if n_coef_to_plot is None: 22 | n_coef_to_plot = len(median) 23 | coef_index = sort_ind[:n_coef_to_plot] 24 | 25 | plt.plot( 26 | coef_index, median[coef_index], 27 | 'x', color='tab:blue', ms=marker_scale * 10, 28 | label='Posterior median' 29 | ) 30 | plt.plot( 31 | coef_index, lower[coef_index], 32 | '_', color='tab:green', ms=marker_scale * 12, lw=marker_scale * 1.2, 33 | label='{:.1f}% credible interval'.format(100 * conf_level) 34 | ) 35 | plt.plot( 36 | coef_index, upper[coef_index], 37 | '_', color='tab:green', ms=marker_scale * 12, lw=marker_scale * 1.2 38 | ) 39 | 
plt.gca().get_xaxis().set_major_locator(MaxNLocator(integer=True)) 40 | 41 | plotted_quantity = { 42 | 'lower': lower[coef_index], 43 | 'median': median[coef_index], 44 | 'upper': upper[coef_index], 45 | 'coef_index': coef_index 46 | } 47 | return plotted_quantity -------------------------------------------------------------------------------- /tests/helper.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy as sp 3 | from bayesbridge.model import LinearModel, LogisticModel, CoxModel 4 | from bayesbridge.design_matrix import SparseDesignMatrix, DenseDesignMatrix 5 | from simulate_data import simulate_design 6 | 7 | 8 | def simulate_data(model, n_obs=100, n_pred=50, seed=None, 9 | return_design_mat=False): 10 | if seed is not None: 11 | np.random.seed(seed) 12 | 13 | X = simulate_design(n_obs, n_pred, binary_frac=.9) 14 | 15 | beta = np.random.randn(n_pred) 16 | n_trial = None 17 | if model == 'linear': 18 | y = LinearModel.simulate_outcome(X, beta, noise_sd=1.) 19 | elif model == 'logit': 20 | n_trial = 1 + np.random.binomial(np.arange(n_obs) + 1, .5) 21 | n_success = LogisticModel.simulate_outcome(n_trial, X, beta) 22 | y = (n_success, n_trial) 23 | elif model == 'cox': 24 | event_time, censoring_time = CoxModel.simulate_outcome(X, beta) 25 | event_time, censoring_time, X = \ 26 | CoxModel._permute_observations_by_event_and_censoring_time( 27 | event_time, censoring_time, X) 28 | event_time, censoring_time, X = \ 29 | CoxModel._drop_uninformative_observations(event_time, 30 | censoring_time, X) 31 | y = (event_time, censoring_time) 32 | else: 33 | raise NotImplementedError() 34 | 35 | if return_design_mat: 36 | if sp.sparse.issparse(X): 37 | X = SparseDesignMatrix(X, add_intercept=False) 38 | else: 39 | X = DenseDesignMatrix(X, add_intercept=False) 40 | 41 | return y, X, beta -------------------------------------------------------------------------------- /bayesbridge/reg_coef_sampler/direct_gaussian_sampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy as sp 3 | 4 | def generate_gaussian_with_weight(design, obs_prec, prior_prec_sqrt, z, rand_gen=None): 5 | """ 6 | Generate a multi-variate Gaussian with covariance Sigma 7 | Sigma^{-1} = X diag(obs_prec) X + diag(prior_prec_sqrt) ** 2 8 | and mean = Sigma z, where X is the `design` matrix. 
9 | 10 | Parameters 11 | ---------- 12 | obs_prec : 1-d numpy array 13 | prior_prec_sqrt : 1-d numpy array 14 | """ 15 | 16 | diag = prior_prec_sqrt ** 2 \ 17 | + design.compute_fisher_info(weight=obs_prec, diag_only=True) 18 | jacobi_precond_scale = 1 / np.sqrt(diag) 19 | Prec_precond = compute_precond_post_prec( 20 | design, obs_prec, prior_prec_sqrt, jacobi_precond_scale 21 | ) 22 | Prec_precond_chol = sp.linalg.cholesky(Prec_precond) 23 | mean_precond = sp.linalg.cho_solve( 24 | (Prec_precond_chol, False), jacobi_precond_scale * z 25 | ) 26 | if rand_gen is None: 27 | gaussian_vec = np.random.randn(design.shape[1]) 28 | else: 29 | gaussian_vec = rand_gen.np_random.randn(design.shape[1]) 30 | sample_precond = mean_precond 31 | sample_precond += sp.linalg.solve_triangular( 32 | Prec_precond_chol, gaussian_vec, lower=False 33 | ) 34 | sample = jacobi_precond_scale * sample_precond 35 | 36 | return sample 37 | 38 | def compute_precond_post_prec(design, obs_prec, prior_prec_sqrt, precond_scale): 39 | Prec_precond = \ 40 | precond_scale[:, np.newaxis] \ 41 | * design.compute_fisher_info(obs_prec) \ 42 | * precond_scale[np.newaxis, :] 43 | Prec_precond += np.diag((precond_scale * prior_prec_sqrt) ** 2) 44 | return Prec_precond 45 | -------------------------------------------------------------------------------- /bayesbridge/design_matrix/mkl_matvec.py: -------------------------------------------------------------------------------- 1 | import platform 2 | import numpy as np 3 | import scipy as sp 4 | import scipy.sparse 5 | import ctypes 6 | from ctypes import
POINTER, c_int, c_char, c_char_p, c_double, byref 7 | 8 | try: 9 | if platform.system() == 'Windows': 10 | mkl = ctypes.windll.LoadLibrary("mkl_rt.dll") 11 | else: 12 | mkl = ctypes.cdll.LoadLibrary("libmkl_rt.dylib") 13 | except: 14 | raise ImportError("Could not load Intel MKL Library.") 15 | 16 | 17 | def mkl_csr_matvec(A, x, transpose=False): 18 | """ 19 | Parameters 20 | ---------- 21 | A : scipy.sparse csr matrix 22 | x : numpy 1d array 23 | """ 24 | 25 | if not sp.sparse.isspmatrix_csr(A): 26 | raise TypeError("The matrix must be a scipy sparse CSR matrix.") 27 | 28 | if x.ndim != 1: 29 | raise TypeError("The vector to be multiplied must be a 1d array.") 30 | 31 | if x.dtype.type is not np.double: 32 | x = x.astype(np.double, copy=True) 33 | 34 | # Allocate the result of the matrix-vector multiplication. 35 | result = np.empty(A.shape[transpose]) 36 | 37 | # Set the parameters for simply computing A.dot(x) for a general matrix A. 38 | alpha = byref(c_double(1.0)) 39 | beta = byref(c_double(0.0)) 40 | matrix_description = c_char_p(bytes('G C ', 'utf-8')) 41 | 42 | # Get pointers to the numpy arrays. 43 | data_ptr = A.data.ctypes.data_as(POINTER(c_double)) 44 | indices_ptr = A.indices.ctypes.data_as(POINTER(c_int)) 45 | indptr_begin = A.indptr[:-1].ctypes.data_as(POINTER(c_int)) 46 | indptr_end = A.indptr[1:].ctypes.data_as(POINTER(c_int)) 47 | x_ptr = x.ctypes.data_as(POINTER(c_double)) 48 | result_ptr = result.ctypes.data_as(POINTER(c_double)) 49 | 50 | transpose_flag = byref(c_char(bytes(['n', 't'][transpose], 'utf-8'))) 51 | n_row, n_col = [byref(c_int(size)) for size in A.shape] 52 | mkl.mkl_dcsrmv( 53 | transpose_flag, n_row, n_col, alpha, matrix_description, 54 | data_ptr, indices_ptr, indptr_begin, indptr_end, x_ptr, beta, result_ptr 55 | ) 56 | return result 57 | -------------------------------------------------------------------------------- /bayesbridge/design_matrix/dense_matrix.py: -------------------------------------------------------------------------------- 1 | from warnings import warn 2 | 3 | import numpy as np 4 | from .abstract_matrix import AbstractDesignMatrix 5 | 6 | 7 | class DenseDesignMatrix(AbstractDesignMatrix): 8 | 9 | def __init__(self, X, center_predictor=False, add_intercept=True, 10 | copy_array=False): 11 | """ 12 | Params: 13 | ------ 14 | X : numpy array 15 | """ 16 | self.use_cupy = False 17 | if copy_array: 18 | X = X.copy() 19 | super().__init__() 20 | X = self.remove_intercept_indicator(X) 21 | if center_predictor: 22 | X -= np.mean(X, axis=0)[np.newaxis, :] 23 | if add_intercept: 24 | X = np.hstack((np.ones((X.shape[0], 1)), X)) 25 | self.X = X 26 | self.intercept_added = add_intercept 27 | self.centered = center_predictor 28 | 29 | @property 30 | def shape(self): 31 | return self.X.shape 32 | 33 | @property 34 | def is_sparse(self): 35 | return False 36 | 37 | def dot(self, v): 38 | 39 | if self.memoized and np.all(self.v_prev == v): 40 | return self.X_dot_v 41 | 42 | result = self.X.dot(v) 43 | if self.memoized: 44 | self.X_dot_v = result 45 | self.v_prev = v 46 | self.dot_count += 1 47 | 48 | return result 49 | 50 | def Tdot(self, v): 51 | self.Tdot_count += 1 52 | return self.X.T.dot(v) 53 | 54 | def compute_fisher_info(self, weight, diag_only=False): 55 | if diag_only: 56 | return np.sum(weight[:, np.newaxis] * self.X ** 2, 0) 57 | else: 58 | return self.X.T.dot(weight[:, np.newaxis] * self.X) 59 | 60 | def compute_transposed_fisher_info(self, weight): 61 | # TODO: Implement. 
62 | # Note: with current implementation of the class, `self.X` explicitly includes 63 | # the intercept when `self.intercept_added == True`. 64 | pass 65 | 66 | def toarray(self): 67 | return self.X 68 | 69 | def extract_matrix(self, order=None): 70 | return self.X 71 | -------------------------------------------------------------------------------- /tests/derivative_tester.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def numerical_grad_is_close( 4 | f, x, atol=10E-6, rtol=10E-6, dx=10E-6, 5 | subset_index=None, return_grad=False): 6 | """ 7 | Compare the computed gradient to a centered finite difference approximation. 8 | 9 | Params: 10 | ------- 11 | f : callable 12 | Returns a value of a function and its gradient 13 | """ 14 | if subset_index is None: 15 | subset_index = np.arange(len(x)) 16 | 17 | x = np.array(x, ndmin=1) 18 | grad_est = np.zeros(len(subset_index)) 19 | for i in range(len(subset_index)): 20 | x_minus = x.copy() 21 | x_minus[subset_index[i]] -= dx 22 | x_plus = x.copy() 23 | x_plus[subset_index[i]] += dx 24 | f_minus, _ = f(x_minus) 25 | f_plus, _ = f(x_plus) 26 | grad_est[i] = (f_plus - f_minus) / (2 * dx) 27 | 28 | _, grad = f(x) 29 | is_close = np.allclose(grad[subset_index], grad_est, atol=atol, rtol=rtol) 30 | 31 | if return_grad: 32 | return is_close, grad[subset_index], grad_est 33 | else: 34 | return is_close 35 | 36 | 37 | def numerical_direc_deriv_is_close( 38 | f, x, hess_matvec, n_direction=10, 39 | atol=10E-6, rtol=10E-6, dx=10E-6, seed=None): 40 | """ 41 | Compare analytically computed directional derivatives of the gradient of 'f' 42 | (i.e. the Hessian of 'f' applied to vectors) to its numerical approximations. 43 | 44 | Params: 45 | ------- 46 | f : callable 47 | Returns a value of a function and its gradient 48 | """ 49 | 50 | x = np.array(x, ndmin=1) 51 | 52 | np.random.seed(seed) 53 | all_matched = True 54 | 55 | for i in range(n_direction): 56 | 57 | v = np.random.randn(len(x)) 58 | v /= np.sqrt(np.sum(v ** 2)) 59 | _, grad_minus = f(x - dx * v) 60 | _, grad_plus = f(x + dx * v) 61 | direc_deriv_est = (grad_plus - grad_minus) / (2 * dx) 62 | direc_deriv = hess_matvec(v) 63 | 64 | if not np.allclose(direc_deriv, direc_deriv_est, atol=atol, rtol=rtol): 65 | all_matched = False 66 | break 67 | 68 | return all_matched 69 | -------------------------------------------------------------------------------- /tests/gpu_tests/test_gibbs.py: -------------------------------------------------------------------------------- 1 | """Tests for GPU functionality. 
Depends on cupy being installed, so not run on CI.""" 2 | from bayesbridge import BayesBridge, RegressionModel, RegressionCoefPrior 3 | import numpy as np 4 | import pytest 5 | pytest.importorskip("cupy") # Skip all the test in the module if cupy not found 6 | import cupy as cp 7 | 8 | from ..helper import simulate_data 9 | 10 | 11 | @pytest.fixture 12 | def bridge_gpu(): 13 | y, X, beta = simulate_data(model='logit', seed=1) 14 | return BayesBridge( 15 | RegressionModel(y, cp.sparse.csr_matrix(X), 'logit'), 16 | RegressionCoefPrior()) 17 | 18 | 19 | @pytest.fixture 20 | def bridge_cpu(): 21 | y, X, beta = simulate_data(model='logit', seed=1) 22 | return BayesBridge( 23 | RegressionModel(y, X, 'logit'), 24 | RegressionCoefPrior() 25 | ) 26 | 27 | 28 | def test_use_cupy(bridge_cpu, bridge_gpu): 29 | """Test use_cupy attribute is set appropriately.""" 30 | assert bridge_gpu.model.design.use_cupy 31 | assert not bridge_cpu.model.design.use_cupy 32 | 33 | 34 | def test_similar_output(bridge_cpu, bridge_gpu): 35 | """Test that the CPU and GPU results are the same.""" 36 | iters = 10 37 | seed = 1 38 | sampler = 'cg' 39 | init = {'coef': np.ones(bridge_gpu.model.n_pred)} 40 | samples_cpu, mcmc_info_cpu = bridge_cpu.gibbs( 41 | n_iter=iters, coef_sampler_type=sampler, init=init, seed=seed) 42 | samples_gpu, mcmc_info_gpu = bridge_gpu.gibbs( 43 | n_iter=iters, coef_sampler_type=sampler, init=init, seed=seed) 44 | assert np.allclose(samples_gpu['coef'], samples_cpu['coef'], atol=1e-5) 45 | 46 | 47 | def test_preferred_sampler(bridge_gpu): 48 | """Test default sampler for cupy matrices is 'cg'.""" 49 | samples_gpu, mcmc_info_gpu = bridge_gpu.gibbs(n_iter=1) 50 | assert mcmc_info_gpu['options']['coef_sampler_type'] == 'cg' 51 | 52 | 53 | def test_unsupported_sampler(bridge_gpu): 54 | """Test non-'cg' samplers raise errors.""" 55 | with pytest.raises(ValueError): 56 | bridge_gpu.gibbs(n_iter=1, coef_sampler_type='cholesky') 57 | with pytest.raises(ValueError): 58 | bridge_gpu.gibbs(n_iter=1, coef_sampler_type='hmc') 59 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from Cython.Build import cythonize 3 | from os.path import dirname, join, abspath 4 | from setuptools import setup, find_packages 5 | from setuptools.command.build_ext import build_ext 6 | from setuptools.extension import Extension 7 | 8 | path = dirname(__file__) 9 | src_dir = join(dirname(path), '..', 'src') 10 | defs = [('NPY_NO_DEPRECATED_API', 0)] 11 | inc_path = np.get_include() 12 | lib_path = [abspath(join(np.get_include(), '..', '..', 'random', 'lib'))] 13 | np_libs = ['npyrandom'] 14 | 15 | class CustomBuildExtCommand(build_ext): 16 | """ build_ext command when numpy headers are needed. 
""" 17 | def run(self): 18 | # Import numpy here, only when headers are needed 19 | import numpy as np 20 | self.include_dirs.append(np.get_include()) 21 | build_ext.run(self) 22 | 23 | 24 | ext_modules = [ 25 | Extension( 26 | 'bayesbridge.random.tilted_stable.tilted_stable', 27 | sources=['bayesbridge/random/tilted_stable/tilted_stable.pyx'], 28 | ), 29 | Extension( 30 | 'bayesbridge.random.polya_gamma.polya_gamma', 31 | sources=['bayesbridge/random/polya_gamma/polya_gamma.pyx'], 32 | ), 33 | Extension( 34 | 'bayesbridge.random.normal.normal', 35 | sources=['bayesbridge/random/normal/normal.pyx'], 36 | library_dirs=lib_path, 37 | libraries=np_libs, 38 | define_macros=defs, 39 | ), 40 | Extension( 41 | 'bayesbridge.random.uniform.uniform', 42 | sources=['bayesbridge/random/uniform/uniform.pyx'], 43 | library_dirs=lib_path, 44 | libraries=np_libs, 45 | define_macros=defs, 46 | ) 47 | ] 48 | 49 | setup( 50 | name='bayesbridge', 51 | version='0.2.6', 52 | description=\ 53 | 'Generates posterior samples under Bayesian sparse regression based on ' 54 | + 'the bridge prior using the CG-accelerated Gibbs sampler of Nishimura ' 55 | + 'et. al. (2018). The linear and logistic model are currently supported.', 56 | url='https://github.com/aki-nishimura/bayes-bridge', 57 | author='Akihiko (Aki) Nishimura', 58 | author_email='aki.nishimura@jhu.edu', 59 | license='MIT', 60 | packages=find_packages(exclude=['tests', 'tests.*']), 61 | cmdclass={'build_ext': CustomBuildExtCommand}, 62 | ext_modules=cythonize(ext_modules), 63 | setup_requires=['numpy>=1.19'], 64 | extras_require={ 65 | 'gpu': 'cupy>=9.4.0' 66 | }, 67 | install_requires=[ 68 | 'numpy>=1.19', 'scipy' 69 | ], 70 | zip_safe=False 71 | ) 72 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | import os 14 | import sys 15 | sys.path.insert(0, os.path.abspath('..')) 16 | # sys.path.insert(0, os.path.abspath('../bayesbridge/')) 17 | 18 | 19 | # -- Project information ----------------------------------------------------- 20 | 21 | project = 'bayesbridge' 22 | copyright = '2018, Aki Nishimura' 23 | author = 'Aki Nishimura' 24 | 25 | # The full version, including alpha/beta/rc tags 26 | release = '0.1' 27 | 28 | 29 | # -- General configuration --------------------------------------------------- 30 | 31 | # Add any Sphinx extension module names here, as strings. They can be 32 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 33 | # ones. 34 | extensions = ['sphinx.ext.autodoc', 'sphinx.ext.napoleon', 'nbsphinx'] 35 | 36 | # Add any paths that contain templates here, relative to this directory. 37 | templates_path = ['_templates'] 38 | 39 | # List of patterns, relative to source directory, that match files and 40 | # directories to ignore when looking for source files. 
41 | # This pattern also affects html_static_path and html_extra_path. 42 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 43 | 44 | # Set the master document to 'index' 45 | master_doc = 'index' 46 | 47 | # -- Options for HTML output ------------------------------------------------- 48 | 49 | # The theme to use for HTML and HTML Help pages. See the documentation for 50 | # a list of builtin themes. 51 | # 52 | html_theme = 'sphinx_rtd_theme' 53 | 54 | # Add any paths that contain custom static files (such as style sheets) here, 55 | # relative to this directory. They are copied after the builtin static files, 56 | # so a file named "default.css" will overwrite the builtin "default.css". 57 | html_static_path = ['_static'] 58 | 59 | 60 | # -- Autodoc configuration --------------------------------------------------- 61 | autoclass_content = 'both' 62 | 63 | # Dependency on C-extension modules breaks ReadTheDocs without the mock import. 64 | autodoc_mock_imports = ["bayesbridge.random"] 65 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | BayesBridge 2 | =========== 3 | 4 | Python package for Bayesian sparse regression, implementing the standard (Polya-Gamma augmented) Gibbs sampler as well as the CG-accelerated sampler of Nishimura and Suchard (2022). The latter algorithm can be orders of magnitude faster for a large and sparse design matrix. 5 | 6 | Installation 7 | ------------ 8 | .. code-block:: bash 9 | 10 | pip install bayesbridge 11 | 12 | Background 13 | ---------- 14 | The Bayesian bridge is based on the following prior on the regression coefficients :math:`\beta_j`'s: 15 | 16 | .. 17 | .. math:: 18 | \pi(\beta_j \, | \, \tau) \propto \tau^{-1} \exp \big(-|\beta_j / \tau|^\alpha \big) \ \text{ for } \ 0 < \alpha \leq 1 19 | 20 | .. raw:: html 21 | 22 | 23 | 24 | The Bayesian bridge recovers the Bayesian lasso when :math:`\alpha = 1` but can provide an improved separation of the significant coefficients from the rest when :math:`\alpha < 1`. 25 | 26 | Usage 27 | ----- 28 | 29 | .. code-block:: python 30 | 31 | from bayesbridge import BayesBridge, RegressionModel, RegressionCoefPrior 32 | 33 | model = RegressionModel(y, X, family='logit') 34 | prior = RegressionCoefPrior(bridge_exponent=.5) 35 | bridge = BayesBridge(model, prior) 36 | samples, mcmc_info = bridge.gibbs( 37 | n_burnin=100, n_post_burnin=1000, thin=1, 38 | coef_sampler_type='cholesky' # Try 'cg' for large and sparse X 39 | ) 40 | coef_samples = samples['coef'] 41 | 42 | where `y` is a 1-D numpy array and `X` is a 2-D numpy array or scipy sparse matrix. 43 | 44 | Currently the linear and logistic models (binomial outcomes) are supported. See `demo.ipynb` for a demonstration of further features. 45 | 46 | Citation 47 | -------- 48 | If you find this package useful, please consider citing: 49 | 50 | Akihiko Nishimura and Marc A. Suchard (2022). 51 | Prior-preconditioned conjugate gradient method for accelerated Gibbs sampling in "large *n*, large *p*" Bayesian sparse regression. *Journal of the American Statistical Association*. 52 | 53 | Akihiko Nishimura and Marc A. Suchard (2022). 54 | Shrinkage with shrunken shoulders: Gibbs sampling shrinkage model posteriors with guaranteed convergence rates. *Bayesian Analysis*.
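
For a quick visual check of the posterior draws, the top-level `util/mcmc_summarizer.py` module in this repository plots medians and credible intervals. A minimal sketch, assuming matplotlib is installed and the repository root is on the Python path:

.. code-block:: python

    from util.mcmc_summarizer import plot_conf_interval

    # Expects the coefficient draws stacked along the last axis.
    plot_conf_interval(coef_samples, conf_level=.95, n_coef_to_plot=20)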
-------------------------------------------------------------------------------- /bayesbridge/model/factory.py: -------------------------------------------------------------------------------- 1 | from warnings import warn 2 | import scipy as sp 3 | 4 | from .linear_model import LinearModel 5 | from .logistic_model import LogisticModel 6 | from .cox_model import CoxModel 7 | from ..design_matrix import DenseDesignMatrix, SparseDesignMatrix 8 | from ..design_matrix import AbstractDesignMatrix 9 | 10 | def RegressionModel( 11 | outcome, X, family='linear', 12 | add_intercept=None, center_predictor=True 13 | ): 14 | """ Prepare input data to BayesBridge, with pre-processings as needed. 15 | 16 | For the Cox model, the observations (rows of X) are reordered to optimize 17 | likelihood, gradient, and Hessian evaluations. 18 | 19 | Parameters 20 | ---------- 21 | outcome : 1-d numpy array, tuple of two 1-d numpy arrays 22 | n_success or (n_success, n_trial) if family == 'logistic'. If 23 | the input is a single array, then outcome is assumed binary. 24 | (event_time, censoring_time) if family == 'cox'. 25 | X : numpy array or scipy sparse matrix 26 | family : str, {'linear', 'logit', 'cox'} 27 | add_intercept : bool, None 28 | If None, add intercept except when family == 'cox' 29 | center_predictor : bool 30 | """ 31 | 32 | if add_intercept is None: 33 | add_intercept = (family != 'cox') 34 | 35 | if family == 'cox': 36 | if add_intercept: 37 | add_intercept = False 38 | warn("Intercept is not identifiable in Cox model and won't be added.") 39 | if AbstractDesignMatrix.is_cupy_matrix(X): 40 | raise ValueError("cupy matrix not yet supported for the Cox model.") 41 | event_time, censoring_time = outcome 42 | event_time, censoring_time, X = CoxModel.preprocess_data( 43 | event_time, censoring_time, X 44 | ) 45 | 46 | if AbstractDesignMatrix.is_cupy_dense(X): 47 | raise ValueError("cupy not yet supported for a dense design matrix.") 48 | is_sparse = sp.sparse.issparse(X) or SparseDesignMatrix.is_cupy_sparse(X) 49 | DesignMatrix = SparseDesignMatrix if is_sparse else DenseDesignMatrix 50 | design = DesignMatrix( 51 | X, add_intercept=add_intercept, center_predictor=center_predictor 52 | ) 53 | 54 | if family == 'linear': 55 | model = LinearModel(outcome, design) 56 | elif family == 'logit': 57 | if isinstance(outcome, tuple): 58 | n_success, n_trial = outcome 59 | else: 60 | n_success = outcome 61 | n_trial = None 62 | model = LogisticModel(n_success, n_trial, design) 63 | elif family == 'cox': 64 | model = CoxModel(event_time, censoring_time, design) 65 | else: 66 | raise NotImplementedError() 67 | 68 | return model 69 | -------------------------------------------------------------------------------- /bayesbridge/reg_coef_sampler/hamiltonian_monte_carlo/dynamics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import math 3 | 4 | 5 | """ 6 | Defines a (numerical) Hamiltonian dynamics based on a Gaussian momentum and the 7 | velocity Verlet integrator. The code is written so that other integrators & 8 | momentum distributions can also be employed straightwardly. 9 | """ 10 | 11 | class HamiltonianDynamics(): 12 | 13 | def __init__(self, mass=None): 14 | """ 15 | Parameters 16 | ---------- 17 | mass: None, numpy 1d array, or callable `mass(p, power)` 18 | If callable, should return a vector obtained by multiplying the 19 | vector p with matrix M ** power for power == -1 or power == 1/2. 
20 | The matrix L corresponding to M ** 1/2 only needs to satisfy L L' = M. 21 | Passing M = None defaults to a dynamics with the identity mass matrix. 22 | """ 23 | 24 | if mass is None: 25 | mass_operator = lambda p, power: p 26 | elif isinstance(mass, np.ndarray): 27 | sqrt_mass = np.sqrt(mass) 28 | inv_mass = 1 / mass 29 | def mass_operator(p, power): 30 | if power == -1: 31 | return inv_mass * p 32 | elif power == 1 / 2: 33 | return sqrt_mass * p 34 | elif callable(mass): 35 | mass_operator = mass 36 | else: 37 | raise ValueError("Unsupported type for the mass matrix.") 38 | 39 | self.integrator = velocity_verlet 40 | self.momentum = GaussianMomentum(mass_operator) 41 | 42 | def integrate(self, f, dt, q, p, grad): 43 | q, p, logp, grad \ 44 | = self.integrator(f, self.momentum.get_grad, dt, q, p, grad) 45 | return q, p, logp, grad 46 | 47 | def draw_momentum(self, n_param): 48 | return self.momentum.draw_random(n_param) 49 | 50 | def compute_hamiltonian(self, logp, p): 51 | potential = - logp 52 | kinetic = - self.momentum.get_logp(p) 53 | return potential + kinetic 54 | 55 | def convert_to_velocity(self, p): 56 | return - self.momentum.get_grad(p) 57 | 58 | 59 | def velocity_verlet( 60 | get_position_logp_and_grad, get_momentum_grad, dt, q, p, position_grad 61 | ): 62 | p = p + 0.5 * dt * position_grad 63 | q = q - dt * get_momentum_grad(p) 64 | position_logp, position_grad = get_position_logp_and_grad(q) 65 | if math.isfinite(position_logp): 66 | p += 0.5 * dt * position_grad 67 | return q, p, position_logp, position_grad 68 | 69 | 70 | class GaussianMomentum(): 71 | 72 | def __init__(self, mass=None): 73 | self.mass = mass 74 | 75 | def draw_random(self, n_param): 76 | p = self.mass(np.random.randn(n_param), 1/2) 77 | return p 78 | 79 | def get_grad(self, p): 80 | return - self.mass(p, -1) 81 | 82 | def get_logp(self, p): 83 | return - 0.5 * np.dot(p, self.mass(p, -1)) -------------------------------------------------------------------------------- /tests/test_design_matrix.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy as sp 3 | import scipy.sparse 4 | 5 | from bayesbridge.design_matrix import SparseDesignMatrix, DenseDesignMatrix 6 | from simulate_data import simulate_design 7 | 8 | atol = 10e-6 9 | rtol = 10e-6 10 | 11 | 12 | def test_sparse_design_intercept_and_centering(): 13 | 14 | n_obs, n_pred = (100, 10) 15 | X = simulate_design(n_obs, n_pred, binary_frac=.5, format_='sparse') 16 | X_design = SparseDesignMatrix(X, center_predictor=True, add_intercept=True) 17 | X_ndarray = center_and_add_intercept(X.toarray()) 18 | w, v = (np.random.randn(size) for size in X_design.shape) 19 | assert np.allclose( 20 | X_design.dot(v), X_ndarray.dot(v), atol=atol, rtol=rtol 21 | ) 22 | assert np.allclose( 23 | X_design.Tdot(w), X_ndarray.T.dot(w), atol=atol, rtol=rtol 24 | ) 25 | 26 | 27 | def test_sparse_design_centered_fisher_info(): 28 | 29 | n_obs, n_pred = (5, 3) 30 | X = simulate_design(n_obs, n_pred, binary_frac=.5, format_='sparse') 31 | X_design = SparseDesignMatrix( 32 | X, center_predictor=True, add_intercept=True, copy_array=True 33 | ) 34 | X_ndarray = center_and_add_intercept(X.toarray()) 35 | weight = np.random.exponential(size=n_obs) 36 | benchmark_fisher_info = X_ndarray.T.dot(weight[:, np.newaxis] * X_ndarray) 37 | assert np.allclose( 38 | X_design.compute_fisher_info(weight), 39 | benchmark_fisher_info, 40 | atol=atol, rtol=rtol 41 | ) 42 | assert np.allclose( 43 | 
X_design.compute_fisher_info(weight, diag_only=True), 44 | np.diag(benchmark_fisher_info), 45 | atol=atol, rtol=rtol 46 | ) 47 | 48 | 49 | def test_dense_design_intercept_and_centering(): 50 | 51 | n_obs, n_pred = (100, 10) 52 | X = simulate_design(n_obs, n_pred, binary_frac=.5, format_='dense') 53 | X_design = DenseDesignMatrix(X, center_predictor=True, add_intercept=True) 54 | X_ndarray = center_and_add_intercept(X) 55 | w, v = (np.random.randn(size) for size in X_design.shape) 56 | assert np.allclose( 57 | X_design.dot(v), X_ndarray.dot(v), atol=atol, rtol=rtol 58 | ) 59 | assert np.allclose( 60 | X_design.Tdot(w), X_ndarray.T.dot(w), atol=atol, rtol=rtol 61 | ) 62 | 63 | 64 | def center_and_add_intercept(X): 65 | X -= X.mean(axis=0)[np.newaxis, :] 66 | intercept_column = np.ones((X.shape[0], 1)) 67 | X = np.hstack((intercept_column, X)) 68 | return X 69 | 70 | 71 | def test_intercept_removal(): 72 | 73 | n_obs, n_pred = (100, 10) 74 | X = simulate_design(n_obs, n_pred, binary_frac=.5, format_='sparse') 75 | X_with_const_col = sp.sparse.hstack([ 76 | np.ones((n_obs, 1)), X[:, :5], -.5 * np.ones((n_obs, 1)), X[:, 5:] 77 | ]).tocsr() 78 | assert np.allclose( 79 | X.toarray(), 80 | SparseDesignMatrix.remove_intercept_indicator(X_with_const_col).toarray() 81 | ) 82 | assert np.allclose( 83 | X.toarray(), 84 | DenseDesignMatrix.remove_intercept_indicator(X_with_const_col.toarray()) 85 | ) -------------------------------------------------------------------------------- /tests/manual_tests/test_stable_distribution.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import sys\n", 10 | "sys.path.append('../')\n", 11 | "sys.path.append('../../')\n", 12 | "\n", 13 | "from bayesbridge.random.tilted_stable_dist.rand_exp_tilted_stable \\\n", 14 | " import ExpTiltedStableDist\n", 15 | "from random import normalvariate as norm_rv\n", 16 | "from math import sqrt, gamma\n", 17 | "import numpy as np\n", 18 | "\n", 19 | "import matplotlib.pyplot as plt\n", 20 | "%matplotlib inline" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "## Gibbs sample a powered-exponential distribution using the tilted stable sampler." 
28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "tilted_stable = ExpTiltedStableDist(seed=0)\n", 37 | "\n", 38 | "def powered_exp_dist_gibbs(n_sample, n_burnin=0, exponent=.5, scale=1.):\n", 39 | " beta = 0.\n", 40 | " samples = np.zeros(n_sample + n_burnin)\n", 41 | " for i in range(n_burnin + n_sample):\n", 42 | " lshrink_sq = .5 / tilted_stable.rv(exponent / 2, (beta / scale) ** 2)\n", 43 | " lshrink = sqrt(lshrink_sq)\n", 44 | " beta = lshrink * norm_rv(0., 1.)\n", 45 | " samples[i] = beta\n", 46 | " samples = samples[n_burnin:]\n", 47 | " return samples\n", 48 | "\n", 49 | "def powered_exp_pdf(x, exponent, scale=1., normed=True):\n", 50 | " pdf = np.exp(- np.abs(x / scale) ** exponent)\n", 51 | " if normed:\n", 52 | " pdf *= exponent / (2 * scale * gamma(exponent ** -1))\n", 53 | " return pdf" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "n_sample = 10 ** 6\n", 63 | "n_burnin = 10 ** 3\n", 64 | "exponent = 1 / 2\n", 65 | "samples = powered_exp_dist_gibbs(n_sample, n_burnin, exponent)" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "plt.figure(figsize=(8, 5))\n", 75 | "plt.rcParams['font.size'] = 20\n", 76 | "\n", 77 | "plot_range = 25 * np.array([-1, 1])\n", 78 | "bins = np.linspace(plot_range[0], plot_range[1], 100)\n", 79 | "x = np.linspace(plot_range[0], plot_range[1], 10001)\n", 80 | "plt.hist(samples, bins=bins, density=True)\n", 81 | "plt.plot(x, powered_exp_pdf(x, exponent), 'tab:green')\n", 82 | "plt.show()" 83 | ] 84 | } 85 | ], 86 | "metadata": { 87 | "kernelspec": { 88 | "display_name": "Python 3", 89 | "language": "python", 90 | "name": "python3" 91 | }, 92 | "language_info": { 93 | "codemirror_mode": { 94 | "name": "ipython", 95 | "version": 3 96 | }, 97 | "file_extension": ".py", 98 | "mimetype": "text/x-python", 99 | "name": "python", 100 | "nbconvert_exporter": "python", 101 | "pygments_lexer": "ipython3", 102 | "version": "3.7.3" 103 | } 104 | }, 105 | "nbformat": 4, 106 | "nbformat_minor": 2 107 | } 108 | -------------------------------------------------------------------------------- /bayesbridge/design_matrix/abstract_matrix.py: -------------------------------------------------------------------------------- 1 | import abc 2 | import numpy as np 3 | import scipy as sp 4 | import scipy.sparse 5 | import warnings 6 | try: 7 | import cupy as cp 8 | import cupyx as cpx 9 | except (ImportError, ModuleNotFoundError) as e: 10 | cp = None 11 | cupy_exception = e 12 | 13 | 14 | class AbstractDesignMatrix(): 15 | 16 | def __init__(self): 17 | self.dot_count = 0 18 | self.Tdot_count = 0 19 | self.memoized = False 20 | self.X_dot_v = None # For memoization 21 | self.v_prev = None # For memoization 22 | 23 | @property 24 | @abc.abstractmethod 25 | def shape(self): 26 | pass 27 | 28 | @abc.abstractmethod 29 | def dot(self, v): 30 | pass 31 | 32 | @abc.abstractmethod 33 | def Tdot(self, v): 34 | """ Multiply by the transpose of the matrix. 
""" 35 | pass 36 | 37 | @property 38 | @abc.abstractmethod 39 | def is_sparse(self): 40 | pass 41 | 42 | def memoize_dot(self, flag=True): 43 | self.memoized = flag 44 | if self.v_prev is None: 45 | self.v_prev = np.full(self.shape[1], float('nan')) 46 | if not flag: 47 | self.X_dot_v = None 48 | self.v_prev = None 49 | 50 | @abc.abstractmethod 51 | def compute_fisher_info(self, weight, diag_only): 52 | """ Computes X' diag(weight) X and returns it as a numpy array. """ 53 | pass 54 | 55 | @abc.abstractmethod 56 | def compute_transposed_fisher_info(self, weight, include_intrcpt): 57 | """ Computes X diag(weight) X' and returns it as a numpy array, where 58 | `X` is a design matrix. """ 59 | pass 60 | 61 | @property 62 | def n_matvec(self): 63 | return self.dot_count + self.Tdot_count 64 | 65 | def get_dot_count(self): 66 | return self.dot_count, self.Tdot_count 67 | 68 | def reset_matvec_count(self, count=0): 69 | if not hasattr(count, "__len__"): 70 | count = 2 * [count] 71 | self.dot_count = count[0] 72 | self.Tdot_count = count[1] 73 | 74 | @abc.abstractmethod 75 | def toarray(self): 76 | """ Returns a 2-dimensional numpy array. """ 77 | pass 78 | 79 | @staticmethod 80 | def is_cupy_matrix(X): 81 | return AbstractDesignMatrix.is_cupy_dense(X) \ 82 | or AbstractDesignMatrix.is_cupy_sparse(X) 83 | 84 | @staticmethod 85 | def is_cupy_dense(X): 86 | return (cp is not None) and isinstance(X, cp.ndarray) 87 | 88 | @staticmethod 89 | def is_cupy_sparse(X): 90 | return (cp is not None) and isinstance(X, cpx.scipy.sparse.spmatrix) 91 | 92 | @staticmethod 93 | def remove_intercept_indicator(X): 94 | squeeze, array, power = (cp.squeeze, cp.array, cp.power) if \ 95 | AbstractDesignMatrix.is_cupy_sparse(X) else (np.squeeze, np.array, np.power) 96 | if sp.sparse.issparse(X) or AbstractDesignMatrix.is_cupy_sparse(X): 97 | col_variance = squeeze(array(X.power(2).mean(axis=0) - power(X.mean(axis=0), 2))) 98 | else: 99 | col_variance = np.var(X, axis=0) 100 | has_zero_variance = (col_variance < X.shape[0] * 2 ** -52) 101 | if np.any(has_zero_variance): 102 | warnings.warn( 103 | "Intercept column (or numerically indistinguishable from " 104 | "such) detected. Do not add intercept manually. Removing...." 
105 | ) 106 | X = X[:, np.logical_not(has_zero_variance)] 107 | return X -------------------------------------------------------------------------------- /bayesbridge/random/tilted_stable/test_tilted_stable.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import sys\n", 10 | "sys.path.insert(0, '../')\n", 11 | "\n", 12 | "import numpy as np\n", 13 | "import matplotlib.pyplot as plt\n", 14 | "\n", 15 | "from tilted_stable import ExpTiltedStableDist" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "import rpy2.robjects as robjects\n", 25 | "import rpy2.robjects.packages as rpackages\n", 26 | "\n", 27 | "try:\n", 28 | " copula = rpackages.importr('copula')\n", 29 | "except:\n", 30 | " utils = rpackages.importr('utils')\n", 31 | " utils.install_packages('copula')\n", 32 | " copula = rpackages.importr('copula')" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "## Cross check Python module outputs with R package\n", 40 | "Generating the half-million samples via an external R call takes about 45 sec on iMac 2015." 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "def comparison_hist(samples_1, samples_2, use_log_scale=False):\n", 50 | " if use_log_scale:\n", 51 | " samples_1 = np.log(samples_1)\n", 52 | " samples_2 = np.log(samples_2)\n", 53 | " x_max = max(samples_1.max(), samples_2.max())\n", 54 | " x_min = min(samples_1.min(), samples_2.min())\n", 55 | " bins = np.linspace(x_min, x_max, 51)\n", 56 | " \n", 57 | " plt.hist(samples_1, alpha=.5, bins=bins, density=True)\n", 58 | " plt.hist(samples_2, alpha=.5, bins=bins, density=True)\n", 59 | " for side in ['left', 'top', 'right']:\n", 60 | " plt.gca().spines[side].set_visible(False)\n", 61 | " plt.yticks([])" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "plt.figure(figsize=(14, 4.5))\n", 71 | "plt.rcParams['font.size'] = 20\n", 72 | "\n", 73 | "n_samples = 5 * 10 ** 5\n", 74 | "char_exponent = [1. / 32, 1. 
/ 4]\n", 75 | "tilt = [.01, 100.]\n", 76 | "tilted_stable = ExpTiltedStableDist()\n", 77 | "\n", 78 | "for i in range(2):\n", 79 | " \n", 80 | " python_samples = tilted_stable.sample(\n", 81 | " char_exponent[i], np.tile(tilt[i], n_samples)\n", 82 | " )\n", 83 | " R_samples = np.array([\n", 84 | " copula.retstable(char_exponent[i], 1.0, tilt[i])[0]\n", 85 | " for _ in range(n_samples)\n", 86 | " ])\n", 87 | " \n", 88 | " plt.subplot(1, 2, i + 1)\n", 89 | " comparison_hist(R_samples, python_samples, use_log_scale=True)\n", 90 | " \n", 91 | " plt.xlabel('log(tilted stable)')\n", 92 | " if i == 0:\n", 93 | " plt.legend(['from R package', 'from Python'], frameon=False)\n", 94 | "\n", 95 | "plt.show()" 96 | ] 97 | } 98 | ], 99 | "metadata": { 100 | "kernelspec": { 101 | "display_name": "Python 3", 102 | "language": "python", 103 | "name": "python3" 104 | }, 105 | "language_info": { 106 | "codemirror_mode": { 107 | "name": "ipython", 108 | "version": 3 109 | }, 110 | "file_extension": ".py", 111 | "mimetype": "text/x-python", 112 | "name": "python", 113 | "nbconvert_exporter": "python", 114 | "pygments_lexer": "ipython3", 115 | "version": "3.6.10" 116 | } 117 | }, 118 | "nbformat": 4, 119 | "nbformat_minor": 4 120 | } 121 | -------------------------------------------------------------------------------- /tests/regression_tests/test_gibb.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy as sp 3 | import scipy.sparse 4 | import math 5 | import sys 6 | 7 | from bayesbridge import BayesBridge, RegressionModel, RegressionCoefPrior 8 | from bayesbridge.model import LinearModel, LogisticModel, CoxModel 9 | 10 | data_folder = 'saved_outputs' 11 | test_combo = [ 12 | ('linear', 'cg', 'dense', False), 13 | ('logit', 'cholesky', 'dense', False), 14 | ('logit', 'cholesky', 'dense', True), 15 | ('logit', 'cg', 'sparse', False), 16 | ('cox', 'hmc', 'sparse', False) 17 | ] 18 | 19 | def test_gibbs(request): 20 | 21 | test_dirname = request.fspath.dirname 22 | for model, sampling_method, matrix_format, restart_im_middle in test_combo: 23 | samples = run_gibbs(model, sampling_method, matrix_format, restart_im_middle) 24 | assert is_same_as_prev_output(samples, sampling_method, model, test_dirname) 25 | 26 | def run_gibbs(model_type, sampling_method, matrix_format, restart_in_middle=False): 27 | 28 | n_burnin = 0 29 | n_post_burnin = 10 30 | thin = 1 31 | bridge_exponent = 0.25 32 | 33 | outcome, X = simulate_data(model_type, matrix_format) 34 | prior = RegressionCoefPrior( 35 | sd_for_intercept=2., regularizing_slab_size=1., 36 | bridge_exponent=bridge_exponent 37 | ) 38 | model = RegressionModel(outcome, X, model_type) 39 | bridge = BayesBridge(model, prior) 40 | 41 | if restart_in_middle: 42 | n_total_post_burnin = n_post_burnin 43 | n_post_burnin = math.ceil(n_total_post_burnin / 2) 44 | 45 | init = { 46 | 'global_scale': 0.1, 47 | 'local_scale': np.ones(X.shape[1]), 48 | } 49 | samples, mcmc_info = bridge.gibbs( 50 | n_burnin + n_post_burnin, n_burnin, init=init, thin=thin, 51 | coef_sampler_type=sampling_method, seed=0, params_to_save='all' 52 | ) 53 | 54 | if restart_in_middle: 55 | reinit_bridge = BayesBridge(model, prior) 56 | samples, mcmc_info = reinit_bridge.gibbs_resume( 57 | mcmc_info, n_post_burnin, merge=True, prev_samples=samples 58 | ) 59 | 60 | return samples 61 | 62 | def simulate_data(model, matrix_format): 63 | 64 | np.random.seed(1) 65 | n = 100 66 | p = 50 67 | 68 | # True parameters 69 | sigma_true = 2 70 | 
beta_true = np.zeros(p) 71 | beta_true[:4] = 1 72 | beta_true[4:15] = 2 ** - np.linspace(0.0, 5, 11) 73 | 74 | X = np.random.randn(n, p) 75 | 76 | if model == 'linear': 77 | outcome = LinearModel.simulate_outcome(X, beta_true, sigma_true) 78 | elif model == 'logit': 79 | n_trial = np.ones(n, dtype=np.int32) 80 | n_success = LogisticModel.simulate_outcome(n_trial, X, beta_true) 81 | outcome = (n_success, n_trial) 82 | elif model == 'cox': 83 | outcome = CoxModel.simulate_outcome(X, beta_true) 84 | else: 85 | raise NotImplementedError() 86 | 87 | if matrix_format == 'sparse': 88 | X = sp.sparse.csr_matrix(X) 89 | 90 | return outcome, X 91 | 92 | def load_data(sampling_method, model, test_dirname): 93 | filepath = '/'.join([ 94 | test_dirname, data_folder, get_filename(sampling_method, model) 95 | ]) 96 | return np.load(filepath) 97 | 98 | def get_filename(sampling_method, model): 99 | return '_'.join([ 100 | model, sampling_method, 'samples.npy' 101 | ]) 102 | 103 | def save_data(samples, sampling_method, model): 104 | filepath = data_folder + '/' + get_filename(sampling_method, model) 105 | np.save(filepath, samples['coef'][:, -1]) 106 | 107 | def is_same_as_prev_output(samples, sampling_method, model, test_dirname): 108 | prev_sample = load_data(sampling_method, model, test_dirname) 109 | return np.allclose(samples['coef'][:, -1], prev_sample, rtol=.001, atol=10e-6) 110 | 111 | 112 | if __name__ == '__main__': 113 | option = sys.argv[-1] 114 | if option == 'update': 115 | for model, sampling_method, matrix_format, restart_im_middle in test_combo: 116 | samples = run_gibbs(model, sampling_method, matrix_format, restart_im_middle) 117 | save_data(samples, sampling_method, model) -------------------------------------------------------------------------------- /tests/manual_tests/test_global_scale_prior_hyperparam.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import math\n", 10 | "import numpy as np\n", 11 | "import matplotlib.pyplot as plt" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "import sys\n", 21 | "sys.path.insert(0, '../../')\n", 22 | "\n", 23 | "from bayesbridge import BayesBridge" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "## Specify mean and sd in log10 scale and find the matching prior." 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "bridge_exponent = 1 / 8\n", 40 | "log10_mean = - 4.\n", 41 | "log10_sd = 1.\n", 42 | "gscale_parametrization = ['raw', 'coefficient'][1]" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "# Dummy data just to initialize BayesBridge\n", 52 | "y = np.random.randn(10)\n", 53 | "X = np.random.randn(10, 2)\n", 54 | "bridge = BayesBridge(y, X, global_scale_parametrization=gscale_parametrization)\n", 55 | "bridge.set_global_scale_prior(log10_mean, log10_sd, bridge_exponent)\n", 56 | "prior_param = bridge.prior_param['gscale_neg_power']" 57 | ] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": {}, 62 | "source": [ 63 | "## Check that the prior indeed has the specified mean and sd." 
64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "shape = prior_param['shape']\n", 73 | "scale = prior_param['rate'] ** -1" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "n_sample = 10 ** 6\n", 83 | "samples = np.random.gamma(shape, scale, size=n_sample) ** (- 1 / bridge_exponent)\n", 84 | "if gscale_parametrization == 'coefficient':\n", 85 | " samples *= bridge.compute_power_exp_ave_magnitude(bridge_exponent, 1.)\n", 86 | "log10_gscale_samples = np.log10(samples)\n", 87 | "\n", 88 | "mean_est = np.mean(log10_gscale_samples)\n", 89 | "sd_est = np.std(log10_gscale_samples)\n", 90 | "\n", 91 | "rtol = .01\n", 92 | "mean_is_close = abs((mean_est - log10_mean) / log10_mean) < rtol\n", 93 | "sd_is_close = abs((sd_est - log10_sd) / log10_sd) < rtol\n", 94 | "if mean_is_close and sd_is_close:\n", 95 | " print(\"Monte Carlo estimates agree with theoretical values.\")\n", 96 | "else:\n", 97 | " print(\"Warning! Monte Carlo estimates do NOT agree with theoretical values.\")" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "metadata": {}, 104 | "outputs": [], 105 | "source": [ 106 | "plt.figure(figsize=(7, 5))\n", 107 | "plt.rcParams['font.size'] = 18\n", 108 | "\n", 109 | "plt.hist(\n", 110 | " log10_gscale_samples, bins=51, density=True,\n", 111 | " label='prior dist.'\n", 112 | ")\n", 113 | "plt.axvline(\n", 114 | " mean_est, linestyle='--', color='tab:orange',\n", 115 | " label='mean'\n", 116 | ")\n", 117 | "plt.axvline(\n", 118 | " mean_est + 2 * sd_est, linestyle='--', color='tab:olive',\n", 119 | " label=r'mean $\\pm$ 2 std'\n", 120 | ")\n", 121 | "plt.axvline(\n", 122 | " mean_est - 2 * sd_est, linestyle='--', color='tab:olive'\n", 123 | ")\n", 124 | "plt.xlabel(r'$\\log(\\tau)$')\n", 125 | "plt.yticks([])\n", 126 | "plt.legend(frameon=False)\n", 127 | "plt.tight_layout()" 128 | ] 129 | } 130 | ], 131 | "metadata": { 132 | "kernelspec": { 133 | "display_name": "Python 3", 134 | "language": "python", 135 | "name": "python3" 136 | }, 137 | "language_info": { 138 | "codemirror_mode": { 139 | "name": "ipython", 140 | "version": 3 141 | }, 142 | "file_extension": ".py", 143 | "mimetype": "text/x-python", 144 | "name": "python", 145 | "nbconvert_exporter": "python", 146 | "pygments_lexer": "ipython3", 147 | "version": "3.6.10" 148 | } 149 | }, 150 | "nbformat": 4, 151 | "nbformat_minor": 4 152 | } 153 | -------------------------------------------------------------------------------- /bayesbridge/reg_coef_sampler/reg_coef_posterior_summarizer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class RegressionCoeffficientPosteriorSummarizer(): 4 | 5 | def __init__(self, n_coef, n_unshrunk, regularizing_slab_size, 6 | pc_summary_method='average'): 7 | self.n_unshrunk = n_unshrunk 8 | self.coef_scaled_summarizer = OntheflySummarizer(n_coef) 9 | self.slab_size = regularizing_slab_size 10 | self.pc_summarizer = DirectionSummarizer(pc_summary_method) 11 | 12 | def scale_coef(self, coef, gscale, lscale): 13 | coef_scaled = coef.copy() 14 | coef_scaled[self.n_unshrunk:] \ 15 | /= self.compute_prior_scale(gscale, lscale) 16 | return coef_scaled 17 | 18 | def update(self, coef, gscale, lscale): 19 | coef_scaled = self.scale_coef(coef, gscale, lscale) 20 | self.coef_scaled_summarizer.update_stats(coef_scaled) 21 | 22 | def 
update_precond_hessian_pc(self, pc): 23 | self.pc_summarizer.update(pc) 24 | 25 | def extrapolate_coef_condmean(self, gscale, lscale): 26 | coef_condmean_guess = self.coef_scaled_summarizer.stats['mean'].copy() 27 | coef_condmean_guess[self.n_unshrunk:] \ 28 | *= self.compute_prior_scale(gscale, lscale) 29 | return coef_condmean_guess 30 | 31 | def estimate_coef_precond_scale_sd(self): 32 | return self.coef_scaled_summarizer.estimate_post_sd() 33 | 34 | def estimate_precond_hessian_pc(self): 35 | return self.pc_summarizer.get_mean() 36 | 37 | def compute_prior_scale(self, gscale, lscale): 38 | """ Compute the regularized prior scale in a numerically stable way. """ 39 | unreg_prior_scale = gscale * lscale 40 | return unreg_prior_scale \ 41 | / np.sqrt(1 + (unreg_prior_scale / self.slab_size) ** 2) 42 | 43 | class DirectionSummarizer(): 44 | 45 | def __init__(self, summary_method): 46 | """ 47 | Parameters 48 | ---------- 49 | summary_method: str, {'average', 'previous'} 50 | """ 51 | self.method = summary_method 52 | self.n_averaged = 0 53 | self.v = None 54 | 55 | def update(self, v): 56 | if self.n_averaged == 0 or self.method == 'previous': 57 | self.v = v 58 | else: 59 | v *= np.sign(np.inner(self.v, v)) 60 | weight = 1 / (1 + self.n_averaged) 61 | self.v = weight * v + (1 - weight) * self.v 62 | self.n_averaged += 1 63 | 64 | def get_mean(self): 65 | return self.v 66 | 67 | 68 | class OntheflySummarizer(): 69 | """ 70 | Carries out online updates of the mean, variance, and other statistics of a 71 | random sequence. 72 | """ 73 | 74 | def __init__(self, n_param, sd_prior_samplesize=5): 75 | """ 76 | 77 | Parameters 78 | ---------- 79 | sd_prior_samplesize: int 80 | Weight on the initial estimate of the posterior standard 81 | deviation; the estimate is treated as if it were an average of 82 | 'sd_prior_samplesize' previous values. 83 | 84 | """ 85 | self.sd_prior_samplesize = sd_prior_samplesize 86 | self.sd_prior_guess = np.ones(n_param) 87 | self.n_averaged = 0 88 | self.stats = { 89 | 'mean': np.zeros(n_param), 90 | 'square': np.ones(n_param) 91 | } 92 | 93 | def update_stats(self, theta): 94 | 95 | weight = 1 / (1 + self.n_averaged) 96 | self.stats['mean'] = ( 97 | weight * theta + (1 - weight) * self.stats['mean'] 98 | ) 99 | self.stats['square'] = ( 100 | weight * theta ** 2 101 | + (1 - weight) * self.stats['square'] 102 | ) 103 | self.n_averaged += 1 104 | 105 | def estimate_post_sd(self): 106 | 107 | # TODO: implement Welford's algorithm for better numerical accuracy.
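        # A hedged sketch of what that TODO could look like (not wired in here):
        # Welford's algorithm tracks the running mean and the running sum of
        # squared deviations M2, avoiding the cancellation in E[x^2] - E[x]^2:
        #     delta = theta - mean
        #     mean += delta / n
        #     M2 += delta * (theta - mean)   # uses the *updated* mean
        #     var = M2 / (n - 1)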
108 | mean = self.stats['mean'] 109 | sec_moment = self.stats['square'] 110 | 111 | if self.n_averaged > 1: 112 | var_estimator = self.n_averaged / (self.n_averaged - 1) * ( 113 | sec_moment - mean ** 2 114 | ) 115 | estimator_weight = (self.n_averaged - 1) \ 116 | / (self.n_averaged - 1 + self.sd_prior_samplesize) 117 | sd_estimator = np.sqrt( 118 | estimator_weight * var_estimator \ 119 | + (1 - estimator_weight) * self.sd_prior_guess ** 2 120 | ) 121 | else: 122 | sd_estimator = self.sd_prior_guess 123 | 124 | return sd_estimator -------------------------------------------------------------------------------- /tests/test_likelihood_models.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random 3 | import scipy as sp 4 | import scipy.sparse 5 | from functools import partial 6 | from .derivative_tester \ 7 | import numerical_grad_is_close, numerical_direc_deriv_is_close 8 | from .helper import simulate_data 9 | from bayesbridge.model import LinearModel, LogisticModel, CoxModel 10 | 11 | 12 | def test_linear_model_gradient_and_hessian(): 13 | y, X, beta = simulate_data(model='linear', seed=0, return_design_mat=True) 14 | obs_prec = 1. 15 | linear_model = LinearModel(y, X) 16 | f = partial(linear_model.compute_loglik_and_gradient, obs_prec=obs_prec) 17 | hessian_matvec = linear_model.get_hessian_matvec_operator(beta, obs_prec) 18 | assert numerical_grad_is_close(f, beta) 19 | assert numerical_direc_deriv_is_close(f, beta, hessian_matvec, seed=0) 20 | 21 | 22 | def test_logitstic_model_hessian_matvec(): 23 | y, X, beta = simulate_data(model='logit', seed=0, return_design_mat=True) 24 | n_success, n_trial = y 25 | logit_model = LogisticModel(n_success, n_trial, X) 26 | f = logit_model.compute_loglik_and_gradient 27 | hessian_matvec = logit_model.get_hessian_matvec_operator(beta) 28 | assert numerical_direc_deriv_is_close(f, beta, hessian_matvec, seed=0) 29 | 30 | 31 | def set_up_cox_model_test(seed=0): 32 | y, X, beta = simulate_data(model='cox', seed=seed, return_design_mat=True) 33 | event_order, censoring_time = y 34 | cox_model = CoxModel(event_order, censoring_time, X) 35 | return cox_model, beta 36 | 37 | 38 | def test_cox_model_observation_reordering_and_risk_set_counting(): 39 | 40 | event_time = np.array( 41 | [1, 5, np.inf, 2.5, 2.5, np.inf, 2] 42 | ) 43 | censoring_time = np.array( 44 | [np.inf, np.inf, 3, np.inf, np.inf, 2, np.inf] 45 | ) 46 | X = np.arange(len(event_time))[:, np.newaxis] 47 | event_time, censoring_time, X = \ 48 | CoxModel._permute_observations_by_event_and_censoring_time( 49 | event_time, censoring_time, X 50 | ) 51 | assert np.all( 52 | event_time == np.array([1, 2, 2.5, 2.5, 5, np.inf, np.inf]) 53 | ) 54 | assert np.all( 55 | censoring_time == np.array([np.inf, np.inf, np.inf, np.inf, np.inf, 3, 2]) 56 | ) 57 | assert np.all(X == np.array([0, 6, 3, 4, 1, 2, 5])[:, np.newaxis]) 58 | 59 | cox_model = CoxModel(event_time, censoring_time, X) 60 | assert np.all( 61 | cox_model.risk_set_start_index == np.array([0, 1, 2, 2, 4]) 62 | ) 63 | 64 | n_censored_before_event = np.array([0, 0, 1, 1, 2]) 65 | assert np.all( 66 | cox_model.risk_set_end_index \ 67 | == len(event_time) - 1 - n_censored_before_event 68 | ) 69 | assert np.all( 70 | cox_model.n_appearance_in_risk_set == np.array([1, 2, 4, 4, 5, 4, 2]) 71 | ) # Tied events are both considered to be in the risk set. 
72 | 73 | 74 | def test_cox_model_drop_uninformative(): 75 | event_time = np.array( 76 | [2, 4, np.inf, np.inf] 77 | ) 78 | censoring_time = np.array( 79 | [np.inf, np.inf, 3, 1] 80 | ) 81 | X = np.arange(4)[:, np.newaxis] 82 | event_time, censoring_time, X = \ 83 | CoxModel._drop_uninformative_observations(event_time, censoring_time, X) 84 | assert np.all(event_time == np.array([2, 4, np.inf])) 85 | assert np.all(censoring_time == np.array([np.inf, np.inf, 3])) 86 | assert np.all(X == np.array([0, 1, 2])[:, np.newaxis]) 87 | 88 | 89 | def test_cox_model_sum_over_risk_set(): 90 | arr = np.array([1, 3, 2]) 91 | start_index = np.array([0, 1]) 92 | end_index = np.array([2, 1]) 93 | assert np.all( 94 | CoxModel._sum_over_start_end(arr, start_index, end_index) == np.array([6, 3]) 95 | ) 96 | 97 | def test_cox_model_sum_over_events(): 98 | 99 | cox_model, beta = set_up_cox_model_test() 100 | _, hazard_increase, sum_over_risk_set \ 101 | = cox_model._compute_relative_hazard(beta) 102 | hazard_matrix = cox_model._HazardMultinomialProbMatrix( 103 | hazard_increase, sum_over_risk_set, 104 | cox_model.risk_set_start_index, 105 | cox_model.risk_set_end_index, 106 | cox_model.n_appearance_in_risk_set 107 | ) 108 | assert np.allclose( 109 | hazard_matrix.sum_over_events(), 110 | np.sum(hazard_matrix.compute_matrix(), 0) 111 | ) 112 | 113 | 114 | def test_cox_model_gradient(): 115 | cox_model, beta = set_up_cox_model_test() 116 | f = cox_model.compute_loglik_and_gradient 117 | assert numerical_grad_is_close(f, beta) 118 | 119 | 120 | def test_cox_model_hessian_matvec(): 121 | cox_model, beta = set_up_cox_model_test() 122 | f = cox_model.compute_loglik_and_gradient 123 | hessian_matvec = cox_model.get_hessian_matvec_operator(beta) 124 | assert numerical_direc_deriv_is_close(f, beta, hessian_matvec, seed=0) -------------------------------------------------------------------------------- /bayesbridge/model/logistic_model.py: -------------------------------------------------------------------------------- 1 | from .abstract_model import AbstractModel 2 | import numpy as np 3 | import numpy.random 4 | from warnings import warn 5 | 6 | class LogisticModel(AbstractModel): 7 | 8 | # TODO: Python crashes during the Gibbs sampling if n_success has a second 9 | # dimension (instead of being a vector). Add checks for the inputs. 10 | def __init__(self, n_success, n_trial, design): 11 | 12 | self.check_input_validity(n_success, n_trial, design) 13 | if n_trial is None: 14 | n_trial = np.ones(len(n_success)) 15 | warn( 16 | "The numbers of trials were not specified. Binary " 17 | "outcomes are assumed." 18 | ) 19 | 20 | self.n_trial = n_trial.astype('float64') 21 | self.n_success = n_success.astype('float64') 22 | self.design = design 23 | self.name = 'logit' 24 | 25 | def check_input_validity(self, n_success, n_trial, design): 26 | 27 | if n_trial is None: 28 | if np.max(n_success) > 1: 29 | raise ValueError( 30 | "If not binary, the number of trials must be specified.") 31 | if not len(n_success) == design.shape[0]: 32 | raise ValueError( 33 | "Incompatible sizes of the outcome and design matrix." 34 | ) 35 | return # No need to check the rest for the default initialization. 36 | 37 | if not len(n_trial) == len(n_success) == design.shape[0]: 38 | raise ValueError( 39 | "Incompatible sizes of the outcome vectors and design matrix."
40 | ) 41 | 42 | if np.any(n_trial <= 0): 43 | raise ValueError("Number of trials must be strictly positive.") 44 | 45 | if np.any(n_success > n_trial): 46 | raise ValueError( 47 | "Number of successes cannot be larger than that of trials.") 48 | 49 | def compute_loglik_and_gradient(self, beta, loglik_only=False): 50 | logit_prob = self.design.dot(beta) 51 | predicted_prob = LogisticModel.convert_to_probability_scale(logit_prob) 52 | loglik = np.sum( 53 | self.n_success * logit_prob \ 54 | - self.n_trial * np.logaddexp(0, logit_prob) 55 | ) 56 | if loglik_only: 57 | grad = None 58 | else: 59 | grad = self.design.Tdot(self.n_success - self.n_trial * predicted_prob) 60 | return loglik, grad 61 | 62 | def compute_hessian(self, beta): 63 | predicted_prob = LogisticModel.compute_predicted_prob(self.design, beta) 64 | weight = predicted_prob * (1 - predicted_prob) 65 | return - self.design.compute_fisher_info(weight) 66 | 67 | def get_hessian_matvec_operator(self, beta): 68 | predicted_prob = LogisticModel.compute_predicted_prob(self.design, beta) 69 | weight = predicted_prob * (1 - predicted_prob) 70 | hessian_op = lambda v: \ 71 | - self.design.Tdot(self.n_trial * weight * self.design.dot(v)) 72 | return hessian_op 73 | 74 | def calc_intercept_mle(self): 75 | binom_prob_mle = self.n_success.mean() / self.n_trial.mean() 76 | intercept = np.log(binom_prob_mle / (1 - binom_prob_mle)) 77 | return intercept 78 | 79 | @staticmethod 80 | def compute_polya_gamma_mean(shape, tilt): 81 | min_magnitude = 1e-5 82 | pg_mean = shape.copy() / 2 83 | is_nonzero = (np.abs(tilt) > min_magnitude) 84 | pg_mean[is_nonzero] \ 85 | *= 1 / tilt[is_nonzero] \ 86 | * (np.exp(tilt[is_nonzero]) - 1) / (np.exp(tilt[is_nonzero]) + 1) 87 | return pg_mean 88 | 89 | @staticmethod 90 | def compute_predicted_prob(X, beta, truncate=False): 91 | logit_prob = X.dot(beta) 92 | return LogisticModel.convert_to_probability_scale(logit_prob, truncate) 93 | 94 | @staticmethod 95 | def convert_to_probability_scale(logit_prob, truncate=False): 96 | # The flag 'truncate == True' guarantees 0 < prob < 1. 
97 | if truncate: 98 | upper_bd = 36.7 # approximately - log(2 ** -53) 99 | lower_bd = - 709 # approximately - log(2 ** 1023) 100 | logit_prob[logit_prob > upper_bd] = upper_bd 101 | logit_prob[logit_prob < lower_bd] = lower_bd 102 | prob = 1 / (1 + np.exp(-logit_prob)) 103 | return prob 104 | 105 | @staticmethod 106 | def simulate_outcome(n_trial, X, beta, seed=None): 107 | """ 108 | Parameters 109 | ---------- 110 | X : DesignMatrix, numpy/scipy matrix 111 | Only needs to support the `dot()` operation 112 | """ 113 | prob = LogisticModel.compute_predicted_prob(X, beta) 114 | if seed is not None: 115 | np.random.seed(seed) 116 | y = np.random.binomial(n_trial, prob) 117 | return y -------------------------------------------------------------------------------- /bayesbridge/random/tilted_stable/compare_methods_speed.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import sys\n", 10 | "sys.path.insert(0, '../')\n", 11 | "\n", 12 | "import time\n", 13 | "import numpy as np\n", 14 | "import matplotlib.pyplot as plt\n", 15 | "from tilted_stable import ExpTiltedStableDist" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "## Specify the range of parameters to run the samplers" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "bridge_exponent = 1. / 16\n", 32 | "divide_conquer_cost = 10 ** np.linspace(-1., 1., 101)\n", 33 | "\n", 34 | "char_exponent = bridge_exponent / 2\n", 35 | "tilt = divide_conquer_cost ** (1. / char_exponent)\n", 36 | "# For Bayesian bridge, tilt parameter is given by beta / global_scale\n", 37 | "tilt_power = tilt ** char_exponent" 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": {}, 43 | "source": [ 44 | "## Time the samplers at given parameter values" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "tilted_stable = ExpTiltedStableDist(seed=0)\n", 54 | "\n", 55 | "def time_method(char_exponent, tilt, method, n_rep=1000):\n", 56 | " start = time.time()\n", 57 | " tilted_stable.sample(\n", 58 | " char_exponent * np.ones(n_rep), tilt * np.ones(n_rep), \n", 59 | " method=method\n", 60 | " );\n", 61 | " elapsed = time.time() - start\n", 62 | " return elapsed" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "n_repetition = 1000\n", 72 | "\n", 73 | "exec_time = {\n", 74 | " method: \n", 75 | " np.array([\n", 76 | " time_method(char_exponent, tilt_i, method, n_repetition)\n", 77 | " for tilt_i in tilt\n", 78 | " ]) \n", 79 | " for method in ['double-rejection', 'divide-conquer']\n", 80 | "}" 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": {}, 86 | "source": [ 87 | "## Plot the result" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [ 96 | "plt.figure(figsize=(7, 4.5))\n", 97 | "plt.rcParams['font.size'] = 18\n", 98 | "\n", 99 | "for method in ['double-rejection', 'divide-conquer']:\n", 100 | " plt.plot(tilt_power, exec_time[method] / n_repetition)\n", 101 | "plt.xlabel('Cost of divide-conquer (= tilt ^ char-exponent)')\n", 102 | "plt.ylabel('Sec. 
per sample')\n", 103 | "plt.ticklabel_format(axis='y', scilimits=(0,0))\n", 104 | "plt.ylim(bottom=0)\n", 105 | "\n", 106 | "for side in ['top', 'right']:\n", 107 | " plt.gca().spines[side].set_visible(False)" 108 | ] 109 | }, 110 | { 111 | "cell_type": "markdown", 112 | "metadata": {}, 113 | "source": [ 114 | "## Run a basic check to confim the sampler chooses the faster method \n", 115 | "Call the method with randomly generated tilting parameters. If the sampler correctly chooses the faster method for each parameter setting, then it should run faster than using one fixed method for all the parameter settings." 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": null, 121 | "metadata": {}, 122 | "outputs": [], 123 | "source": [ 124 | "n_samples = 10 ** 3\n", 125 | "\n", 126 | "double_rejection_cost = 2.\n", 127 | "divide_conquer_cost \\\n", 128 | " = double_rejection_cost * np.random.exponential(size=n_samples)\n", 129 | "tilt = divide_conquer_cost ** (1. / char_exponent)" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": null, 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "tilted_stable.set_seed(0)\n", 139 | "%timeit -n 100 tilted_stable.sample(char_exponent, tilt)\n", 140 | "\n", 141 | "tilted_stable.set_seed(0)\n", 142 | "%timeit -n 100 tilted_stable.sample(char_exponent, tilt, 'double-rejection')\n", 143 | "\n", 144 | "tilted_stable.set_seed(0)\n", 145 | "%timeit -n 100 tilted_stable.sample(char_exponent, tilt, 'divide-conquer')" 146 | ] 147 | } 148 | ], 149 | "metadata": { 150 | "kernelspec": { 151 | "display_name": "Python 3", 152 | "language": "python", 153 | "name": "python3" 154 | }, 155 | "language_info": { 156 | "codemirror_mode": { 157 | "name": "ipython", 158 | "version": 3 159 | }, 160 | "file_extension": ".py", 161 | "mimetype": "text/x-python", 162 | "name": "python", 163 | "nbconvert_exporter": "python", 164 | "pygments_lexer": "ipython3", 165 | "version": "3.7.7" 166 | } 167 | }, 168 | "nbformat": 4, 169 | "nbformat_minor": 4 170 | } 171 | -------------------------------------------------------------------------------- /simulate_data.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | import scipy as sp 4 | import scipy.sparse 5 | from bayesbridge.model import CoxModel 6 | 7 | 8 | def simulate_outcome(X, beta, model, intercept=0., n_trial=None, seed=None): 9 | 10 | if seed is not None: 11 | np.random.seed(seed) 12 | 13 | if model == 'linear': 14 | sigma = 1. 
15 | outcome = intercept + X.dot(beta) + sigma * np.random.randn(X.shape[0]) 16 | elif model == 'logit': 17 | if n_trial is None: 18 | n_trial = np.ones(X.shape[0]) 19 | prob = 1 / (1 + np.exp(- intercept - X.dot(beta))) 20 | n_success = np.random.binomial(n_trial.astype(np.int32), prob) 21 | outcome = (n_success, n_trial) 22 | elif model == 'cox': 23 | outcome = CoxModel.simulate_outcome(X, beta, censoring_frac=.5) 24 | else: 25 | raise NotImplementedError() 26 | 27 | return outcome 28 | 29 | def simulate_design( 30 | n_obs, n_pred, binary_frac=0., categorical_frac=0., 31 | corr_dense_design=False, binary_pred_freq=.1, n_category=5, 32 | shuffle_columns=False, seed=None, format_='sparse' 33 | ): 34 | 35 | if seed is not None: 36 | np.random.seed(seed) 37 | 38 | n_dense_pred = int(n_pred * (1 - binary_frac - categorical_frac)) 39 | n_categorical_pred = int((n_pred * categorical_frac) / (n_category - 1)) 40 | n_binary_pred = n_pred - n_dense_pred - n_categorical_pred * (n_category - 1) 41 | 42 | X_dense = simulate_dense_design(n_obs, n_dense_pred, corr_dense_design) 43 | 44 | if n_binary_pred + n_categorical_pred == 0: 45 | X = X_dense 46 | else: 47 | X_binary = simulate_binary_design(n_obs, n_binary_pred, binary_pred_freq) 48 | X_categorical = simulate_categorical_design( 49 | n_obs, n_categorical_pred, n_category 50 | ) 51 | X = sp.sparse.hstack(( 52 | sp.sparse.csr_matrix(X_dense), X_binary, X_categorical 53 | )).tocsr() 54 | 55 | if shuffle_columns: 56 | X = X[:, np.random.permutation(n_pred)] 57 | 58 | if format_ == 'sparse': 59 | X = sp.sparse.csr_matrix(X) 60 | elif sp.sparse.issparse(X): 61 | X = X.toarray() 62 | 63 | return X 64 | 65 | def simulate_dense_design(n_obs, n_pred, corr_design, standardize=False): 66 | if corr_design: 67 | X = generate_corr_design(n_obs, n_pred) 68 | else: 69 | X = np.random.randn(n_obs, n_pred) 70 | if standardize: 71 | X = np_standardize(X) 72 | return X 73 | 74 | def np_standardize(X, divide_by='std'): 75 | X = X - np.mean(X, axis=0)[np.newaxis, :] 76 | if divide_by == 'max': 77 | X = X / np.max(X, axis=0)[np.newaxis, :] 78 | else: 79 | X = X / np.std(X, axis=0)[np.newaxis, :] 80 | return X 81 | 82 | def generate_corr_design(n_obs, n_pred, n_factor=None, max_sd=100, min_sd=1): 83 | """ 84 | Each column is drawn from a Gaussian with a covariance proportional to 85 | I + F L F' 86 | where F is an orthogonal matrix of size p by n_factor and L is diagonal. 87 | """ 88 | if n_factor is None: 89 | n_factor = min(100, int(n_pred / 2)) 90 | factor, _ = np.linalg.qr(np.random.randn(n_pred, n_factor)) 91 | principal_comp_sd = np.linspace(max_sd, min_sd, n_factor + 1) 92 | loading = principal_comp_sd[:n_factor] - min_sd 93 | X = np.dot( 94 | factor, 95 | loading[:, np.newaxis] * np.random.randn(n_factor, n_obs) 96 | ).T 97 | X += min_sd * np.random.randn(n_obs, n_pred) 98 | return X 99 | 100 | def simulate_binary_design(n_obs, n_binary_pred, sparsity, max_freq_per_col=.5): 101 | """ 102 | Returns a binary matrix where the non-zero frequency (on average) equals 103 | the value of 'sparsity'. Also, the non-zero frequency along each column is 104 | bounded by 'max_freq_per_col'. 105 | """ 106 | if n_binary_pred == 0: 107 | return None 108 | 109 | a = .5 110 | b = a * (max_freq_per_col / sparsity - 1) 111 | # Solve a / (a + b) = sparsity / max_freq_per_col for 'b'. 
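    # Added note: a Beta(a, b) draw has mean a / (a + b), so the choice of 'b'
    # above gives a / (a + b) = sparsity / max_freq_per_col, and the expected
    # non-zero frequency max_freq_per_col * a / (a + b) then equals 'sparsity'.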
112 | binary_freq = max_freq_per_col * np.random.beta(a, b, n_binary_pred) 113 | X = np.zeros((n_obs, n_binary_pred)) 114 | for j in range(n_binary_pred): 115 | nnz = math.ceil(n_obs * binary_freq[j]) 116 | X[np.random.choice(n_obs, nnz, replace=False), j] = 1. 117 | return X 118 | 119 | def simulate_categorical_design(n_obs, n_categorical_pred, n_category=5): 120 | if n_categorical_pred == 0: 121 | return None 122 | 123 | X = sp.sparse.hstack([ 124 | sp.sparse.csr_matrix(draw_categorical_pred(n_obs, n_category)) 125 | for dummy in range(n_categorical_pred) 126 | ]) 127 | return X 128 | 129 | def draw_categorical_pred(n_obs, n_category): 130 | # Returns a matrix of size n by (n_category - 1). 131 | category_freq = np.random.dirichlet(np.ones(n_category)) 132 | category_freq = np.sort(category_freq)[::-1][1:] 133 | # Use the most frequent category as baseline 134 | n_within_category = np.concatenate(( 135 | [0], np.floor(n_obs * np.cumsum(category_freq)) 136 | )).astype(np.int) 137 | X = np.zeros((n_obs, n_category - 1)) 138 | for j in range(n_category - 1): 139 | start = n_within_category[j] 140 | end = n_within_category[j + 1] 141 | X[start:end, j] = 1 142 | X = X[np.random.permutation(n_obs), :] 143 | return X 144 | -------------------------------------------------------------------------------- /tests/test_prior.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from .helper import simulate_data 3 | from bayesbridge.model import LinearModel, LogisticModel, CoxModel 4 | from bayesbridge import BayesBridge, RegressionModel, RegressionCoefPrior 5 | 6 | 7 | def test_clone(): 8 | 9 | kwargs = { 10 | 'bridge_exponent': 1. / 8, 11 | 'n_fixed_effect': 1, 12 | 'sd_for_fixed_effect': 1.11, 13 | 'regularizing_slab_size': 2.22, 14 | 'global_scale_prior_hyper_param': {'log10_mean': - 4., 'log10_sd': 1.} 15 | } 16 | 17 | prior = RegressionCoefPrior(**kwargs) 18 | 19 | changed_kw = { 20 | 'n_fixed_effect': 3, 21 | 'global_scale_prior_hyper_param': {'log10_mean': - 6., 'log10_sd': 1.5} 22 | } 23 | kwargs_alt = kwargs.copy() 24 | for key, val in changed_kw.items(): 25 | kwargs_alt[key] = val 26 | cloned = prior.clone(**changed_kw) 27 | changed_prior = RegressionCoefPrior(**kwargs_alt) 28 | 29 | assert np.all( 30 | cloned.__dict__.pop('sd_for_fixed') 31 | == changed_prior.__dict__.pop('sd_for_fixed') 32 | ) 33 | assert cloned.__dict__ == changed_prior.__dict__ 34 | 35 | 36 | def test_gscale_parametrization(): 37 | """ Check that the Gamma hyper-parameters do not depend on parametrization. """ 38 | 39 | gscale_hyper_param = {'log10_mean': - 4., 'log10_sd': 1.} 40 | bridge_exp = .25 41 | 42 | prior_coef_scale = RegressionCoefPrior( 43 | bridge_exponent=bridge_exp, 44 | global_scale_prior_hyper_param=gscale_hyper_param, 45 | _global_scale_parametrization='coef_magnitude' 46 | ) 47 | 48 | unit_bridge_magnitude \ 49 | = RegressionCoefPrior.compute_power_exp_ave_magnitude(bridge_exp) 50 | gscale_hyper_param['log10_mean'] -= np.log10(unit_bridge_magnitude) 51 | prior_raw_scale = RegressionCoefPrior( 52 | bridge_exponent=bridge_exp, 53 | global_scale_prior_hyper_param=gscale_hyper_param, 54 | _global_scale_parametrization='raw' 55 | ) 56 | assert ( 57 | prior_coef_scale.param['gscale_neg_power'] == prior_raw_scale.param['gscale_neg_power'] 58 | ) 59 | 60 | 61 | def test_gscale_paramet_invariance(): 62 | """ Check sampler outputs are invariant under global scale parametrization. 
""" 63 | 64 | y, X, beta = simulate_data(model='logit', seed=0) 65 | model = RegressionModel(y, X, family='logit') 66 | bridge_exp = .25 67 | bridge_magnitude \ 68 | = RegressionCoefPrior.compute_power_exp_ave_magnitude(bridge_exp) 69 | init_gscale = 0.1 70 | init_lscale = np.ones(X.shape[1]) 71 | init_raw_gscale = init_gscale / bridge_magnitude 72 | init_raw_lscale = bridge_magnitude * init_lscale 73 | init = { 74 | 'global_scale': init_gscale, 75 | 'local_scale': init_lscale 76 | } 77 | raw_init = { 78 | 'global_scale': init_raw_gscale, 79 | 'local_scale': init_raw_lscale 80 | } 81 | 82 | # Two samples should agree since the default prior is scale invariant. 83 | prior = RegressionCoefPrior( 84 | bridge_exponent=bridge_exp, 85 | regularizing_slab_size=1., 86 | _global_scale_parametrization='raw' 87 | ) 88 | bridge = BayesBridge(model, prior) 89 | coef_sample_raw_scaling = get_last_sample_from_gibbs(bridge, raw_init) 90 | 91 | prior = RegressionCoefPrior( 92 | bridge_exponent=bridge_exp, 93 | regularizing_slab_size=1., 94 | _global_scale_parametrization='coef_magnitude' 95 | ) 96 | bridge = BayesBridge(model, prior) 97 | coef_sample_expected_mag_scaling = get_last_sample_from_gibbs(bridge, init) 98 | 99 | assert np.allclose( 100 | coef_sample_raw_scaling, 101 | coef_sample_expected_mag_scaling, 102 | rtol=1e-10 103 | ) 104 | 105 | # Place a prior on the global scale; the two samples should *not* coincide. 106 | 107 | gscale_hyper_param = { 108 | 'log10_mean': -2. - np.log10(bridge_magnitude), 109 | 'log10_sd': 1., 110 | } 111 | prior = RegressionCoefPrior( 112 | bridge_exponent=bridge_exp, 113 | regularizing_slab_size=1., 114 | global_scale_prior_hyper_param=gscale_hyper_param, 115 | _global_scale_parametrization='raw' 116 | ) 117 | bridge = BayesBridge(model, prior) 118 | coef_sample_raw_scaling \ 119 | = get_last_sample_from_gibbs(bridge, raw_init) 120 | 121 | prior = RegressionCoefPrior( 122 | bridge_exponent=bridge_exp, 123 | regularizing_slab_size=1., 124 | global_scale_prior_hyper_param=gscale_hyper_param, 125 | _global_scale_parametrization='coef_magnitude' 126 | ) 127 | bridge = BayesBridge(model, prior) 128 | coef_sample_expected_mag_scaling \ 129 | = get_last_sample_from_gibbs(bridge, init) 130 | 131 | assert not np.allclose( 132 | coef_sample_raw_scaling, 133 | coef_sample_expected_mag_scaling, 134 | rtol=1e-10 135 | ) 136 | 137 | # After appropriately adjusting the hyper-parameter, the two samples 138 | # should agree. 
139 | gscale_hyper_param['log10_mean'] += np.log10(bridge_magnitude) 140 | prior = prior.clone(global_scale_prior_hyper_param=gscale_hyper_param) 141 | bridge = BayesBridge(model, prior) 142 | coef_sample_expected_mag_scaling \ 143 | = get_last_sample_from_gibbs(bridge, init) 144 | 145 | assert np.allclose( 146 | coef_sample_raw_scaling, 147 | coef_sample_expected_mag_scaling, 148 | rtol=1e-10 149 | ) 150 | 151 | 152 | def get_last_sample_from_gibbs(bridge, init, seed=0): 153 | samples, _ = bridge.gibbs( 154 | n_iter=10, n_burnin=0, init=init, 155 | coef_sampler_type='cholesky', 156 | seed=seed, n_status_update=0 157 | ) 158 | return samples['coef'][:, -1] -------------------------------------------------------------------------------- /bayesbridge/reg_coef_sampler/cg_sampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy as sp 3 | import scipy.sparse 4 | import scipy.linalg 5 | from warnings import warn 6 | 7 | try: 8 | import cupyx.scipy.sparse.linalg 9 | import cupyx as cpx 10 | import cupy as cp 11 | except (ImportError, ModuleNotFoundError) as e: 12 | cp = None 13 | cupy_exception = e 14 | 15 | class ConjugateGradientSampler(): 16 | 17 | def __init__(self, n_coef_wo_shrinkage): 18 | self.n_coef_wo_shrinkage = n_coef_wo_shrinkage 19 | 20 | def sample( 21 | self, design, obs_prec, prior_prec_sqrt, z, 22 | coef_cg_init=None, precond_by='prior', coef_scaled_sd=None, 23 | maxiter=None, atol=10e-6, seed=None): 24 | """ 25 | Generate a multi-variate Gaussian with mean mu and covariance Sigma of the form 26 | mu = Sigma z, 27 | Sigma^{-1} = X' diag(obs_prec) X + prior_prec_sqrt ** 2, 28 | For numerical stability, the code first sample from the scaled parameter 29 | coef / precond_scale. 30 | 31 | Param: 32 | ------ 33 | D : vector 34 | atol : float 35 | The absolute tolerance on the residual norm at the termination 36 | of CG iterations. 37 | coef_scaled_sd : vector of length design.shape[1] 38 | Used to estimate a good preconditioning scale for the coefficient 39 | without shrinkage. Used only if precond_by == 'prior'. 40 | precond_by : {'prior', 'diag'} 41 | """ 42 | if design.use_cupy: 43 | coef_cg_init = cp.asarray(coef_cg_init) 44 | coef_scaled_sd = cp.asarray(coef_scaled_sd) 45 | prior_prec_sqrt = cp.asarray(prior_prec_sqrt) 46 | cg = cpx.scipy.sparse.linalg.cg 47 | LinearOperator = cpx.scipy.sparse.linalg.LinearOperator 48 | else: 49 | cg = sp.sparse.linalg.cg 50 | LinearOperator = sp.sparse.linalg.LinearOperator 51 | if seed is not None: 52 | np.random.seed(seed) 53 | 54 | # Define a preconditioned linear operator. 55 | Prec_precond_op, precond_scale = \ 56 | self.precondition_linear_system( 57 | prior_prec_sqrt, obs_prec, design, precond_by, coef_scaled_sd, LinearOperator 58 | ) 59 | 60 | # Draw a target vector. 61 | randn_vec_1 = np.random.randn(design.shape[0]) 62 | randn_vec_2 = np.random.randn(design.shape[1]) 63 | if design.use_cupy: 64 | randn_vec_1 = cp.asarray(randn_vec_1) 65 | randn_vec_2 = cp.asarray(randn_vec_2) 66 | v = design.Tdot(obs_prec ** (1 / 2) * randn_vec_1) \ 67 | + prior_prec_sqrt * randn_vec_2 68 | b = precond_scale * (z + v) 69 | 70 | # Callback function to count the number of PCG iterations. 71 | cg_info = {'n_iter': 0} 72 | def cg_callback(x): cg_info['n_iter'] += 1 73 | 74 | # Run PCG. 
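        # Added note: the scipy/cupy `cg` solvers interpret `tol` relative to
        # norm(b), so dividing the desired absolute tolerance by norm(b) below
        # converts it into the equivalent relative tolerance.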
75 | rtol = atol / np.linalg.norm(b) 76 | coef_scaled_cg_init = coef_cg_init / precond_scale 77 | coef_scaled, info = cg( 78 | Prec_precond_op, b, x0=coef_scaled_cg_init, maxiter=maxiter, tol=rtol, 79 | callback=cg_callback 80 | ) 81 | 82 | if info != 0: 83 | warn( 84 | "The conjugate gradient algorithm did not achieve the requested " + 85 | "tolerance level. You may increase the maxiter or use the dense " + 86 | "linear algebra instead." 87 | ) 88 | 89 | coef = precond_scale * coef_scaled 90 | cg_info['valid_input'] = (info >= 0) 91 | cg_info['converged'] = (info == 0) 92 | if design.use_cupy: 93 | coef = cp.asnumpy(coef) 94 | return coef, cg_info 95 | 96 | def precondition_linear_system( 97 | self, prior_prec_sqrt, obs_prec, design, precond_by, coef_scaled_sd, LinearOperator): 98 | 99 | # Compute the preconditioners. 100 | precond_scale = self.choose_preconditioner( 101 | prior_prec_sqrt, obs_prec, design, precond_by, coef_scaled_sd 102 | ) 103 | 104 | # Define a preconditioned linear operator. 105 | precond_prior_prec = (precond_scale * prior_prec_sqrt) ** 2 106 | def Prec_precond(x): 107 | Prec_precond_x = precond_prior_prec * x \ 108 | + precond_scale * design.Tdot(obs_prec * design.dot(precond_scale * x)) 109 | return Prec_precond_x 110 | Prec_precond_op = LinearOperator( 111 | (design.shape[1], design.shape[1]), matvec=Prec_precond 112 | ) 113 | return Prec_precond_op, precond_scale 114 | 115 | def choose_preconditioner( 116 | self, prior_prec_sqrt, obs_prec, design, precond_by, beta_scaled_sd): 117 | 118 | precond_scale = self.choose_diag_preconditioner( 119 | prior_prec_sqrt, obs_prec, design, precond_by, beta_scaled_sd) 120 | 121 | return precond_scale 122 | 123 | def choose_diag_preconditioner( 124 | self, prior_prec_sqrt, obs_prec, design, precond_by='diag', 125 | beta_scaled_sd=None): 126 | # Compute the diagonal (sqrt) preconditioner. 127 | 128 | if precond_by == 'prior': 129 | precond_scale = cp.ones(len(prior_prec_sqrt)) if design.use_cupy \ 130 | else np.ones(len(prior_prec_sqrt)) 131 | precond_scale[self.n_coef_wo_shrinkage:] = \ 132 | prior_prec_sqrt[self.n_coef_wo_shrinkage:] ** -1 133 | if self.n_coef_wo_shrinkage > 0: 134 | target_sd_scale = 2. 135 | # Larger than 1 because it is better to err on the side 136 | # of introducing large precisions. 
137 | precond_scale[:self.n_coef_wo_shrinkage] = \ 138 | target_sd_scale * beta_scaled_sd[:self.n_coef_wo_shrinkage] 139 | 140 | elif precond_by == 'diag': 141 | diag = prior_prec_sqrt ** 2 \ 142 | + design.compute_fisher_info(weight=obs_prec, diag_only=True) 143 | precond_scale = 1 / np.sqrt(diag) 144 | 145 | elif precond_by is None: 146 | precond_scale = np.ones(design.shape[1]) 147 | 148 | else: 149 | raise NotImplementedError() 150 | 151 | return precond_scale -------------------------------------------------------------------------------- /bayesbridge/reg_coef_sampler/hamiltonian_monte_carlo/hmc.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import math 3 | import time 4 | from .stepsize_adapter import HamiltonianBasedStepsizeAdapter, initialize_stepsize 5 | from .util import warn_message_only 6 | from .dynamics import HamiltonianDynamics 7 | 8 | 9 | dynamics = HamiltonianDynamics() 10 | integrator = dynamics.integrate 11 | compute_hamiltonian = dynamics.compute_hamiltonian 12 | draw_momentum = dynamics.draw_momentum 13 | 14 | 15 | def generate_samples( 16 | f, q0, n_burnin, n_sample, nstep_range, dt_range=None, 17 | seed=None, n_update=0, adapt_stepsize=False, target_accept_prob=.9, 18 | final_adaptsize=.05): 19 | """ Run HMC and return samples and some additional info. """ 20 | 21 | if seed is not None: 22 | np.random.seed(seed) 23 | 24 | q = q0 25 | logp, grad = f(q) 26 | 27 | if np.isscalar(dt_range): 28 | dt_range = np.array(2 * [dt_range]) 29 | 30 | elif dt_range is None: 31 | p = draw_momentum(len(q)) 32 | logp_joint0 = - compute_hamiltonian(logp, p) 33 | dt = initialize_stepsize( 34 | lambda dt: compute_onestep_accept_prob(dt, f, q, p, grad, logp_joint0) 35 | ) 36 | dt_range = dt * np.array([.8, 1.0]) 37 | adapt_stepsize = True 38 | 39 | if np.isscalar(nstep_range): 40 | nstep_range = np.array(2 * [nstep_range]) 41 | 42 | max_stepsize_adapter = HamiltonianBasedStepsizeAdapter( 43 | init_stepsize=1., target_accept_prob=target_accept_prob, 44 | reference_iteration=n_burnin, adaptsize_at_reference=final_adaptsize 45 | ) 46 | 47 | if n_update > 0: 48 | n_per_update = math.ceil((n_burnin + n_sample) / n_update) 49 | else: 50 | n_per_update = float('inf') 51 | 52 | samples = np.zeros((len(q), n_sample + n_burnin)) 53 | logp_samples = np.zeros(n_sample + n_burnin) 54 | accept_prob = np.zeros(n_sample + n_burnin) 55 | 56 | tic = time.time() # Start clock 57 | use_averaged_stepsize = False 58 | for i in range(n_sample + n_burnin): 59 | dt = np.random.uniform(dt_range[0], dt_range[1]) 60 | dt *= max_stepsize_adapter.get_current_stepsize(use_averaged_stepsize) 61 | nstep = np.random.randint(nstep_range[0], nstep_range[1] + 1) 62 | q, info = generate_next_state( 63 | f, dt, nstep, q, logp0=logp, grad0=grad 64 | ) 65 | logp, grad, pathlen, accept_prob[i] = ( 66 | info[key] for key in ['logp', 'grad', 'n_grad_evals', 'accept_prob'] 67 | ) 68 | if i < n_burnin and adapt_stepsize: 69 | max_stepsize_adapter.adapt_stepsize(info['hamiltonian_error']) 70 | elif i == n_burnin - 1: 71 | use_averaged_stepsize = True 72 | samples[:, i] = q 73 | logp_samples[i] = logp 74 | if (i + 1) % n_per_update == 0: 75 | print('{:d} iterations have been completed.'.format(i + 1)) 76 | 77 | toc = time.time() 78 | time_elapsed = toc - tic 79 | 80 | return samples, logp_samples, accept_prob, time_elapsed 81 | 82 | 83 | def compute_onestep_accept_prob(dt, f, q0, p0, grad0, logp_joint0): 84 | _, p, logp, _ = integrator(f, dt, q0, p0, grad0) 85 | 
logp_joint = - compute_hamiltonian(logp, p) 86 | accept_prob = np.exp(logp_joint - logp_joint0) 87 | return accept_prob 88 | 89 | 90 | def generate_next_state( 91 | f, dt, n_step, q0, 92 | p0=None, logp0=None, grad0=None, hamiltonian_tol=100.): 93 | 94 | n_grad_evals = 0 95 | 96 | if (logp0 is None) or (grad0 is None): 97 | logp0, grad0 = f(q0) 98 | n_grad_evals += 1 99 | 100 | if p0 is None: 101 | p0 = draw_momentum(len(q0)) 102 | 103 | log_joint0 = - compute_hamiltonian(logp0, p0) 104 | 105 | q, p, logp, grad, simulation_info = simulate_dynamics( 106 | f, dt, n_step, q0, p0, logp0, grad0, hamiltonian_tol 107 | ) 108 | n_grad_evals += simulation_info['n_grad_evals'] 109 | instability_detected = simulation_info['instability_detected'] 110 | 111 | if instability_detected: 112 | acceptprob = 0. 113 | hamiltonian_error = - float('inf') 114 | else: 115 | log_joint = - compute_hamiltonian(logp, p) 116 | hamiltonian_error = log_joint - log_joint0 117 | acceptprob = min(1, np.exp(hamiltonian_error)) 118 | 119 | accepted = acceptprob > np.random.rand() 120 | if not accepted: 121 | q = q0 122 | logp = logp0 123 | grad = grad0 124 | 125 | info = { 126 | 'logp': logp, 127 | 'grad': grad, 128 | 'accepted': accepted, 129 | 'accept_prob': acceptprob, 130 | 'hamiltonian_error': hamiltonian_error, 131 | 'instability_detected': instability_detected, 132 | 'n_grad_evals': n_grad_evals 133 | } 134 | 135 | return q, info 136 | 137 | 138 | def simulate_dynamics(f, dt, n_step, q0, p0, logp0, grad0, hamiltonian_tol=float('inf')): 139 | 140 | n_grad_evals = 0 141 | instability_detected = False 142 | 143 | # Keep track of Hamiltonians along the trajectory. 144 | hamiltonians = np.full(n_step + 1, float('nan')) 145 | hamiltonian = compute_hamiltonian(logp0, p0) 146 | hamiltonians[0] = hamiltonian 147 | min_h, max_h = 2 * [hamiltonian] 148 | 149 | q, p, logp, grad = q0, p0, logp0, grad0 150 | if n_step == 0: 151 | warn_message_only("The number of integration steps was set to be 0.") 152 | 153 | for i in range(n_step): 154 | q, p, logp, grad \ 155 | = integrator(f, dt, q, p, grad) 156 | hamiltonian = compute_hamiltonian(logp, p) 157 | hamiltonians[i + 1] = hamiltonian 158 | min_h, max_h = update_running_minmax(min_h, max_h, hamiltonian) 159 | n_grad_evals += 1 160 | instability_detected \ 161 | = math.isinf(logp) or (max_h - min_h) > hamiltonian_tol 162 | if instability_detected: 163 | warn_message_only( 164 | "Numerical integration became unstable while simulating the " 165 | "HMC trajectory." 
166 | ) 167 | break 168 | 169 | info = { 170 | 'energy_trajectory': hamiltonians, 171 | 'n_grad_evals': n_grad_evals, 172 | 'instability_detected': instability_detected, 173 | } 174 | 175 | return q, p, logp, grad, info 176 | 177 | 178 | def update_running_minmax(running_min, running_max, curr_val): 179 | running_min = min(running_min, curr_val) 180 | running_max = max(running_max, curr_val) 181 | return running_min, running_max 182 | -------------------------------------------------------------------------------- /bayesbridge/random/polya_gamma/test_polyagamma.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import matplotlib.pyplot as plt\n", 10 | "import numpy as np\n", 11 | "\n", 12 | "from polya_gamma import PolyaGammaDist" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "import rpy2.robjects as robjects\n", 22 | "import rpy2.robjects.packages as rpackages\n", 23 | "import rpy2.robjects.numpy2ri\n", 24 | "\n", 25 | "try:\n", 26 | " bayeslogit = rpackages.importr('BayesLogit')\n", 27 | "except:\n", 28 | " utils = rpackages.importr('utils')\n", 29 | " utils.install_packages('BayesLogit')\n", 30 | " bayeslogit = rpackages.importr('BayesLogit')" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "## Compare Python output against that of R package" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "def comparison_hist(samples_1, samples_2, use_log_scale=False):\n", 47 | " if use_log_scale:\n", 48 | " samples_1 = np.log(samples_1)\n", 49 | " samples_2 = np.log(samples_2)\n", 50 | " x_max = max(samples_1.max(), samples_2.max())\n", 51 | " x_min = min(samples_1.min(), samples_2.min())\n", 52 | " bins = np.linspace(x_min, x_max, 51)\n", 53 | " \n", 54 | " plt.hist(samples_1, alpha=.5, bins=bins, density=True)\n", 55 | " plt.hist(samples_2, alpha=.5, bins=bins, density=True)\n", 56 | " for side in ['left', 'top', 'right']:\n", 57 | " plt.gca().spines[side].set_visible(False)\n", 58 | " plt.yticks([])" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "n_samples = 10 ** 6\n", 68 | "shape = [1., 2.] 
# BayesLogit apparently requires double\n", 69 | "tilt = [.01, 100.]\n", 70 | " # Sqrt of twice the negative tilting parameter, actually\n", 71 | " \n", 72 | "pg = PolyaGammaDist()" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "plt.figure(figsize=(14, 4.5))\n", 82 | "plt.rcParams['font.size'] = 20\n", 83 | "\n", 84 | "for i in range(2):\n", 85 | " \n", 86 | " python_samples = pg.rand_polyagamma(\n", 87 | " np.tile(shape[i], n_samples).astype(np.int), \n", 88 | " np.tile(tilt[i], n_samples)\n", 89 | " )\n", 90 | " \n", 91 | " # Sample via R package.\n", 92 | " try:\n", 93 | " rpy2.robjects.numpy2ri.activate()\n", 94 | " r_samples = np.array(\n", 95 | " bayeslogit.rpg(n_samples, shape[i], tilt[i])\n", 96 | " )\n", 97 | " except:\n", 98 | " # In case 'numpy2ri.activate()' fails\n", 99 | " r_samples = np.array([\n", 100 | " bayeslogit.rpg(1, shape[i], tilt[i])[0]\n", 101 | " for i in range(n_samples)\n", 102 | " ])\n", 103 | " \n", 104 | " plt.subplot(1, 2, i + 1)\n", 105 | " comparison_hist(r_samples, python_samples, use_log_scale=True)\n", 106 | " \n", 107 | " plt.xlabel('log(tilted stable)')\n", 108 | " if i == 0:\n", 109 | " plt.legend(['from R', 'from Python'], loc=[.65, .7], frameon=False)\n", 110 | "\n", 111 | "plt.show()" 112 | ] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "metadata": {}, 117 | "source": [ 118 | "## Compare against another R package" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": null, 124 | "metadata": {}, 125 | "outputs": [], 126 | "source": [ 127 | "import rpy2.robjects as robjects\n", 128 | "import rpy2.robjects.packages as rpackages\n", 129 | "import rpy2.robjects.numpy2ri\n", 130 | "\n", 131 | "try:\n", 132 | " pgdraw = rpackages.importr('pgdraw')\n", 133 | "except:\n", 134 | " utils = rpackages.importr('utils')\n", 135 | " utils.install_packages('pgdraw')\n", 136 | " pgdraw = rpackages.importr('pgdraw')" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": null, 142 | "metadata": {}, 143 | "outputs": [], 144 | "source": [ 145 | "plt.figure(figsize=(14, 4.5))\n", 146 | "plt.rcParams['font.size'] = 20\n", 147 | "\n", 148 | "for i in range(2):\n", 149 | " \n", 150 | " python_samples = pg.rand_polyagamma(\n", 151 | " np.tile(shape[i], n_samples).astype(np.int), \n", 152 | " np.tile(tilt[i], n_samples)\n", 153 | " )\n", 154 | " \n", 155 | " # Sample via R package.\n", 156 | " try:\n", 157 | " rpy2.robjects.numpy2ri.activate()\n", 158 | " r_samples = np.array(\n", 159 | " pgdraw.rcpp_pgdraw(shape[i], tilt[i] * np.ones(n_samples))\n", 160 | " )\n", 161 | " except:\n", 162 | " r_samples = np.array([\n", 163 | " pgdraw.rcpp_pgdraw(shape[i], tilt[i])[0]\n", 164 | " for i in range(n_samples)\n", 165 | " ])\n", 166 | " \n", 167 | " plt.subplot(1, 2, i + 1)\n", 168 | " comparison_hist(r_samples, python_samples, use_log_scale=True)\n", 169 | " \n", 170 | " plt.xlabel('log(tilted stable)')\n", 171 | " if i == 0:\n", 172 | " plt.legend(['from R', 'from Python'], loc=[.65, .7], frameon=False)\n", 173 | "\n", 174 | "plt.show()" 175 | ] 176 | }, 177 | { 178 | "cell_type": "markdown", 179 | "metadata": {}, 180 | "source": [ 181 | "## Make sure general and specialized method return same outputs" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": null, 187 | "metadata": {}, 188 | "outputs": [], 189 | "source": [ 190 | "n_samples = 10 ** 2\n", 191 | "tilt = .1\n", 192 | "shape = 1" 193 | ] 194 | }, 195 
| { 196 | "cell_type": "code", 197 | "execution_count": null, 198 | "metadata": {}, 199 | "outputs": [], 200 | "source": [ 201 | "seed = 0\n", 202 | "\n", 203 | "pg = PolyaGammaDist(seed)\n", 204 | "general_samples = pg.rand_polyagamma(\n", 205 | " np.ones(n_samples, dtype=np.int), \n", 206 | " tilt * np.ones(n_samples)\n", 207 | ")\n", 208 | "\n", 209 | "pg.set_seed(seed)\n", 210 | "simplified_samples = pg.rand_unit_shape_polyagamma(\n", 211 | " tilt * np.ones(n_samples)\n", 212 | ")\n", 213 | "\n", 214 | "assert np.all(general_samples == simplified_samples)" 215 | ] 216 | } 217 | ], 218 | "metadata": { 219 | "kernelspec": { 220 | "display_name": "Python 3", 221 | "language": "python", 222 | "name": "python3" 223 | }, 224 | "language_info": { 225 | "codemirror_mode": { 226 | "name": "ipython", 227 | "version": 3 228 | }, 229 | "file_extension": ".py", 230 | "mimetype": "text/x-python", 231 | "name": "python", 232 | "nbconvert_exporter": "python", 233 | "pygments_lexer": "ipython3", 234 | "version": "3.6.10" 235 | } 236 | }, 237 | "nbformat": 4, 238 | "nbformat_minor": 4 239 | } 240 | -------------------------------------------------------------------------------- /bayesbridge/design_matrix/sparse_matrix.py: -------------------------------------------------------------------------------- 1 | from warnings import warn 2 | 3 | import numpy as np 4 | import scipy.sparse as sparse 5 | 6 | from .abstract_matrix import AbstractDesignMatrix 7 | 8 | try: 9 | from .mkl_matvec import mkl_csr_matvec 10 | except: 11 | mkl_csr_matvec = None 12 | try: 13 | import cupy as cp 14 | except (ImportError, ModuleNotFoundError) as e: 15 | cp = None 16 | cupy_exception = e 17 | 18 | 19 | class SparseDesignMatrix(AbstractDesignMatrix): 20 | 21 | def __init__(self, X, use_mkl=True, center_predictor=False, add_intercept=True, 22 | copy_array=False, dot_format='csr', Tdot_format='csr'): 23 | """ 24 | Params: 25 | ------ 26 | X : scipy sparse matrix 27 | """ 28 | if copy_array: 29 | X = X.copy() 30 | super().__init__() 31 | if dot_format == 'csc' or Tdot_format == 'csc': 32 | raise NotImplementedError( 33 | "Current dot operations are only implemented for the CSR format." 34 | ) 35 | self.use_cupy = self.is_cupy_sparse(X) 36 | if use_mkl and (mkl_csr_matvec is None) and (not self.use_cupy): 37 | warn("Could not load MKL Library. Will use Scipy's 'dot'.") 38 | use_mkl = False 39 | self.centered = center_predictor 40 | self.intercept_added = add_intercept 41 | self.use_mkl = (not self.use_cupy) and use_mkl 42 | X = self.remove_intercept_indicator(X) 43 | squeeze, array, zeros = (cp.squeeze, cp.array, cp.zeros) if self.use_cupy \ 44 | else (np.squeeze, np.array, np.zeros) 45 | if center_predictor: 46 | self.column_offset = squeeze(array(X.mean(axis=0))) 47 | else: 48 | self.column_offset = zeros(X.shape[1]) 49 | self.X_main = cp.sparse.csr_matrix(X) if self.use_cupy else X.tocsr() 50 | 51 | @property 52 | def shape(self): 53 | shape = self.X_main.shape 54 | return shape[0], shape[1] + int(self.intercept_added) 55 | 56 | @property 57 | def is_sparse(self): 58 | return True 59 | 60 | @property 61 | def nnz(self): 62 | """ Currently exists only to estimate the cost of matrix-matrix and 63 | matrix-vector operations. Does not correspond to the actual nnz of the 64 | represented design matrix. 
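(The intercept column and the dense column offsets from centering are handled separately and therefore are not counted here.)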
65 | """ 66 | return self.X_main.nnz 67 | 68 | def dot(self, v): 69 | if self.memoized: 70 | if np.all(self.v_prev == v): 71 | return self.X_dot_v 72 | self.v_prev = v.copy() 73 | 74 | input_is_cupy = (cp is not None) and isinstance(v, cp.ndarray) 75 | if self.use_cupy and not input_is_cupy: 76 | v = cp.asarray(v) 77 | intercept_effect = 0. 78 | if self.intercept_added: 79 | intercept_effect += v[0] 80 | v = v[1:] 81 | result = intercept_effect + self.main_dot(v) 82 | if self.use_cupy and not input_is_cupy: 83 | result = cp.asnumpy(result) 84 | if self.memoized: 85 | self.X_dot_v = result 86 | self.dot_count += 1 87 | 88 | return result 89 | 90 | def main_dot(self, v): 91 | """ Multiply by the main effect part of the design matrix. """ 92 | X = self.X_main 93 | if self.use_mkl: 94 | result = mkl_csr_matvec(X, v) 95 | else: 96 | result = X.dot(v) 97 | inner = cp.inner if self.use_cupy else np.inner 98 | result -= inner(self.column_offset, v) 99 | if self.memoized: 100 | self.X_dot_v = result 101 | return result 102 | 103 | def Tdot(self, v): 104 | input_is_cupy = (cp is not None) and isinstance(v, cp.ndarray) 105 | if self.use_cupy and not input_is_cupy: 106 | v = cp.asarray(v) 107 | 108 | result = self.main_Tdot(v) 109 | 110 | if self.intercept_added: 111 | if self.use_cupy: 112 | result = cp.concatenate((cp.asarray([cp.sum(v)]), result)) 113 | else: 114 | result = np.concatenate(([np.sum(v)], result)) 115 | self.Tdot_count += 1 116 | 117 | if self.use_cupy and not input_is_cupy: 118 | result = cp.asnumpy(result) 119 | return result 120 | 121 | def main_Tdot(self, v): 122 | X = self.X_main 123 | if self.use_mkl: 124 | result = mkl_csr_matvec(X, v, transpose=True) 125 | else: 126 | result = X.T.dot(v) 127 | sum = cp.sum if self.use_cupy else np.sum 128 | result -= sum(v) * self.column_offset 129 | return result 130 | 131 | def compute_fisher_info(self, weight, diag_only=False): 132 | """ Compute $X^T W X$ where W is the diagonal matrix of a given weight.""" 133 | 134 | if diag_only: 135 | return self.compute_fisher_diag(weight) 136 | 137 | weight_mat = self.create_diag_matrix(weight) 138 | X = self.X_main 139 | X_T = X.T 140 | weighted_X = weight_mat.dot(X).tocsc() 141 | 142 | n_pred = self.shape[1] 143 | fisher_info = np.zeros((n_pred, n_pred)) 144 | if self.intercept_added: 145 | fisher_info[0, 0] = np.sum(weight) 146 | fisher_info[0, 1:] \ 147 | = weighted_X.sum(0) - np.sum(weight) * self.column_offset 148 | fisher_info[1:, 0] = fisher_info[0, 1:] 149 | fisher_info_wo_intercept = fisher_info[1:, 1:] 150 | else: 151 | fisher_info_wo_intercept = fisher_info 152 | 153 | fisher_info_wo_intercept += X_T.dot(weighted_X).toarray() 154 | if self.centered: 155 | outer_prod_term = np.outer( 156 | self.column_offset, weighted_X.sum(0) 157 | ) 158 | fisher_info_wo_intercept -= outer_prod_term + outer_prod_term.T 159 | fisher_info_wo_intercept \ 160 | += np.sum(weight) * np.outer(self.column_offset, self.column_offset) 161 | 162 | return fisher_info 163 | 164 | def compute_fisher_diag(self, weight): 165 | 166 | weight_mat = self.create_diag_matrix(weight) 167 | diag = weight_mat.dot(self.X_main.power(2)).sum(0) 168 | if self.centered: 169 | weighted_X = weight_mat.dot(self.X_main).tocsc() 170 | diag -= 2 * self.column_offset \ 171 | * np.squeeze(np.asarray(weighted_X.sum(0))) 172 | diag += np.sum(weight) * self.column_offset ** 2 173 | diag = np.squeeze(np.asarray(diag)) 174 | if self.intercept_added: 175 | diag = np.concatenate(([np.sum(weight)], diag)) 176 | 177 | return diag 178 | 179 | def 
create_diag_matrix(self, v): 180 | return sparse.dia_matrix((v, 0), (len(v), len(v))) 181 | 182 | def compute_transposed_fisher_info(self, weight, include_intrcpt=False): 183 | X = self.X_main 184 | weight_ex_intrcpt = weight[1:] if include_intrcpt else weight 185 | weight_mat = self.create_diag_matrix(weight_ex_intrcpt) 186 | weighted_X_T = weight_mat.dot(X.T).tocsc() 187 | transposed_fisher_info = X.dot(weighted_X_T).toarray() 188 | offset_weight_X = self.column_offset @ weighted_X_T 189 | if self.centered: 190 | transposed_fisher_info -= offset_weight_X[np.newaxis, :] 191 | transposed_fisher_info -= offset_weight_X[:, np.newaxis] 192 | transposed_fisher_info \ 193 | += np.sum(weight_ex_intrcpt * self.column_offset ** 2) 194 | if include_intrcpt: 195 | transposed_fisher_info += weight[0] 196 | return transposed_fisher_info 197 | 198 | def toarray(self): 199 | X = self.X_main.toarray() - self.column_offset[np.newaxis, :] 200 | if self.intercept_added: 201 | X = np.hstack((np.ones(X.shape[0]), X)) 202 | return X 203 | 204 | def extract_matrix(self, order=None): 205 | pass 206 | -------------------------------------------------------------------------------- /bayesbridge/random/polya_gamma/polya_gamma.pyx: -------------------------------------------------------------------------------- 1 | # cython: cdivision = True 2 | from libc.math cimport exp, log, sqrt, fabs, M_PI 3 | import random 4 | import cython 5 | import numpy as np 6 | cimport numpy as np 7 | from numpy.random import PCG64 8 | from numpy.random.bit_generator cimport BitGenerator 9 | from .scipy_ndtr cimport log_ndtr as normal_logcdf 10 | from bayesbridge.random.normal.normal cimport random_normal 11 | from bayesbridge.random.uniform.uniform cimport random_uniform 12 | 13 | 14 | 15 | cdef class PolyaGammaDist(): 16 | # Threshold below (and above) which the target density is bounded by inverse 17 | # Gaussian (and exponential) and have different analytical series expressions. 18 | cdef double THRESHOLD 19 | # Number of terms in the infinite alternating series beyond which to truncate. 20 | cdef int MAX_SERIES_TERMS 21 | cdef BitGenerator bitgen 22 | 23 | def __init__(self, seed=None): 24 | self.set_seed(seed) 25 | self.THRESHOLD = 2.0 / M_PI 26 | self.MAX_SERIES_TERMS = 100 27 | self.bitgen = PCG64(seed) 28 | 29 | def set_seed(self, seed): 30 | self.bitgen = PCG64(seed) 31 | 32 | def get_state(self): 33 | return self.bitgen.state 34 | 35 | def set_state(self, state): 36 | self.bitgen.state = state 37 | 38 | @cython.boundscheck(False) 39 | @cython.wraparound(False) 40 | def rand_polyagamma(self, shape, tilt): 41 | """ 42 | Sample from exponentially tilted Polya-Gamma distribution 43 | p(x | shape, tilt) \propto \exp(- tilt^2 / 2 * x) p(x | shape, tilt=0) 44 | via Devroye's alternatig series method. 45 | 46 | Parameters 47 | ---------- 48 | shape : numpy array of integers 49 | Distribution is defined for non-integer values but the implemented 50 | algorithm only support integer values. 51 | tilt : numpy array of doubles 52 | Sqrt of twice the negative tilting parameter, actually. 
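        Example (illustrative sketch; argument values are arbitrary):
            pg = PolyaGammaDist(seed=0)
            shape = np.ones(1000, dtype=int)
            tilt = 2. * np.ones(1000)
            draws = pg.rand_polyagamma(shape, tilt)  # array of 1000 tilted draws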
53 | """ 54 | 55 | if not isinstance(shape, np.ndarray) and isinstance(tilt, np.ndarray): 56 | raise TypeError('Input must be numpy arrays.') 57 | if not shape.size == tilt.size: 58 | raise ValueError('Input arrays must be of the same length.') 59 | if not np.issubdtype(shape.dtype, np.integer): 60 | raise ValueError('Shape parameter must be integers.') 61 | shape = shape.astype(np.intc) 62 | tilt = tilt.astype(np.double) 63 | result = np.zeros(shape.size, dtype=np.double) 64 | 65 | cdef int[:] shape_view = shape 66 | cdef double[:] tilt_view = tilt 67 | cdef double[:] result_view = result 68 | cdef long n_samples = shape_view.size 69 | cdef Py_ssize_t index, j 70 | for index in range(n_samples): 71 | for j in range(shape_view[index]): 72 | result_view[index] \ 73 | += self.rand_scalar_unit_shape_polyagamma(tilt_view[index]) 74 | return result 75 | 76 | @cython.boundscheck(False) 77 | @cython.wraparound(False) 78 | def rand_unit_shape_polyagamma(self, tilt): 79 | 80 | if not isinstance(tilt, np.ndarray): 81 | raise TypeError('Input must be numpy arrays.') 82 | tilt = tilt.astype(np.double) 83 | result = np.empty(tilt.size, dtype=np.double) 84 | 85 | cdef double[:] tilt_view = tilt 86 | cdef double[:] result_view = result 87 | cdef long n_samples = tilt_view.size 88 | cdef Py_ssize_t index 89 | for index in range(n_samples): 90 | result_view[index] \ 91 | = self.rand_scalar_unit_shape_polyagamma(tilt_view[index]) 92 | return result 93 | 94 | cdef double rand_scalar_unit_shape_polyagamma(self, double tilt): 95 | return .25 * self.rand_tilted_jocobi(.5 * fabs(tilt)) 96 | 97 | cdef double rand_tilted_jocobi(self, double tilt): 98 | """ 99 | Sample from tilted Jacobi distribution 100 | p(x | tilt) \propto \exp(- tilt^2 / 2 * x) p(x | 0) 101 | via Devroye's alternatig series method. 102 | """ 103 | cdef double X, U, proposal_density 104 | cdef bint accepted = False 105 | 106 | # Main sampling loop page 130 of the Windle PhD thesis 107 | while not accepted: 108 | X, proposal_density = self.rand_proposal(tilt) 109 | U = random_uniform(self.bitgen) * proposal_density 110 | accepted = self.decide_acceptability(U, X, proposal_density) 111 | 112 | return X 113 | 114 | cdef (double, double) rand_proposal(self, double tilt): 115 | # Many quantities here can be cached and reused in case of rejection, but 116 | # the acceptance rate is so high that it does not matter. 117 | cdef double exp_rate = .5 * tilt ** 2 + .125 * M_PI ** 2 118 | cdef double prob_to_right = self.calc_prob_to_right(tilt, exp_rate) 119 | if random_uniform(self.bitgen) < prob_to_right: 120 | X = self.rand_left_truncated_exp(1. / exp_rate, self.THRESHOLD) 121 | else: 122 | X = self.rand_right_truncated_unit_shape_invgauss(tilt, self.THRESHOLD) 123 | proposal_density = self.calc_next_term_in_series(0, X) 124 | return X, proposal_density 125 | 126 | cdef double calc_prob_to_right(self, double tilt, double exp_rate): 127 | cdef double log_mass_expo \ 128 | = - log(exp_rate) - exp_rate * self.THRESHOLD + log(.25 * M_PI) 129 | cdef double log_mass_invg_1 \ 130 | = - tilt + normal_logcdf( 131 | (self.THRESHOLD * tilt - 1.) / sqrt(self.THRESHOLD)) 132 | cdef double log_mass_invg_2 \ 133 | = tilt + normal_logcdf( 134 | - (self.THRESHOLD * tilt + 1.) 
/ sqrt(self.THRESHOLD)) 135 | cdef double mass_ratio = ( 136 | exp(log_mass_invg_1 - log_mass_expo) 137 | + exp(log_mass_invg_2 - log_mass_expo) 138 | ) 139 | return 1.0 / (1.0 + mass_ratio) 140 | 141 | # Equations (12) and (13) of Polson, Scott, and Windle (2013) 142 | cdef double calc_next_term_in_series(self, int n, double x): 143 | cdef double log_result = log(M_PI * (n + 0.5)) 144 | if x <= self.THRESHOLD: 145 | log_result += - 1.5 * log(.5 * x * M_PI) - 2 * (n + 0.5) ** 2 / x 146 | else: 147 | log_result += - 0.5 * x * M_PI ** 2 * (n + 0.5) ** 2 148 | return exp(log_result) 149 | 150 | cdef bint decide_acceptability(self, double U, double X, double zeroth_term): 151 | 152 | cdef double partial_sum = zeroth_term 153 | cdef int n_summed = 1 154 | cdef int sign = -1 # Sign of the next term in the alternating sequence 155 | cdef bint acceted 156 | cdef bint is_determinate = False 157 | 158 | while not is_determinate: 159 | partial_sum += sign * self.calc_next_term_in_series(n_summed, X) 160 | n_summed += 1 161 | if sign == -1: 162 | if U <= partial_sum: 163 | accepted = True 164 | is_determinate = True 165 | else: # sign == 1 166 | if U > partial_sum: 167 | accepted = False 168 | is_determinate = True 169 | elif n_summed >= self.MAX_SERIES_TERMS: 170 | acceted = True # Take the partial sum lower-bound as the target 171 | is_determinate = True 172 | sign = - sign 173 | 174 | return accepted 175 | 176 | cdef double rand_left_truncated_exp(self, double scale, double trunc): 177 | return trunc - scale * log(1.0 - random_uniform(self.bitgen)) 178 | 179 | # Ref: "Simulation of truncated gamma variables" by Younshik Chung 180 | # Korean Journal of Computational & Applied Mathematics, 1998 181 | cdef double rand_left_truncated_chisq(self, double trunc): 182 | cdef double X, density_ratio 183 | cdef bint accepted = False 184 | while not accepted: 185 | X = self.rand_left_truncated_exp(2., trunc) 186 | density_ratio = sqrt(0.5 * M_PI / X) 187 | accepted = (random_uniform(self.bitgen) <= density_ratio) 188 | return X 189 | 190 | 191 | cdef double rand_right_truncated_unit_shape_invgauss(self, double rate, double trunc): 192 | # Shape parameter is assumed to be one. 193 | cdef double X 194 | cdef double mean = 1. 
/ rate 195 | cdef bint accepted = False 196 | 197 | # Choose a better sampler depending on the input parameters 198 | if mean > trunc: 199 | # Algorithm 3 in Windle's PhD thesis, page 128 200 | while not accepted: 201 | X = 1.0 / self.rand_left_truncated_chisq(.5 * M_PI) 202 | accepted = (log(random_uniform(self.bitgen)) < - 0.5 * X * rate ** 2) 203 | else: 204 | while not accepted: 205 | X = self.rand_unit_shape_invgauss(mean) 206 | accepted = (X < trunc) 207 | return X 208 | 209 | cdef double rand_unit_shape_invgauss(self, double mean): 210 | cdef double V = random_normal(self.bitgen) ** 2 211 | cdef double X = mean + 0.5 * mean * ( 212 | mean * V - sqrt(4.0 * mean * V + mean ** 2 * V ** 2) 213 | ) 214 | if random_uniform(self.bitgen) > mean / (mean + X): 215 | X = mean ** 2 / X 216 | return X 217 | 218 | -------------------------------------------------------------------------------- /bayesbridge/gibbs_util.py: -------------------------------------------------------------------------------- 1 | import math 2 | import time 3 | from warnings import warn 4 | import numpy as np 5 | 6 | 7 | class SamplerOptions(): 8 | 9 | def __init__(self, coef_sampler_type, 10 | global_scale_update='sample', 11 | hmc_curvature_est_stabilized=False): 12 | """ 13 | Parameters 14 | ---------- 15 | coef_sampler_type : {'cholesky', 'cg', 'hmc'} 16 | global_scale_update : str, {'sample', 'optimize', None} 17 | hmc_curvature_est_stabilized : bool 18 | """ 19 | if coef_sampler_type not in ('cholesky', 'cg', 'hmc'): 20 | raise ValueError("Unsupported regression coefficient sampler.") 21 | self.coef_sampler_type = coef_sampler_type 22 | self.gscale_update = global_scale_update 23 | self.curvature_est_stabilized = hmc_curvature_est_stabilized 24 | 25 | def get_info(self): 26 | return { 27 | 'coef_sampler_type': self.coef_sampler_type, 28 | 'global_scale_update': self.gscale_update, 29 | 'hmc_curvature_est_stabilized': self.curvature_est_stabilized 30 | } 31 | 32 | @staticmethod 33 | def pick_default_and_create(coef_sampler_type, options, model_name, design): 34 | """ Initialize class with, if unspecified, an appropriate default 35 | sampling method based on the type and size of model. 36 | """ 37 | if options is None: 38 | options = {} 39 | 40 | if 'coef_sampler_type' in options: 41 | if coef_sampler_type is not None: 42 | warn("Duplicate specification of method for sampling " 43 | "regression coefficient. Will use the dictionary one.") 44 | coef_sampler_type = options['coef_sampler_type'] 45 | 46 | if coef_sampler_type not in (None, 'cholesky', 'cg', 'hmc'): 47 | raise ValueError("Unsupported sampler type.") 48 | 49 | if coef_sampler_type not in (None, 'cg') and design.use_cupy: 50 | raise ValueError("Only 'cg' sampler supported with cupy matrices.") 51 | 52 | if model_name in ('linear', 'logit'): 53 | 54 | n_obs, n_pred = design.shape 55 | if not design.is_sparse: 56 | preferred_method = 'cholesky' 57 | elif design.use_cupy: 58 | preferred_method = 'cg' 59 | else: 60 | # TODO: Make more informed choice between Cholesky and CG. 61 | frac = design.nnz / (n_obs * n_pred) 62 | fisher_info_cost = frac ** 2 * n_obs * n_pred ** 2 63 | cg_cost = design.nnz * 100. 64 | preferred_method = 'cg' if cg_cost < fisher_info_cost \ 65 | else 'cholesky' 66 | 67 | # TODO: Implement Woodbury-based Gaussian sampler. 
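        # Explanatory note (not current behavior): a Woodbury-identity-based
        # sampler would reduce the conditional Gaussian update to an
        # n_obs-by-n_obs solve rather than an n_pred-by-n_pred one, which is
        # what the warning below refers to.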
68 | if n_pred > n_obs: 69 | warn("Sampler has not been optimized for 'small n' problem.") 70 | 71 | if coef_sampler_type is None: 72 | coef_sampler_type = preferred_method 73 | elif coef_sampler_type not in ('hmc', preferred_method): 74 | warn("Specified sampler may not be optimal. Worth experimenting " 75 | "with the '{:s}' option.".format(preferred_method)) 76 | 77 | else: 78 | if coef_sampler_type != 'hmc': 79 | warn("Specified sampler type is not supported for the {:s} " 80 | "model. Will use HMC instead.".format(model_name)) 81 | coef_sampler_type = 'hmc' 82 | 83 | options['coef_sampler_type'] = coef_sampler_type 84 | return SamplerOptions(**options) 85 | 86 | 87 | class MarkovChainManager(): 88 | 89 | def __init__(self, n_obs, n_pred, n_unshrunk, model_name): 90 | self.n_obs = n_obs 91 | self.n_pred = n_pred 92 | self.n_unshrunk = n_unshrunk 93 | self.model_name = model_name 94 | self._prev_timestamp = None # For status update during Gibbs 95 | self._curr_timestamp = None 96 | 97 | def merge_outputs(self, prev_samples, prev_mcmc_info, new_samples, new_mcmc_info): 98 | 99 | new_samples = { 100 | key: np.concatenate( 101 | (prev_samples[key], new_samples[key]), axis=-1 102 | ) for key in new_samples.keys() 103 | } 104 | 105 | for output_key in ['_reg_coef_sampling_info']: 106 | prev_output = prev_mcmc_info[output_key] 107 | next_output = new_mcmc_info[output_key] 108 | new_mcmc_info[output_key] = { 109 | key : np.concatenate( 110 | (prev_output[key], next_output[key]), axis=-1 111 | ) for key in prev_output.keys() 112 | } 113 | 114 | new_mcmc_info['n_iter'] += prev_mcmc_info['n_iter'] 115 | new_mcmc_info['runtime'] += prev_mcmc_info['runtime'] 116 | 117 | for output_key in ['_init_optim_info', 'seed']: 118 | new_mcmc_info[output_key] = prev_mcmc_info[output_key] 119 | 120 | return new_samples, new_mcmc_info 121 | 122 | def pre_allocate(self, samples, sampling_info, n_post_burnin, thin, params_to_save, sampling_method): 123 | 124 | n_sample = math.floor(n_post_burnin / thin) # Number of samples to keep 125 | 126 | if 'coef' in params_to_save: 127 | samples['coef'] = np.zeros((self.n_pred, n_sample)) 128 | 129 | if 'local_scale' in params_to_save: 130 | samples['local_scale'] = np.zeros((self.n_pred - self.n_unshrunk, n_sample)) 131 | 132 | if 'global_scale' in params_to_save: 133 | samples['global_scale'] = np.zeros(n_sample) 134 | 135 | if 'obs_prec' in params_to_save: 136 | if self.model_name == 'linear': 137 | samples['obs_prec'] = np.zeros(n_sample) 138 | elif self.model_name == 'logit': 139 | samples['obs_prec'] = np.zeros((self.n_obs, n_sample)) 140 | 141 | if 'logp' in params_to_save: 142 | samples['logp'] = np.zeros(n_sample) 143 | 144 | for key in self.get_sampling_info_keys(sampling_method): 145 | sampling_info[key] = np.zeros(n_sample) 146 | 147 | def get_sampling_info_keys(self, sampling_method): 148 | if sampling_method == 'cg': 149 | keys = ['n_cg_iter'] 150 | elif sampling_method in ['hmc', 'nuts']: 151 | keys = [ 152 | 'stepsize', 'n_hessian_matvec', 'n_grad_evals', 153 | 'stability_limit_est', 'stability_adjustment_factor', 154 | 'instability_detected' 155 | ] 156 | if sampling_method == 'hmc': 157 | keys += ['n_integrator_step', 'accepted', 'accept_prob'] 158 | else: 159 | keys += ['tree_height', 'ave_accept_prob'] 160 | else: 161 | keys = [] 162 | return keys 163 | 164 | def store_current_state( 165 | self, samples, mcmc_iter, n_burnin, thin, coef, lscale, 166 | gscale, obs_prec, logp, params_to_save): 167 | 168 | if mcmc_iter <= n_burnin or (mcmc_iter - n_burnin) % 
thin != 0: 169 | return 170 | 171 | index = math.floor((mcmc_iter - n_burnin) / thin) - 1 172 | 173 | if 'coef' in params_to_save: 174 | samples['coef'][:, index] = coef 175 | 176 | if 'local_scale' in params_to_save: 177 | samples['local_scale'][:, index] = lscale 178 | 179 | if 'global_scale' in params_to_save: 180 | samples['global_scale'][index] = gscale 181 | 182 | if 'obs_prec' in params_to_save: 183 | if self.model_name == 'linear': 184 | samples['obs_prec'][index] = obs_prec 185 | elif self.model_name == 'logit': 186 | samples['obs_prec'][:, index] = obs_prec 187 | 188 | if 'logp' in params_to_save: 189 | samples['logp'][index] = logp 190 | 191 | def store_sampling_info( 192 | self, sampling_info, info, mcmc_iter, n_burnin, thin, sampling_method): 193 | 194 | if mcmc_iter <= n_burnin or (mcmc_iter - n_burnin) % thin != 0: 195 | return 196 | 197 | index = math.floor((mcmc_iter - n_burnin) / thin) - 1 198 | for key in self.get_sampling_info_keys(sampling_method): 199 | sampling_info[key][index] = info[key] 200 | 201 | def pack_parameters(self, coef, obs_prec, lscale, gscale): 202 | state = { 203 | 'coef': coef, 204 | 'local_scale': lscale, 205 | 'global_scale': gscale, 206 | } 207 | if self.model_name in ('linear', 'logit'): 208 | state['obs_prec'] = obs_prec 209 | return state 210 | 211 | def stamp_time(self, curr_time): 212 | self._prev_timestamp = curr_time 213 | 214 | def print_status(self, n_status_update, mcmc_iter, n_iter, 215 | time_format='minute'): 216 | 217 | if n_status_update == 0: 218 | return 219 | n_iter_per_update = int(n_iter / n_status_update) 220 | if mcmc_iter % n_iter_per_update != 0: 221 | return 222 | 223 | self._curr_timestamp = time.time() 224 | 225 | time_elapsed = self._curr_timestamp - self._prev_timestamp 226 | if time_format == 'second': 227 | time_str = "{:.3g} seconds".format(time_elapsed) 228 | elif time_format == 'minute': 229 | time_str = "{:.3g} minutes".format(time_elapsed / 60) 230 | else: 231 | raise ValueError() 232 | 233 | msg = " ".join(( 234 | "{:d} Gibbs iterations complete:".format(mcmc_iter), 235 | time_str, "has elasped since the last update." 236 | )) 237 | print(msg) 238 | self._prev_timestamp = self._curr_timestamp -------------------------------------------------------------------------------- /bayesbridge/random/polya_gamma/scipy_ndtr.c: -------------------------------------------------------------------------------- 1 | /* Scipy port of a Cephes Library component, suitably modified to be stand-alone. */ 2 | 3 | /* 4 | * 5 | * Normal distribution function 6 | * 7 | * 8 | * 9 | * SYNOPSIS: 10 | * 11 | * double x, y, ndtr(); 12 | * 13 | * y = ndtr( x ); 14 | * 15 | * 16 | * 17 | * DESCRIPTION: 18 | * 19 | * Returns the area under the Gaussian probability density 20 | * function, integrated from minus infinity to x: 21 | * 22 | * x 23 | * - 24 | * 1 | | 2 25 | * ndtr(x) = --------- | exp( - t /2 ) dt 26 | * sqrt(2pi) | | 27 | * - 28 | * -inf. 29 | * 30 | * = ( 1 + erf(z) ) / 2 31 | * = erfc(z) / 2 32 | * 33 | * where z = x/sqrt(2). Computation is via the functions 34 | * erf and erfc. 
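 * For orientation (reference values, not part of the original Cephes text):
 *   ndtr(0.0) = 0.5, ndtr(1.0) ~ 0.8413, ndtr(-1.0) ~ 0.1587.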
35 | * 36 | * 37 | * ACCURACY: 38 | * 39 | * Relative error: 40 | * arithmetic domain # trials peak rms 41 | * IEEE -13,0 30000 3.4e-14 6.7e-15 42 | * 43 | * 44 | * ERROR MESSAGES: 45 | * 46 | * message condition value returned 47 | * erfc underflow x > 37.519379347 0.0 48 | * 49 | */ 50 | /* erf.c 51 | * 52 | * Error function 53 | * 54 | * 55 | * 56 | * SYNOPSIS: 57 | * 58 | * double x, y, erf(); 59 | * 60 | * y = erf( x ); 61 | * 62 | * 63 | * 64 | * DESCRIPTION: 65 | * 66 | * The integral is 67 | * 68 | * x 69 | * - 70 | * 2 | | 2 71 | * erf(x) = -------- | exp( - t ) dt. 72 | * sqrt(pi) | | 73 | * - 74 | * 0 75 | * 76 | * For 0 <= |x| < 1, erf(x) = x * P4(x**2)/Q5(x**2); otherwise 77 | * erf(x) = 1 - erfc(x). 78 | * 79 | * 80 | * 81 | * ACCURACY: 82 | * 83 | * Relative error: 84 | * arithmetic domain # trials peak rms 85 | * IEEE 0,1 30000 3.7e-16 1.0e-16 86 | * 87 | */ 88 | /* erfc.c 89 | * 90 | * Complementary error function 91 | * 92 | * 93 | * 94 | * SYNOPSIS: 95 | * 96 | * double x, y, erfc(); 97 | * 98 | * y = erfc( x ); 99 | * 100 | * 101 | * 102 | * DESCRIPTION: 103 | * 104 | * 105 | * 1 - erf(x) = 106 | * 107 | * inf. 108 | * - 109 | * 2 | | 2 110 | * erfc(x) = -------- | exp( - t ) dt 111 | * sqrt(pi) | | 112 | * - 113 | * x 114 | * 115 | * 116 | * For small x, erfc(x) = 1 - erf(x); otherwise rational 117 | * approximations are computed. 118 | * 119 | * 120 | * 121 | * ACCURACY: 122 | * 123 | * Relative error: 124 | * arithmetic domain # trials peak rms 125 | * IEEE 0,26.6417 30000 5.7e-14 1.5e-14 126 | */ 127 | 128 | 129 | /* 130 | * Cephes Math Library Release 2.2: June, 1992 131 | * Copyright 1984, 1987, 1988, 1992 by Stephen L. Moshier 132 | * Direct inquiries to 30 Frost Street, Cambridge, MA 02140 133 | */ 134 | 135 | #include 136 | 137 | static double P[] = { 138 | 2.46196981473530512524E-10, 139 | 5.64189564831068821977E-1, 140 | 7.46321056442269912687E0, 141 | 4.86371970985681366614E1, 142 | 1.96520832956077098242E2, 143 | 5.26445194995477358631E2, 144 | 9.34528527171957607540E2, 145 | 1.02755188689515710272E3, 146 | 5.57535335369399327526E2 147 | }; 148 | 149 | static double Q[] = { 150 | /* 1.00000000000000000000E0, */ 151 | 1.32281951154744992508E1, 152 | 8.67072140885989742329E1, 153 | 3.54937778887819891062E2, 154 | 9.75708501743205489753E2, 155 | 1.82390916687909736289E3, 156 | 2.24633760818710981792E3, 157 | 1.65666309194161350182E3, 158 | 5.57535340817727675546E2 159 | }; 160 | 161 | static double R[] = { 162 | 5.64189583547755073984E-1, 163 | 1.27536670759978104416E0, 164 | 5.01905042251180477414E0, 165 | 6.16021097993053585195E0, 166 | 7.40974269950448939160E0, 167 | 2.97886665372100240670E0 168 | }; 169 | 170 | static double S[] = { 171 | /* 1.00000000000000000000E0, */ 172 | 2.26052863220117276590E0, 173 | 9.39603524938001434673E0, 174 | 1.20489539808096656605E1, 175 | 1.70814450747565897222E1, 176 | 9.60896809063285878198E0, 177 | 3.36907645100081516050E0 178 | }; 179 | 180 | static double T[] = { 181 | 9.60497373987051638749E0, 182 | 9.00260197203842689217E1, 183 | 2.23200534594684319226E3, 184 | 7.00332514112805075473E3, 185 | 5.55923013010394962768E4 186 | }; 187 | 188 | static double U[] = { 189 | /* 1.00000000000000000000E0, */ 190 | 3.35617141647503099647E1, 191 | 5.21357949780152679795E2, 192 | 4.59432382970980127987E3, 193 | 2.26290000613890934246E4, 194 | 4.92673942608635921086E4 195 | }; 196 | 197 | #define UTHRESH 37.519379347 198 | #define DBL_EPSILON 2.2204460492503131e-16 199 | #define NPY_SQRT1_2 0.707106781186547524400844362104849039 /* 
1/sqrt(2) */ 200 | #define MAXLOG 7.09782712893383996843E2 /* log(2**1024) */ 201 | #define M_PI 3.14159265358979323846264338327950288 202 | 203 | 204 | double ndtr(double a); 205 | double erf(double x); 206 | double polevl(double x, const double coef[], int N); 207 | double p1evl(double x, const double coef[], int N); 208 | 209 | 210 | double ndtr(double a) 211 | { 212 | double x, y, z; 213 | 214 | // if (cephes_isnan(a)) { 215 | // sf_error("ndtr", SF_ERROR_DOMAIN, NULL); 216 | // return (NPY_NAN); 217 | // } 218 | 219 | x = a * NPY_SQRT1_2; 220 | z = fabs(x); 221 | 222 | if (z < NPY_SQRT1_2) 223 | y = 0.5 + 0.5 * erf(x); 224 | 225 | else { 226 | y = 0.5 * erfc(z); 227 | 228 | if (x > 0) 229 | y = 1.0 - y; 230 | } 231 | 232 | return (y); 233 | } 234 | 235 | 236 | double erfc(double a) 237 | { 238 | double p, q, x, y, z; 239 | 240 | // if (cephes_isnan(a)) { 241 | // sf_error("erfc", SF_ERROR_DOMAIN, NULL); 242 | // return (NPY_NAN); 243 | // } 244 | 245 | if (a < 0.0) 246 | x = -a; 247 | else 248 | x = a; 249 | 250 | if (x < 1.0) 251 | return (1.0 - erf(a)); 252 | 253 | z = -a * a; 254 | 255 | if (z < -MAXLOG) { 256 | under: 257 | // sf_error("erfc", SF_ERROR_UNDERFLOW, NULL); 258 | if (a < 0) 259 | return (2.0); 260 | else 261 | return (0.0); 262 | } 263 | 264 | z = exp(z); 265 | 266 | if (x < 8.0) { 267 | p = polevl(x, P, 8); 268 | q = p1evl(x, Q, 8); 269 | } 270 | else { 271 | p = polevl(x, R, 5); 272 | q = p1evl(x, S, 6); 273 | } 274 | y = (z * p) / q; 275 | 276 | if (a < 0) 277 | y = 2.0 - y; 278 | 279 | if (y == 0.0) 280 | goto under; 281 | 282 | return (y); 283 | } 284 | 285 | 286 | double erf(double x) 287 | { 288 | double y, z; 289 | 290 | // if (cephes_isnan(x)) { 291 | // sf_error("erf", SF_ERROR_DOMAIN, NULL); 292 | // return (NPY_NAN); 293 | // } 294 | 295 | if (x < 0.0) { 296 | // original implementation used -x instead of fabs(), but led to breaking behavior on some platforms. 297 | // see https://github.com/aki-nishimura/bayes-bridge/pull/7 for more info. 298 | return -erf(fabs(x)); 299 | } 300 | 301 | if (fabs(x) > 1.0) 302 | return (1.0 - erfc(x)); 303 | z = x * x; 304 | 305 | y = x * polevl(z, T, 4) / p1evl(z, U, 5); 306 | return (y); 307 | 308 | } 309 | 310 | 311 | double polevl(double x, const double coef[], int N) 312 | { 313 | double ans; 314 | int i; 315 | const double *p; 316 | 317 | p = coef; 318 | ans = *p++; 319 | i = N; 320 | 321 | do 322 | ans = ans * x + *p++; 323 | while (--i); 324 | 325 | return (ans); 326 | } 327 | 328 | /* p1evl() */ 329 | /* N 330 | * Evaluate polynomial when coefficient of x is 1.0. 331 | * Otherwise same as polevl. 332 | */ 333 | 334 | double p1evl(double x, const double coef[], int N) 335 | { 336 | double ans; 337 | const double *p; 338 | int i; 339 | 340 | p = coef; 341 | ans = x + *p++; 342 | i = N - 1; 343 | 344 | do 345 | ans = ans * x + *p++; 346 | while (--i); 347 | 348 | return (ans); 349 | } 350 | 351 | /* 352 | * double log_ndtr(double a) 353 | * 354 | * For a > -20, use the existing ndtr technique and take a log. 355 | * for a <= -20, we use the Taylor series approximation of erf to compute 356 | * the log CDF directly. The Taylor series consists of two parts which we will name "left" 357 | * and "right" accordingly. The right part involves a summation which we compute until the 358 | * difference in terms falls below the machine-specific EPSILON. 
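 * (Added summary of the branches implemented below: a > 6 returns -ndtr(-a),
 *  using log(1 + x) ~ x; -20 < a <= 6 returns log(ndtr(a)) directly; and
 *  a <= -20 uses the series expansion given below.)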
359 | * 360 | * \Phi(z) &=& 361 | * \frac{e^{-z^2/2}}{-z\sqrt{2\pi}} * [1 + \sum_{n=1}^{N-1} (-1)^n \frac{(2n-1)!!}{(z^2)^n}] 362 | * + O(z^{-2N+2}) 363 | * = [\mbox{LHS}] * [\mbox{RHS}] + \mbox{error}. 364 | * 365 | */ 366 | 367 | double log_ndtr(double a) 368 | { 369 | 370 | double log_LHS, /* we compute the left hand side of the approx (LHS) in one shot */ 371 | last_total = 0, /* variable used to check for convergence */ 372 | right_hand_side = 1, /* includes first term from the RHS summation */ 373 | numerator = 1, /* numerator for RHS summand */ 374 | denom_factor = 1, /* use reciprocal for denominator to avoid division */ 375 | denom_cons = 1.0 / (a * a); /* the precomputed division we use to adjust the denominator */ 376 | long sign = 1, i = 0; 377 | 378 | if (a > 6) { 379 | return -ndtr(-a); /* log(1+x) \approx x */ 380 | } 381 | if (a > -20) { 382 | return log(ndtr(a)); 383 | } 384 | log_LHS = -0.5 * a * a - log(-a) - 0.5 * log(2 * M_PI); 385 | 386 | while (fabs(last_total - right_hand_side) > DBL_EPSILON) { 387 | i += 1; 388 | last_total = right_hand_side; 389 | sign = -sign; 390 | denom_factor *= denom_cons; 391 | numerator *= 2 * i - 1; 392 | right_hand_side += sign * numerator * denom_factor; 393 | 394 | } 395 | return log_LHS + log(right_hand_side); 396 | } 397 | -------------------------------------------------------------------------------- /bayesbridge/prior.py: -------------------------------------------------------------------------------- 1 | import math 2 | from warnings import warn 3 | import numpy as np 4 | import scipy as sp 5 | from scipy.special import polygamma as scipy_polygamma 6 | 7 | class RegressionCoefPrior(): 8 | 9 | def __init__( 10 | self, 11 | bridge_exponent=.5, 12 | n_fixed_effect=0, 13 | sd_for_intercept=float('inf'), 14 | sd_for_fixed_effect=float('inf'), 15 | regularizing_slab_size=float('inf'), 16 | global_scale_prior_hyper_param=None, 17 | _global_scale_parametrization='coef_magnitude' 18 | ): 19 | """ Encapisulate prior information for BayesBridge. 20 | 21 | Parameters 22 | ---------- 23 | bridge_exponent : float < 2 24 | Exponent of the bridge prior on regression coefficients. For example, 25 | the value of 2 (albeit unsupported) would correspond to Gaussian prior 26 | and of 1 double-exponential as in Bayesian Lasso. 27 | n_fixed_effect : int 28 | Number of predictors --- other than intercept and placed at the 29 | first columns of the design matrices --- whose coefficients are 30 | estimated with Gaussian priors of pre-specified standard 31 | deviation(s). 32 | sd_for_intercept : float 33 | Standard deviation of Gaussian prior on the intercept. `Inf` 34 | corresponds to an uninformative flat prior. 35 | sd_for_fixed_effect : float, numpy array 36 | Standard deviation(s) of Gaussian prior(s) on fixed effects. 37 | If an array, the length must be the same as `n_fixed_effect`. 38 | `Inf` corresponds to an uninformative flat prior. 39 | regularizing_slab_size : float 40 | Standard deviation of the Gaussian tail-regularizer on 41 | the bridge prior. Used to impose soft prior constraints on a 42 | range of regression coefficients in case the data provides limited 43 | information (e.g. when complete separation occurs). One may, for 44 | example, set the slab size by first choosing a value which 45 | regression coefficients are very unlikely to exceed in magnitude and 46 | then dividing the value by 1.96. 
47 | global_scale_prior_hyper_param : dict, None 48 | Should contain pair of keys 'log10_mean' and 'log10_sd', 49 | specifying the prior mean and standard deviation of 50 | log10(global_scale). If None, the default reference prior for a 51 | scale parameter is used. 52 | 53 | Other Parameters 54 | ---------------- 55 | _global_scale_parametrization: str, {'raw', 'coef_magnitude'} 56 | If 'coef_magnitude', scale the local and global scales so that the 57 | global scale parameter coincide with the prior expected 58 | magnitude of regression coefficients. 59 | """ 60 | if not (np.isscalar(sd_for_fixed_effect) 61 | or n_fixed_effect == len(sd_for_fixed_effect)): 62 | raise ValueError( 63 | "Prior sd for fixed effects must be specified either by a " 64 | "scalar or array of the same length as n_fixed_effect." 65 | ) 66 | if bridge_exponent > 2: 67 | raise ValueError("Exponent larger than 2 is unsupported.") 68 | 69 | if np.isscalar(sd_for_fixed_effect): 70 | sd_for_fixed_effect = sd_for_fixed_effect * np.ones(n_fixed_effect) 71 | self.sd_for_intercept = sd_for_intercept 72 | self.sd_for_fixed = sd_for_fixed_effect 73 | self.slab_size = regularizing_slab_size 74 | self.n_fixed = n_fixed_effect 75 | self.bridge_exp = bridge_exponent 76 | self._gscale_paramet = _global_scale_parametrization 77 | if global_scale_prior_hyper_param is None: 78 | self.param = { 79 | 'gscale_neg_power': {'shape': 0., 'rate': 0.}, 80 | # Reference prior for a scale family. 81 | 'gscale': None 82 | } 83 | 84 | else: 85 | keys = global_scale_prior_hyper_param.keys() 86 | if not ({'log10_mean', 'log10_sd'} <= keys): 87 | raise ValueError( 88 | "Dictionary should contain keys 'log10_mean' and 'log10_sd.'" 89 | ) 90 | log10_mean = global_scale_prior_hyper_param['log10_mean'] 91 | log10_sd = global_scale_prior_hyper_param['log10_sd'] 92 | shape, rate = self.solve_for_gscale_prior_hyperparam( 93 | log10_mean, log10_sd, bridge_exponent, self._gscale_paramet 94 | ) 95 | self.param = { 96 | 'gscale_neg_power': {'shape': shape, 'rate': rate}, 97 | 'gscale': {'log10_mean': log10_mean, 'log10_sd': log10_sd} 98 | } # Hyper-parameters on the negative power are specified in 99 | # terms of the 'raw' parametrization. 100 | 101 | def get_info(self): 102 | sd_for_fixed = self.sd_for_fixed 103 | if len(sd_for_fixed) > 0 and np.all(sd_for_fixed == sd_for_fixed[0]): 104 | sd_for_fixed = sd_for_fixed[0] 105 | info = { 106 | 'bridge_exponent': self.bridge_exp, 107 | 'n_fixed_effect': self.n_fixed, 108 | 'sd_for_intercept': self.sd_for_intercept, 109 | 'sd_for_fixed_effect': sd_for_fixed, 110 | 'regularizing_slab_size': self.slab_size, 111 | 'global_scale_prior_hyper_param': self.param['gscale'], 112 | '_global_scale_parametrization': self._gscale_paramet 113 | } 114 | return info 115 | 116 | def clone(self, **kwargs): 117 | """ Make a clone with only specified attributes modified. """ 118 | info = self.get_info() 119 | if '_global_scale_parametrization' in kwargs: 120 | raise ValueError("Change of parametrization is not supported.") 121 | for key in kwargs.keys(): 122 | if key in info: 123 | info[key] = kwargs[key] 124 | else: 125 | warn("'{:s} is not a valid keyward argument.".format(key)) 126 | return RegressionCoefPrior(**info) 127 | 128 | def adjust_scale(self, gscale, lscale, to): 129 | unit_bridge_magnitude \ 130 | = self.compute_power_exp_ave_magnitude(self.bridge_exp, 1.) 
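        # Reference values (added comment; the magnitude is Gamma(2/p) / Gamma(1/p)
        # for a density proportional to exp(-|x|^p)):
        #   bridge_exp = 0.5 -> Gamma(4) / Gamma(2) = 6
        #   bridge_exp = 1.0 -> Gamma(2) / Gamma(1) = 1 (double-exponential case)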
131 | if to == 'raw': 132 | gscale /= unit_bridge_magnitude 133 | lscale *= unit_bridge_magnitude 134 | elif to == 'coef_magnitude': 135 | gscale *= unit_bridge_magnitude 136 | lscale /= unit_bridge_magnitude 137 | else: 138 | raise ValueError() 139 | return gscale, lscale 140 | 141 | def solve_for_gscale_prior_hyperparam( 142 | self, log10_mean, log10_sd, bridge_exp, gscale_paramet): 143 | log_mean = self.change_log_base(log10_mean, from_=10., to=math.e) 144 | log_sd = self.change_log_base(log10_sd, from_=10., to=math.e) 145 | if gscale_paramet == 'coef_magnitude': 146 | unit_bridge_magnitude \ 147 | = self.compute_power_exp_ave_magnitude(bridge_exp, 1.) 148 | log_mean -= math.log(unit_bridge_magnitude) 149 | shape, rate = self.solve_for_gamma_param( 150 | log_mean, log_sd, bridge_exp 151 | ) 152 | return shape, rate 153 | 154 | @staticmethod 155 | def compute_power_exp_ave_magnitude(exponent, scale=1.): 156 | """ Returns the expected absolute value of a random variable with 157 | density proportional to exp( - |x / scale|^exponent ). 158 | """ 159 | return scale * math.gamma(2 / exponent) / math.gamma(1 / exponent) 160 | 161 | @staticmethod 162 | def change_log_base(val, from_=math.e, to=10.): 163 | return val * math.log(from_) / math.log(to) 164 | 165 | def solve_for_gamma_param(self, log_mean, log_sd, bridge_exp): 166 | """ Find hyper-parameters matching specified mean and sd in log scale. 167 | 168 | Determine the shape and rate parameters of a Gamma prior on 169 | phi = gscale ** (- 1 / bridge_exp) 170 | so that the mean and sd of log(phi) coincide with log_mean and log_sd. 171 | The calculations are done in the 'raw' parametrization of gscale, 172 | as opposed to the 'coef_magnitude' parametrization. 173 | """ 174 | 175 | f = lambda log_shape: ( 176 | math.sqrt(self._polygamma(1, math.exp(log_shape))) / bridge_exp 177 | - log_sd 178 | ) # Function whose root coincides with the desired log-shape parameter. 179 | lower_lim = -10. # Any sufficiently small number is fine. 180 | if log_sd < 0: 181 | raise ValueError("Variance has to be positive.") 182 | elif log_sd > 10 ** 8: 183 | raise ValueError("Specified prior variance is too large.") 184 | lower, upper = self._find_root_bounds(f, lower_lim) 185 | 186 | try: 187 | log_shape = sp.optimize.brentq(f, lower, upper) 188 | except BaseException as error: 189 | print('Solving for the global scale gamma prior hyper-parameters ' 190 | 'failed; {}'.format(error)) 191 | shape = math.exp(log_shape) 192 | rate = math.exp( 193 | self._polygamma(0, shape) + bridge_exp * log_mean 194 | ) 195 | return shape, rate 196 | 197 | @staticmethod 198 | def _polygamma(n, x): 199 | """ Wrap the scipy function so that it returns a scalar. """ 200 | return scipy_polygamma([n], x)[0] 201 | 202 | @staticmethod 203 | def _find_root_bounds(f, init_lower_lim, increment=5., max_lim=None): 204 | if max_lim is None: 205 | max_lim = init_lower_lim + 10 ** 4 206 | if f(init_lower_lim) < 0: 207 | raise ValueError( 208 | "Objective function must have positive value " 209 | "at the lower limit." 210 | ) 211 | lower_lim = init_lower_lim 212 | while f(lower_lim + increment) > 0 and lower_lim < max_lim: 213 | lower_lim += increment 214 | if lower_lim >= max_lim: 215 | raise Exception() # Replace with a warning. 
216 | upper_lim = lower_lim + increment 217 | return (lower_lim, upper_lim) -------------------------------------------------------------------------------- /tests/manual_tests/test_stepsize_adapter.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import sys\n", 10 | "sys.path.append('..')\n", 11 | "sys.path.append('../..')" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "import numpy as np\n", 21 | "import scipy as sp\n", 22 | "import matplotlib.pyplot as plt\n", 23 | "%matplotlib inline" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "from distributions import BivariateGaussian, BivariateSkewNormal\n", 33 | "from bayesbridge.reg_coef_sampler.stepsize_adapter \\\n", 34 | " import StepsizeAdapter, RobbinsMonroStepsizer, DualAverageStepsizeAdapter" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "## Define an adaptive RWMH operator and the target distribution." 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "def random_walk_MH_step(f, theta0, logp0, prop_sd):\n", 51 | " \"\"\"\n", 52 | " Params\n", 53 | " ------\n", 54 | " f : function\n", 55 | " Computes the log density of the target density\n", 56 | " prop_sd : scalar or vector\n", 57 | " Proposal standard deviation\n", 58 | " \"\"\"\n", 59 | " theta = theta0.copy()\n", 60 | " theta += prop_sd * np.random.randn(len(theta0))\n", 61 | " logp = f(theta)\n", 62 | " accept_prob = min(1, np.exp(logp - logp0))\n", 63 | " accepted = accept_prob > np.random.uniform()\n", 64 | " if not accepted:\n", 65 | " theta = theta0\n", 66 | " logp = logp0\n", 67 | " return theta, logp, accept_prob, accepted\n", 68 | "\n", 69 | "def adaptive_random_walk_MH(\n", 70 | " f, n_iter, x0, stepsize0, target_accept_prob=.9,\n", 71 | " adapt_method='robbins-monro'\n", 72 | " ):\n", 73 | " \n", 74 | " logp = f(x0)\n", 75 | " x = x0.copy()\n", 76 | " stepsize = stepsize0\n", 77 | " \n", 78 | " # Pre-allocate\n", 79 | " samples = np.zeros((len(x0), n_iter))\n", 80 | " stepsizes = np.zeros(n_iter)\n", 81 | " ave_stepsizes = np.zeros(n_iter)\n", 82 | " accept_probs = np.zeros(n_iter)\n", 83 | " \n", 84 | " if adapt_method == 'dual-average':\n", 85 | " adapter = DualAverageStepsizeAdapter(\n", 86 | " stepsize0, target_accept_prob\n", 87 | " )\n", 88 | " else:\n", 89 | " adapter = StepsizeAdapter(\n", 90 | " stepsize0, target_accept_prob, \n", 91 | " reference_iteration=n_iter, \n", 92 | " adaptsize_at_reference=0.05\n", 93 | " )\n", 94 | " for i in range(n_iter):\n", 95 | " \n", 96 | " x, logp, accept_prob, _ = \\\n", 97 | " random_walk_MH_step(f, x, logp, stepsize)\n", 98 | " stepsize = adapter.adapt_stepsize(accept_prob)\n", 99 | " \n", 100 | " samples[:, i] = x\n", 101 | " stepsizes[i] = stepsize\n", 102 | " ave_stepsizes[i] = adapter.get_current_stepsize(averaged=True)\n", 103 | " accept_probs[i] = accept_prob\n", 104 | " \n", 105 | " return samples, stepsizes, ave_stepsizes, accept_probs" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [ 114 | "bi_skewnorm = 
BivariateSkewNormal()\n", 115 | "def f(x):\n", 116 | " return bi_skewnorm.compute_logp_and_gradient(x, logp_only=True)[0]" 117 | ] 118 | }, 119 | { 120 | "cell_type": "markdown", 121 | "metadata": {}, 122 | "source": [ 123 | "## Compare different adaptation schedule for Robbins-Monro algorithm." 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": null, 129 | "metadata": {}, 130 | "outputs": [], 131 | "source": [ 132 | "plt.figure(figsize=(7, 4.5))\n", 133 | "plt.rcParams['font.size'] = 18\n", 134 | "\n", 135 | "for decay_exponent in (1., 2 / 3, 1 / 2):\n", 136 | " rm_stepsizer = RobbinsMonroStepsizer(\n", 137 | " init=1., decay_exponent=decay_exponent,\n", 138 | " reference_iteration=100, size_at_reference=.05)\n", 139 | " adaptation_sizes = rm_stepsizer.calculate_stepsize(np.arange(500))\n", 140 | " plt.plot(np.log10(adaptation_sizes), label='Exponent = {:.2f}'.format(decay_exponent))\n", 141 | " \n", 142 | "plt.ylabel(r'$\\log_{10}$(adaptation size)')\n", 143 | "plt.xlabel('Number of adaptation steps')\n", 144 | "plt.legend()\n", 145 | "plt.show()" 146 | ] 147 | }, 148 | { 149 | "cell_type": "markdown", 150 | "metadata": {}, 151 | "source": [ 152 | "## Run an adaptive MCMC with Robbins-Monro adaptation." 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": null, 158 | "metadata": {}, 159 | "outputs": [], 160 | "source": [ 161 | "stepsize0 = 1\n", 162 | "x0 = np.array([0., 0.])\n", 163 | "n_iter = 5 * 10 ** 4\n", 164 | "\n", 165 | "samples, stepsizes, ave_stepsizes, accept_probs = adaptive_random_walk_MH(\n", 166 | " f, n_iter, x0, stepsize0, target_accept_prob=.9, \n", 167 | " adapt_method='robbins-monro',\n", 168 | ")" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": null, 174 | "metadata": {}, 175 | "outputs": [], 176 | "source": [ 177 | "print('The average acceptance probability is {:.2f}.'.format(np.mean(accept_probs)))" 178 | ] 179 | }, 180 | { 181 | "cell_type": "markdown", 182 | "metadata": {}, 183 | "source": [ 184 | "#### Take a look at the empirical distribution: stationary distribution may be perturbed a bit due to adaptation." 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": null, 190 | "metadata": {}, 191 | "outputs": [], 192 | "source": [ 193 | "plt.figure(figsize=(7, 4.5))\n", 194 | "plt.rcParams['font.size'] = 20\n", 195 | "\n", 196 | "grid = np.linspace(-4, 4, 101)\n", 197 | "marginal_pdf = bi_skewnorm.compute_marginal_pdf(grid, grid)\n", 198 | "\n", 199 | "for axis in range(2):\n", 200 | " color = 'C' + str(axis)\n", 201 | " plt.hist(samples[axis, int(n_iter / 2):], \n", 202 | " alpha=.5, bins=21, normed=True,\n", 203 | " color=color)\n", 204 | " plt.plot(grid, marginal_pdf[axis], color=color)\n", 205 | "plt.show()" 206 | ] 207 | }, 208 | { 209 | "cell_type": "markdown", 210 | "metadata": {}, 211 | "source": [ 212 | "#### Plot the sequence of stepsizes used at each MCMC iteration as well as the average." 
213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": null, 218 | "metadata": {}, 219 | "outputs": [], 220 | "source": [ 221 | "plt.figure(figsize=(14, 4.5))\n", 222 | "\n", 223 | "plt.plot(np.log10(stepsizes), label='M-H stepsize')\n", 224 | "plt.plot(np.log10(ave_stepsizes), label='averaged stepsize')\n", 225 | "plt.ylim([-2.1, -.4])\n", 226 | "\n", 227 | "plt.legend()\n", 228 | "plt.show()" 229 | ] 230 | }, 231 | { 232 | "cell_type": "markdown", 233 | "metadata": {}, 234 | "source": [ 235 | "## Run an adaptive MCMC with dual-averaging algorithm." 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": null, 241 | "metadata": {}, 242 | "outputs": [], 243 | "source": [ 244 | "stepsize0 = .1\n", 245 | "x0 = np.array([0., 0.])\n", 246 | "n_iter = 5 * 10 ** 4\n", 247 | "samples, stepsizes, ave_stepsizes, accept_probs = adaptive_random_walk_MH(\n", 248 | " f, n_iter, x0, stepsize0, target_accept_prob=.9, \n", 249 | " adapt_method='dual-average',\n", 250 | ")" 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": null, 256 | "metadata": {}, 257 | "outputs": [], 258 | "source": [ 259 | "print('The average acceptance probability is {:.2f}.'.format(np.mean(accept_probs)))" 260 | ] 261 | }, 262 | { 263 | "cell_type": "markdown", 264 | "metadata": {}, 265 | "source": [ 266 | "#### Take a look at the empirical distribution: stationary distribution may be perturbed a bit due to adaptation." 267 | ] 268 | }, 269 | { 270 | "cell_type": "code", 271 | "execution_count": null, 272 | "metadata": {}, 273 | "outputs": [], 274 | "source": [ 275 | "plt.figure(figsize=(7, 4.5))\n", 276 | "plt.rcParams['font.size'] = 20\n", 277 | "\n", 278 | "grid = np.linspace(-4, 4, 101)\n", 279 | "marginal_pdf = bi_skewnorm.compute_marginal_pdf(grid, grid)\n", 280 | "\n", 281 | "for axis in range(2):\n", 282 | " color = 'C' + str(axis)\n", 283 | " plt.hist(samples[axis, int(n_iter / 2):], \n", 284 | " alpha=.5, bins=21, normed=True,\n", 285 | " color=color)\n", 286 | " plt.plot(grid, marginal_pdf[axis], color=color)\n", 287 | "plt.show()" 288 | ] 289 | }, 290 | { 291 | "cell_type": "markdown", 292 | "metadata": {}, 293 | "source": [ 294 | "#### Plot the sequence of stepsizes used at each MCMC iteration as well as the average." 
295 | ] 296 | }, 297 | { 298 | "cell_type": "code", 299 | "execution_count": null, 300 | "metadata": {}, 301 | "outputs": [], 302 | "source": [ 303 | "plt.figure(figsize=(14, 4.5))\n", 304 | "\n", 305 | "plt.plot(np.log10(stepsizes), label='M-H stepsize')\n", 306 | "plt.plot(np.log10(ave_stepsizes), label='averaged stepsize')\n", 307 | "plt.ylim([-2.1, -.4])\n", 308 | "\n", 309 | "plt.legend()\n", 310 | "plt.show()" 311 | ] 312 | } 313 | ], 314 | "metadata": { 315 | "kernelspec": { 316 | "display_name": "Python 3", 317 | "language": "python", 318 | "name": "python3" 319 | }, 320 | "language_info": { 321 | "codemirror_mode": { 322 | "name": "ipython", 323 | "version": 3 324 | }, 325 | "file_extension": ".py", 326 | "mimetype": "text/x-python", 327 | "name": "python", 328 | "nbconvert_exporter": "python", 329 | "pygments_lexer": "ipython3", 330 | "version": "3.6.3" 331 | } 332 | }, 333 | "nbformat": 4, 334 | "nbformat_minor": 1 335 | } 336 | -------------------------------------------------------------------------------- /bayesbridge/reg_coef_sampler/hamiltonian_monte_carlo/stepsize_adapter.py: -------------------------------------------------------------------------------- 1 | from math import exp, log, log10, sqrt, copysign 2 | from .util import warn_message_only 3 | import scipy.stats as stats 4 | 5 | 6 | class HamiltonianBasedStepsizeAdapter(): 7 | """ 8 | Updates the stepsize of an HMC integrator so that the average Hamiltonian 9 | error matches a pre-specified target value. 10 | """ 11 | 12 | def __init__(self, init_stepsize, target_accept_prob=.9, 13 | init_adaptsize=1., adapt_decay_exponent=1., 14 | reference_iteration=500, adaptsize_at_reference=.05): 15 | """ 16 | Parameters 17 | ---------- 18 | reference_iteration & adaptsize_at_reference: 19 | Stepsize sequence of Robbins-Monro algorithm will be set so that it 20 | decreases to `adaptsize_at_refrence` after `reference_iteration`. 21 | """ 22 | if init_stepsize <= 0: 23 | raise ValueError("The initial stepsize must be positive.") 24 | log_init_stepsize = log(init_stepsize) 25 | self.log_stepsize = log_init_stepsize 26 | self.log_stepsize_averaged = log_init_stepsize 27 | self.n_averaged = 0 28 | self.target_accept_prob = target_accept_prob 29 | self.target_log10_hamiltonian_error \ 30 | = self.convert_to_log_hamiltonian_error(target_accept_prob) 31 | 32 | self.rm_stepsizer = RobbinsMonroStepsizer( 33 | init=init_adaptsize, 34 | decay_exponent=adapt_decay_exponent, 35 | reference_iteration=reference_iteration, 36 | size_at_reference=adaptsize_at_reference 37 | ) 38 | 39 | @staticmethod 40 | def convert_to_log_hamiltonian_error(target_accept_prob): 41 | """ Calculate the target squared Hamiltonian error in the log scale. 42 | 43 | Under a high-dimensional limit of i.i.d. parameters, the Hamiltonian 44 | error is distributed as 45 | Normal(mean = - delta / 2, var = delta), 46 | and the corresponding average acceptance rate is 47 | 2 GausssianCDF(- sqrt(delta) / 2). 48 | So we solve for `delta` that theoretically achieves the target acceptance 49 | rate and try to calibrate the average square error of the Hamiltonian 50 | to be the theoretical value (delta^2 / 4 + delta). 
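For instance (a worked example of the formula above; numbers are approximate): a target acceptance probability of 0.9 gives delta = 4 * norm.ppf(0.45) ** 2, roughly 0.063, so this method returns about .5 * log10(0.063 + 0.063 ** 2 / 4), or roughly -0.60, corresponding to a typical absolute Hamiltonian error of about 0.25.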
51 | """ 52 | if target_accept_prob <= 0 or target_accept_prob >= 1: 53 | raise ValueError("Target probability must be within (0, 1).") 54 | delta = 4 * stats.norm.ppf(target_accept_prob / 2) ** 2 55 | target_log10_hamiltonian_error = .5 * log10(delta + delta ** 2 / 4) 56 | return target_log10_hamiltonian_error 57 | 58 | def get_current_stepsize(self, averaged=False): 59 | if averaged: 60 | return exp(self.log_stepsize_averaged) 61 | else: 62 | return exp(self.log_stepsize) 63 | 64 | def reinitialize(self, init_stepsize): 65 | log_init_stepsize = log(init_stepsize) 66 | self.log_stepsize = log_init_stepsize 67 | self.log_stepsize_averaged = log_init_stepsize 68 | self.n_averaged = 0 69 | 70 | def adapt_stepsize(self, hamiltonian_error): 71 | rm_stepsize = self.rm_stepsizer.calculate_stepsize(self.n_averaged) 72 | self.n_averaged += 1 73 | adaptsize = self.transform_to_adaptsize(hamiltonian_error) 74 | self.log_stepsize += rm_stepsize * adaptsize 75 | weight = 1 / self.n_averaged 76 | self.log_stepsize_averaged = ( 77 | weight * self.log_stepsize 78 | + (1 - weight) * self.log_stepsize_averaged 79 | ) 80 | return exp(self.log_stepsize) 81 | 82 | def transform_to_adaptsize( 83 | self, error, upper_bound=1., trans_type='piecewise'): 84 | """ 85 | Parameters 86 | ---------- 87 | trans_type: str, {'log-linear', 'sign', 'piecewise'} 88 | """ 89 | 90 | if trans_type == 'probability': 91 | accept_prob = min(1, exp(error)) 92 | adapt_size = accept_prob - self.target_accept_prob 93 | return adapt_size 94 | 95 | if error == 0.: 96 | log10_error = - float('inf') 97 | else: 98 | log10_error = log10(abs(error)) 99 | 100 | target = self.target_log10_hamiltonian_error 101 | if trans_type == 'log-linear': 102 | adapt_size = target - log10_error 103 | 104 | elif trans_type == 'sign': 105 | adapt_size = copysign(1., target - log10_error) 106 | 107 | elif trans_type == 'piecewise': 108 | # Increase the adjustment when the error is larger than the target. 109 | if log10_error > target: 110 | adapt_size = (target - log10_error) / .301 # Convert to log2 scale. 111 | else: 112 | adapt_size = (target - log10_error) / 3 # Convert to log1000 scale. 113 | 114 | else: 115 | raise NotImplementedError() 116 | 117 | if abs(adapt_size) > upper_bound: 118 | adapt_size = copysign(1., adapt_size) 119 | 120 | return adapt_size 121 | 122 | 123 | def initialize_stepsize(compute_acceptprob, dt=1.0): 124 | """ Heuristic for choosing an initial value of dt 125 | 126 | Parameters 127 | ---------- 128 | compute_acceptprob: callable 129 | Computes the acceptance probability of the proposal one-step HMC proposal. 130 | """ 131 | 132 | # Figure out what direction we should be moving dt. 133 | acceptprob = compute_acceptprob(dt) 134 | direc = 2 * int(acceptprob > 0.5) - 1 135 | 136 | # Keep moving dt in that direction until acceptprob crosses 0.5. 137 | while acceptprob == 0 or (2 * acceptprob) ** direc > 1: 138 | dt = dt * (2 ** direc) 139 | acceptprob = compute_acceptprob(dt) 140 | if acceptprob == 0 and direc == 1: 141 | # The last doubling of stepsize was too much. 
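# (Doubling continues while the acceptance probability stays above 0.5 and halving continues while it stays below 0.5; an acceptance probability of exactly zero during the doubling phase means the integrator diverged, so back off by one halving before returning.)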
142 | dt /= 2 143 | break 144 | 145 | return dt 146 | 147 | 148 | class RobbinsMonroStepsizer(): 149 | 150 | def __init__(self, init=1., decay_exponent=1., 151 | reference_iteration=None, size_at_reference=None): 152 | self.init = init 153 | self.exponent = decay_exponent 154 | self.scale = self.determine_decay_scale( 155 | init, decay_exponent, reference_iteration, size_at_reference 156 | ) 157 | 158 | def determine_decay_scale(self, init, decay_exponent, ref_iter, size_at_ref): 159 | 160 | if (ref_iter is not None) and (size_at_ref is not None): 161 | decay_scale = \ 162 | ref_iter / ((init / size_at_ref) ** (1 / decay_exponent) - 1) 163 | else: 164 | warn_message_only( 165 | 'The default stepsize sequence tends to decay too quickly; ' 166 | 'consider manually setting the decay scale.' 167 | ) 168 | decay_scale = 1. 169 | 170 | return decay_scale 171 | 172 | def __iter__(self): 173 | self.n_iter = 0 174 | return self 175 | 176 | def __next__(self): 177 | stepsize = self.calculate_stepsize(self.n_iter) 178 | self.n_iter += 1 179 | return stepsize 180 | 181 | def calculate_stepsize(self, n_iter): 182 | stepsize = self.init / (1 + n_iter / self.scale) ** self.exponent 183 | return stepsize 184 | 185 | 186 | class RobbinsMonroStepsizeAdapter(): 187 | 188 | def __init__(self, init_stepsize, target_accept_prob=.9, 189 | init_adaptsize=1., adapt_decay_exponent=1., 190 | reference_iteration=100, adaptsize_at_reference=.05): 191 | """ 192 | Parameters 193 | ---------- 194 | reference_iteration & adaptsize_at_reference: 195 | Stepsize sequence of Robbins-Monro algorithm will be set so that it 196 | decreases to `adaptsize_at_reference` after `reference_iteration`. 197 | """ 198 | if init_stepsize <= 0: 199 | raise ValueError("The initial stepsize must be positive.") 200 | log_init_stepsize = log(init_stepsize) 201 | self.log_stepsize = log_init_stepsize 202 | self.log_stepsize_averaged = log_init_stepsize 203 | self.n_averaged = 0 204 | self.target_accept_prob = target_accept_prob 205 | 206 | self.rm_stepsizer = iter(RobbinsMonroStepsizer( 207 | init=init_adaptsize, 208 | decay_exponent=adapt_decay_exponent, 209 | reference_iteration=reference_iteration, 210 | size_at_reference=adaptsize_at_reference 211 | )) 212 | 213 | def get_current_stepsize(self, averaged=False): 214 | if averaged: 215 | return exp(self.log_stepsize_averaged) 216 | else: 217 | return exp(self.log_stepsize) 218 | 219 | def adapt_stepsize(self, accept_prob): 220 | self.n_averaged += 1 221 | rm_stepsize = next(self.rm_stepsizer) 222 | adaptsize = \ 223 | self.transform_to_adaptsize(accept_prob, self.target_accept_prob) 224 | self.log_stepsize += rm_stepsize * adaptsize 225 | weight = 1 / self.n_averaged 226 | self.log_stepsize_averaged = ( 227 | weight * self.log_stepsize 228 | + (1 - weight) * self.log_stepsize_averaged 229 | ) 230 | return exp(self.log_stepsize) 231 | 232 | def transform_to_adaptsize( 233 | self, accept_prob, target, trans_type='linear'): 234 | """ 235 | Parameters 236 | ---------- 237 | trans_type: str, {'linear', 'sign', 'penalize-high-prob'} 238 | """ 239 | 240 | if trans_type == 'linear': 241 | adapt_size = accept_prob - target 242 | 243 | elif trans_type == 'sign': 244 | adapt_size = copysign(1., accept_prob - target) 245 | 246 | elif trans_type == 'penalize-high-prob': 247 | # Transforms accept_prob -> adapt_size so that it roughly interpolates 248 | # the points (0, -1), (target, 0), and (1, 1).
Transformation is 249 | # linear near accept_prob = target but quickly goes up to 250 | # adapt_size = 1 as (1 - accecpt_prob) becomes an order of manitude 251 | # smaller than (1 - target). 252 | if accept_prob <= target: 253 | adapt_size = (accept_prob - target) / target 254 | else: 255 | epsilon = 2. ** -52 256 | magnitude_diff = log10( 257 | (1. - (accept_prob - epsilon)) / (1 - target) 258 | ) 259 | if magnitude_diff == 0: 260 | w = 0. 261 | else: 262 | w = exp(magnitude_diff ** - 1) 263 | adapt_size = ( 264 | (1 - w) * (accept_prob - target) / target 265 | - w * magnitude_diff 266 | ) 267 | adapt_size = min(1., adapt_size) 268 | 269 | else: 270 | raise NotImplementedError() 271 | 272 | return adapt_size 273 | 274 | 275 | class DualAverageStepsizeAdapter(): 276 | 277 | def __init__(self, init_stepsize, target_accept_prob=.9): 278 | 279 | if init_stepsize <= 0: 280 | raise ValueError("The initial stepsize must be positive.") 281 | log_init_stepsize = log(init_stepsize) 282 | self.log_stepsize = log_init_stepsize 283 | self.log_stepsize_averaged = log_init_stepsize 284 | self.n_averaged = 0 285 | self.target_accept_prob = target_accept_prob 286 | self.latent_stat = 0. # Used for dual-averaging. 287 | 288 | # Parameters for the dual-averaging algorithm. 289 | self.stepsize_averaging_log_decay_rate = 0.75 290 | self.latent_prior_samplesize = 10 291 | multiplier = 2. # > 1 to err on the side of shrinking toward a larger value. 292 | self.log_stepsize_shrinkage_mean = log(multiplier) + log_init_stepsize 293 | self.log_stepsize_shrinkage_strength = 0.05 294 | # Variable name is not quite accurate since this parameter interacts with latent_prior_samplesize. 295 | 296 | def get_current_stepsize(self, averaged=False): 297 | if averaged: 298 | return exp(self.log_stepsize_averaged) 299 | else: 300 | return exp(self.log_stepsize) 301 | 302 | def adapt_stepsize(self, accept_prob): 303 | self.n_averaged += 1 304 | self.latent_stat = self.update_latent_stat( 305 | accept_prob, self.target_accept_prob, self.latent_stat 306 | ) 307 | self.log_stepsize, self.log_stepsize_averaged = self.dual_average_stepsize( 308 | self.latent_stat, self.log_stepsize_averaged 309 | ) 310 | return exp(self.log_stepsize) 311 | 312 | def update_latent_stat(self, accept_prob, target_accept_prob, latent_stat): 313 | weight_latent = (self.n_averaged + self.latent_prior_samplesize) ** -1 314 | latent_stat = (1 - weight_latent) * latent_stat \ 315 | + weight_latent * (target_accept_prob - accept_prob) 316 | return latent_stat 317 | 318 | def dual_average_stepsize(self, latent_stat, log_stepsize_optimized): 319 | log_stepsize = ( 320 | self.log_stepsize_shrinkage_mean 321 | - sqrt(self.n_averaged) / self.log_stepsize_shrinkage_strength * latent_stat 322 | ) 323 | weight = self.n_averaged ** - self.stepsize_averaging_log_decay_rate 324 | log_stepsize_optimized = \ 325 | (1 - weight) * log_stepsize_optimized + weight * log_stepsize 326 | return log_stepsize, log_stepsize_optimized -------------------------------------------------------------------------------- /bayesbridge/random/tilted_stable/tilted_stable.pyx: -------------------------------------------------------------------------------- 1 | cimport cython 2 | from libc.math cimport exp as exp_c 3 | from libc.math cimport fabs, pow, log, sqrt, sin, floor, INFINITY, M_PI 4 | import random 5 | import numpy as np 6 | cimport numpy as np 7 | from numpy.random import PCG64 8 | from numpy.random.bit_generator cimport BitGenerator 9 | from bayesbridge.random.normal.normal cimport 
random_normal 10 | from bayesbridge.random.uniform.uniform cimport random_uniform 11 | 12 | 13 | cdef double MAX_EXP_ARG = 709 # ~ log(2 ** 1024) 14 | ctypedef np.uint8_t np_uint8 15 | ctypedef double (*rand_generator)() 16 | 17 | 18 | cdef double exp(double x): 19 | if x > MAX_EXP_ARG: 20 | val = INFINITY 21 | elif x < - MAX_EXP_ARG: 22 | val = 0. 23 | else: 24 | val = exp_c(x) 25 | return val 26 | 27 | 28 | @cython.cdivision(True) 29 | cdef double sinc(double x): 30 | cdef double x_sq 31 | if fabs(x) < .01: 32 | x_sq = x * x 33 | val = 1. - x_sq / 6. * (1 - x_sq / 20.) 34 | # Taylor approximation with an error bounded by 2e-16 35 | else: 36 | val = sin(x) / x 37 | return val 38 | 39 | 40 | cdef double python_builtin_next_double(): 41 | return random.random() 42 | 43 | 44 | cdef class ExpTiltedStableDist(): 45 | cdef rand_generator next_double 46 | cdef double TILT_POWER_THRESHOLD # For deciding the faster of two algorithms 47 | cdef BitGenerator bitgen 48 | 49 | def __init__(self, seed=None): 50 | self.set_seed(seed) 51 | self.bitgen = PCG64(seed) 52 | self.TILT_POWER_THRESHOLD = 2. 53 | 54 | def set_seed(self, seed): 55 | self.bitgen = PCG64(seed) 56 | 57 | def get_state(self): 58 | return self.bitgen.state 59 | 60 | def set_state(self, state): 61 | self.bitgen.state = state 62 | 63 | @cython.boundscheck(False) 64 | @cython.wraparound(False) 65 | def sample(self, char_exponent, tilt, method=None): 66 | """ 67 | Generate a random variable from a stable distribution with 68 | characteristic exponent = char_exponent < 1 69 | skewness = 1 70 | scale = cos(char_exponent * pi / 2) ** (1 / char_exponent) 71 | location = 0 72 | exponential tilting = tilt 73 | (The density p(x) is tilted by exp(- tilt * x).) 74 | 75 | The cost of the divide-conquer algorithm increases as a function of 76 | 'tilt ** char_exp'. While the cost of double-rejection algorithm is 77 | bounded, the divide-conquer algorithm is simpler and faster for small 78 | 'tilt ** char_exp'. 79 | 80 | References: 81 | ----------- 82 | Implementation is mostly based on the algorithm descriptions in 83 | 'Sampling Exponentially Tilted Stable Distributions' by Hofert (2011) 84 | Ideas behind and details on the double-rejection sampling is better 85 | described in 86 | 'Random variate generation for exponentially and polynomially tilted 87 | stable distributions' by Devroye (2009) 88 | """ 89 | if not isinstance(tilt, np.ndarray): 90 | raise TypeError('Tilt parameter must be a numpy array.') 91 | if isinstance(char_exponent, (np.floating, float)): 92 | char_exponent = np.tile(char_exponent, tilt.size) 93 | elif isinstance(char_exponent, np.ndarray): 94 | if not char_exponent.size == tilt.size: 95 | raise ValueError('Input arrays must be of the same length.') 96 | else: 97 | raise TypeError('Characteristic exponent must be float or numpy array.') 98 | if not np.all(char_exponent < 1): 99 | raise ValueError('Characteristic exponent must be smaller than 1.') 100 | if not np.all(tilt > 0): 101 | raise ValueError('Tilting parameter must be positive.') 102 | 103 | if method is None: 104 | # Choose a likely faster method. 105 | divide_conquer_cost = tilt ** char_exponent 106 | double_rejection_cost = self.TILT_POWER_THRESHOLD 107 | # The relative costs are implementation & architecture dependent. 
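# The divide-and-conquer sampler splits a draw into roughly floor(tilt ** char_exp) partial sums (see sample_by_divide_and_conquer), so its cost grows with tilt ** char_exp, whereas the expected cost of the double-rejection sampler is bounded; the fixed threshold below simply picks whichever method is likely cheaper.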
108 | use_divide_conquer = (divide_conquer_cost < double_rejection_cost) 109 | elif method in ['divide-conquer', 'double-rejection']: 110 | use_divide_conquer = np.tile(method == 'divide-conquer', tilt.size) 111 | else: 112 | raise ValueError("Unrecognized method name.") 113 | 114 | char_exponent = char_exponent.astype(np.double) 115 | tilt = tilt.astype(np.double) 116 | use_divide_conquer = use_divide_conquer.astype(np.uint8) 117 | result = np.zeros(tilt.size, dtype=np.double) 118 | 119 | cdef double[:] char_exponent_view = char_exponent 120 | cdef double[:] tilt_view = tilt 121 | cdef np_uint8[:] use_divide_conquer_view = use_divide_conquer 122 | cdef double[:] result_view = result 123 | cdef long n_sample = tilt.size 124 | cdef Py_ssize_t i 125 | 126 | for i in range(n_sample): 127 | if use_divide_conquer_view[i]: 128 | result_view[i] = self.sample_by_divide_and_conquer( 129 | char_exponent_view[i], tilt_view[i] 130 | ) 131 | else: 132 | result_view[i] = self.sample_by_double_rejection( 133 | char_exponent_view[i], tilt_view[i] 134 | ) 135 | return result 136 | 137 | cdef double sample_by_divide_and_conquer(self, double char_exp, double tilt): 138 | cdef double X, c 139 | cdef long partition_size = max(1, floor(pow(tilt, char_exp))) 140 | X = 0. 141 | c = pow(1. / partition_size, 1. / char_exp) 142 | for i in range(partition_size): 143 | X += self.sample_divided_rv(char_exp, tilt, c) 144 | return X 145 | 146 | cdef double sample_divided_rv(self, double char_exp, double tilt, double c): 147 | cdef bint accepted = False 148 | while not accepted: 149 | S = c * self.sample_non_tilted_rv(char_exp) 150 | accept_prob = exp(- tilt * S) 151 | accepted = (random_uniform(self.bitgen) < accept_prob) 152 | return S 153 | 154 | cdef double sample_non_tilted_rv(self, double char_exp): 155 | cdef double S = pow( 156 | - self.zolotarev_function(M_PI * random_uniform(self.bitgen), char_exp) 157 | / log(random_uniform(self.bitgen)), 158 | (1. - char_exp) / char_exp 159 | ) 160 | return S 161 | 162 | cdef double sample_by_double_rejection(self, double char_exp, double tilt): 163 | 164 | cdef double U, V, X, z, log_accept_prob 165 | cdef double tilt_power = pow(tilt, char_exp) 166 | 167 | # Start double-rejection sampling. 168 | cdef bint accepted = False 169 | while not accepted: 170 | U, V, z = self.sample_aux_rv(char_exp, tilt_power) 171 | X, log_accept_prob = \ 172 | self.sample_reference_rv(U, char_exp, tilt_power, z) 173 | accepted = (log_accept_prob > log(V)) 174 | 175 | return pow(X, - (1. - char_exp) / char_exp) 176 | 177 | cdef (double, double, double) \ 178 | sample_aux_rv(self, double char_exp, double tilt_power): 179 | """ 180 | Samples an auxiliary random variable for the double-rejection algorithm. 181 | Returns: 182 | U : auxiliary random variable for the double-rejection algorithm 183 | V : uniform random variable independent of U, X 184 | z : scalar quantity used later 185 | """ 186 | cdef double U, V, z, accept_prob 187 | cdef double gamma, xi, psi 188 | # Intermediate quantities; could be computed outside the funciton 189 | # and reused in case of rejection 190 | gamma = tilt_power * char_exp * (1. - char_exp) 191 | xi = (1. + sqrt(2. * gamma) * (2. + sqrt(.5 * M_PI))) / M_PI 192 | psi = sqrt(gamma / M_PI) * (2. + sqrt(.5 * M_PI)) \ 193 | * exp(- gamma * M_PI * M_PI / 8.) 
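# Outer rejection loop: propose the auxiliary variable U from the envelope in sample_aux2_rv, accept or reject it, and return V alongside U and z so that sample_by_double_rejection can carry out the second rejection step.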
194 | cdef bint accepted = False 195 | while not accepted: 196 | U = self.sample_aux2_rv(xi, psi, gamma) 197 | if U > M_PI: 198 | continue 199 | 200 | zeta = sqrt(self.zolotarev_pdf_exponentiated(U, char_exp)) 201 | z = 1. / (1. - pow(1. + char_exp * zeta / sqrt(gamma), -1. / char_exp)) 202 | accept_prob = self.compute_aux2_accept_prob( 203 | U, xi, psi, zeta, z, tilt_power, gamma 204 | ) 205 | if accept_prob > 0.: 206 | V = random_uniform(self.bitgen) / accept_prob 207 | accepted = (U < M_PI and V <= 1.) 208 | 209 | return U, V, z 210 | 211 | cdef double sample_aux2_rv(self, 212 | double xi, double psi, double gamma): 213 | """ 214 | Sample the 2nd level auxiliary random variable (i.e. the additional 215 | auxiliary random variable used to sample the auxiliary variable for the 216 | double-rejection algorithm.) 217 | """ 218 | 219 | w1 = sqrt(.5 * M_PI / gamma) * xi 220 | w2 = 2. * sqrt(M_PI) * psi 221 | w3 = xi * M_PI 222 | V = random_uniform(self.bitgen) 223 | if gamma >= 1: 224 | if V < w1 / (w1 + w2): 225 | U = fabs(random_normal(self.bitgen)) / sqrt(gamma) 226 | else: 227 | W = random_uniform(self.bitgen) 228 | U = M_PI * (1. - W * W) 229 | else: 230 | W = random_uniform(self.bitgen) 231 | if V < w3 / (w2 + w3): 232 | U = M_PI * W 233 | else: 234 | U = M_PI * (1. - W * W) 235 | 236 | return U 237 | 238 | cdef double compute_aux2_accept_prob(self, 239 | double U, double xi, double psi, double zeta, double z, 240 | double tilt_power, double gamma 241 | ): 242 | inverse_accept_prob = M_PI * exp(-tilt_power * (1. - 1. / (zeta * zeta))) \ 243 | / ((1. + sqrt(.5 * M_PI)) * sqrt(gamma) / zeta + z) 244 | d = 0. 245 | if U >= 0. and gamma >= 1: 246 | d += xi * exp(-gamma * U * U / 2.) 247 | if U > 0. and U < M_PI: 248 | d += psi / sqrt(M_PI - U) 249 | if U >= 0. and U <= M_PI and gamma < 1.: 250 | d += xi 251 | inverse_accept_prob *= d 252 | accept_prob = 1 / inverse_accept_prob 253 | return accept_prob 254 | 255 | cdef (double, double) sample_reference_rv(self, 256 | double U, double char_exp, double tilt_power, double z): 257 | """ 258 | Generate a sample from the reference (augmented) distribution conditional 259 | on U for the double-rejection algorithm. The algorithm uses a rejection 260 | sampler with half-Gaussian, uniform, and truncated exponential proposals on the 261 | left, middle, and right pieces of a partitioned real line. 262 | 263 | Returns: 264 | -------- 265 | X : random variable from the reference distribution 266 | log_accept_prob : log of the acceptance probability for the outer rejection step 267 | """ 268 | cdef double a, left_thresh, right_thresh, expo_scale, \ 269 | mass_left, mass_mid, mass_right, mass_total, X, V, N, E 270 | a = self.zolotarev_function(U, char_exp) 271 | left_thresh = pow((1. - char_exp) / char_exp / a, char_exp) * tilt_power 272 | right_thresh = left_thresh + sqrt(left_thresh * char_exp / a) 273 | expo_scale = z / a 274 | mass_left = (right_thresh - left_thresh) * sqrt(.5 * M_PI) 275 | mass_mid = (right_thresh - left_thresh) 276 | mass_right = expo_scale 277 | mass_total = mass_left + mass_mid + mass_right 278 | V = random_uniform(self.bitgen) 279 | N = 0. 280 | E = 0.
281 | # Divided into three pieces at left_thresh and (left_thresh + mid_width) 282 | if V < mass_left / mass_total: 283 | N = random_normal(self.bitgen) 284 | X = left_thresh - (right_thresh - left_thresh) * fabs(N) 285 | elif V < (mass_left + mass_mid) / mass_total: 286 | X = left_thresh + (right_thresh - left_thresh) * random_uniform(self.bitgen) 287 | else: 288 | E = - log(random_uniform(self.bitgen)) 289 | X = right_thresh + E * mass_right 290 | 291 | log_accept_prob = self.compute_log_accept_prob( 292 | X, N, E, left_thresh, right_thresh, a, char_exp, tilt_power 293 | ) 294 | return X, log_accept_prob 295 | 296 | cdef double compute_log_accept_prob(self, 297 | double X, double N, double E, double left_thresh, double right_thresh, 298 | double a, double char_exp, double tilt_power 299 | ): 300 | cdef double char_exp_odds = (1. - char_exp) / char_exp 301 | if X < 0: 302 | log_accept_prob = - INFINITY 303 | else: 304 | log_accept_prob = - ( 305 | a * (X - left_thresh) 306 | + exp(log(tilt_power) / char_exp - char_exp_odds * log(left_thresh)) 307 | * (pow(left_thresh / X, char_exp_odds) - 1.) 308 | ) 309 | if X < left_thresh: 310 | log_accept_prob += N * N / 2. 311 | elif X > right_thresh: 312 | log_accept_prob += E 313 | 314 | return log_accept_prob 315 | 316 | cdef double zolotarev_pdf_exponentiated(self, double x, double char_exp): 317 | """ 318 | Evaluates a function proportional to a power of the Zolotarev density. 319 | """ 320 | cdef double denominator, numerator 321 | denominator = pow(sinc(char_exp * x), char_exp) \ 322 | * pow(sinc((1. - char_exp) * x), (1. - char_exp)) 323 | numerator = sinc(x) 324 | return numerator / denominator 325 | 326 | cdef double zolotarev_function(self, double x, double char_exp): 327 | cdef double val = pow( 328 | pow((1. - char_exp) * sinc((1. - char_exp) * x), (1. - char_exp)) 329 | * pow(char_exp * sinc(char_exp * x), char_exp) 330 | / sinc(x) 331 | , 1. / (1. - char_exp)) 332 | return val 333 | -------------------------------------------------------------------------------- /bayesbridge/reg_coef_sampler/hamiltonian_monte_carlo/nuts.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import math 3 | import time 4 | from .stepsize_adapter import HamiltonianBasedStepsizeAdapter, initialize_stepsize 5 | from .dynamics import HamiltonianDynamics 6 | from .util import warn_message_only 7 | 8 | 9 | class NoUTurnSampler(): 10 | 11 | def __init__(self, f, mass=None, warning_requested=True): 12 | """ 13 | Parameters 14 | ---------- 15 | f: callable 16 | Return the log probability and gradient evaluated at q. 17 | mass: None, numpy 1d array, or callable 18 | """ 19 | self.f = f 20 | self.dynamics = HamiltonianDynamics(mass) 21 | self.warning_requested = warning_requested 22 | 23 | def generate_samples( 24 | self, q0, n_burnin, n_sample, dt_range=None, seed=None, n_update=0, 25 | adapt_stepsize=False, target_accept_prob=.9, final_adaptsize=.05): 26 | """ 27 | Implements the No-U-Turn Sampler (NUTS) of Hoffman and Gelman (2011). 28 | 29 | Parameters: 30 | ----------- 31 | dt_range: None, float, or ndarray of length 2 32 | adapt_stepsize: bool 33 | If True, the max stepsize will be adjusted to to achieve the target 34 | acceptance rate. Forced to be True if dt_range is None. 
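Example (an illustrative sketch, assuming numpy is imported as np and `f` returns the pair (log density, gradient) as described in the class docstring):

>>> def std_normal_logp_and_grad(q):
...     return -0.5 * np.sum(q ** 2), -q
>>> sampler = NoUTurnSampler(std_normal_logp_and_grad)
>>> samples, info = sampler.generate_samples(
...     q0=np.zeros(2), n_burnin=100, n_sample=1000, seed=0)

Here `samples` has shape (len(q0), n_burnin + n_sample), with the burn-in draws stored in the leading columns.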
35 | """ 36 | 37 | if seed is not None: 38 | np.random.seed(seed) 39 | 40 | q = q0 41 | logp, grad = self.f(q) 42 | 43 | if np.isscalar(dt_range): 44 | dt_range = np.array(2 * [dt_range]) 45 | 46 | elif dt_range is None: 47 | p = self.dynamics.draw_momentum(len(q)) 48 | logp_joint0 = - self.dynamics.compute_hamiltonian(logp, p) 49 | dt = initialize_stepsize( 50 | lambda dt: self.compute_onestep_accept_prob(dt, q, p, grad, logp_joint0) 51 | ) 52 | dt_range = dt * np.array([.8, 1.0]) 53 | adapt_stepsize = True 54 | 55 | max_stepsize_adapter = HamiltonianBasedStepsizeAdapter( 56 | init_stepsize=1., target_accept_prob=target_accept_prob, 57 | reference_iteration=n_burnin, adaptsize_at_reference=final_adaptsize 58 | ) 59 | 60 | if n_update > 0: 61 | n_per_update = math.ceil((n_burnin + n_sample) / n_update) 62 | else: 63 | n_per_update = float('inf') 64 | samples = np.zeros((len(q), n_sample + n_burnin)) 65 | logp_samples = np.zeros(n_sample + n_burnin) 66 | accept_prob = np.zeros(n_sample + n_burnin) 67 | max_dt = np.zeros(n_burnin) 68 | 69 | tic = time.time() 70 | use_averaged_stepsize = False 71 | for i in range(n_sample + n_burnin): 72 | dt_multiplier \ 73 | = max_stepsize_adapter.get_current_stepsize(use_averaged_stepsize) 74 | dt = np.random.uniform(dt_range[0], dt_range[1]) 75 | dt *= dt_multiplier 76 | q, info = self.generate_next_state(dt, q, logp, grad) 77 | logp, grad = info['logp'], info['grad'] 78 | if i < n_burnin and adapt_stepsize: 79 | max_dt[i] = dt_range[1] * dt_multiplier 80 | max_stepsize_adapter.adapt_stepsize(info['ave_hamiltonian_error']) 81 | elif i == n_burnin - 1: 82 | use_averaged_stepsize = True 83 | samples[:, i] = q 84 | logp_samples[i] = logp 85 | if (i + 1) % n_per_update == 0: 86 | print('{:d} iterations have been completed.'.format(i + 1)) 87 | 88 | toc = time.time() 89 | time_elapsed = toc - tic 90 | 91 | info = { 92 | 'logp_samples': logp_samples, 93 | 'accept_prob_samples': accept_prob, 94 | 'sampling_time': time_elapsed 95 | } 96 | if adapt_stepsize: 97 | info['max_stepsize'] = max_dt 98 | 99 | return samples, info 100 | 101 | 102 | def compute_onestep_accept_prob(self, dt, q0, p0, grad0, logp_joint0): 103 | _, p, logp, _ = self.dynamics.integrate(self.f, dt, q0, p0, grad0) 104 | logp_joint = - self.dynamics.compute_hamiltonian(logp, p) 105 | accept_prob = np.exp(logp_joint - logp_joint0) 106 | return accept_prob 107 | 108 | def generate_next_state(self, dt, q, logp=None, grad=None, p=None, 109 | max_height=10, hamiltonian_error_tol=100): 110 | 111 | n_grad_evals = 0 112 | if logp is None or grad is None: 113 | logp, grad = self.f(q) 114 | n_grad_evals += 1 115 | 116 | if p is None: 117 | p = self.dynamics.draw_momentum(len(q)) 118 | 119 | logp_joint = - self.dynamics.compute_hamiltonian(logp, p) 120 | logp_joint_threshold = logp_joint - np.random.exponential() 121 | # Slicing variable in the log-scale. 122 | 123 | tree = _TrajectoryTree( 124 | self.dynamics, self.f, dt, q, p, logp, grad, logp_joint, logp_joint, 125 | logp_joint_threshold, hamiltonian_error_tol 126 | ) 127 | directions = 2 * (np.random.rand(max_height) < 0.5) - 1 128 | # Pre-allocation of random directions is unnecessary, but makes the code easier to test. 
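# Each pre-drawn direction (+1 or -1) decides whether the trajectory tree is doubled forward or backward in time at the corresponding height; growth stops at a U-turn, on numerical instability, or once the maximum height is reached.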
129 | tree, final_height, last_doubling_rejected, maxed_before_u_turn \ 130 | = self._grow_trajectory_till_u_turn(tree, directions) 131 | q, logp, grad = tree.sample 132 | n_grad_evals += tree.n_integration_step 133 | 134 | if self.warning_requested: 135 | self._issue_warnings( 136 | tree.instability_detected, maxed_before_u_turn, max_height 137 | ) 138 | 139 | info = { 140 | 'logp': logp, 141 | 'grad': grad, 142 | 'ave_accept_prob': tree.ave_accept_prob, 143 | 'ave_hamiltonian_error': tree.ave_hamiltonian_error, 144 | 'n_grad_evals': n_grad_evals, 145 | 'tree_height': final_height, 146 | 'u_turn_detected': tree.u_turn_detected, 147 | 'instability_detected': tree.instability_detected, 148 | 'last_doubling_rejected': last_doubling_rejected 149 | } 150 | 151 | return q, info 152 | 153 | def _issue_warnings( 154 | self, instability_detected, maxed_before_u_turn, max_height): 155 | 156 | if instability_detected: 157 | warn_message_only( 158 | "Numerical integration became unstable while simulating a " 159 | "NUTS trajectory." 160 | ) 161 | if maxed_before_u_turn: 162 | warn_message_only( 163 | 'The trajectory tree reached the max height of {:d} before ' 164 | 'meeting the U-turn condition.'.format(max_height) 165 | ) 166 | return 167 | 168 | @staticmethod 169 | def _grow_trajectory_till_u_turn(tree, directions): 170 | 171 | height = 0 # Referred to as 'depth' in the original paper, but arguably the 172 | # trajectory tree is built 'upward' on top of the existing ones. 173 | max_height = len(directions) 174 | trajectory_terminated = False 175 | while not trajectory_terminated: 176 | 177 | doubling_rejected \ 178 | = tree.double_trajectory(height, directions[height]) 179 | # No transition to the next half of the trajectory takes place if the 180 | # termination criteria are met within the next half tree. 181 | 182 | height += 1 183 | trajectory_terminated \ 184 | = tree.u_turn_detected or tree.instability_detected or (height >= max_height) 185 | maxed_before_u_turn \ 186 | = height >= max_height and (not tree.u_turn_detected) 187 | 188 | return tree, height, doubling_rejected, maxed_before_u_turn 189 | 190 | 191 | class _TrajectoryTree(): 192 | """ 193 | Collection of (a subset of) states along the simulated Hamiltonian dynamics 194 | trajectory endowed with a binary tree structure.
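Each call to double_trajectory simulates 2 ** height additional leapfrog states in the direction passed in by the sampler and merges the resulting subtree, so the tree always represents a contiguous trajectory segment whose two ends are stored in front_state and rear_state.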
195 | """ 196 | 197 | def __init__( 198 | self, dynamics, f, dt, q, p, logp, grad, joint_logp, 199 | init_joint_logp, joint_logp_threshold, hamiltonian_error_tol=100., 200 | u_turn_criterion='momentum'): 201 | 202 | self.dynamics = dynamics 203 | self.f = f 204 | self.dt = dt 205 | self.joint_logp_threshold = joint_logp_threshold 206 | self.front_state = (q, p, grad) 207 | self.rear_state = (q, p, grad) 208 | self.sample = (q, logp, grad) 209 | self.u_turn_detected = False 210 | self.min_hamiltonian = - joint_logp 211 | self.max_hamiltonian = - joint_logp 212 | self.hamiltonian_error_tol = hamiltonian_error_tol 213 | self.n_acceptable_state = int(joint_logp > joint_logp_threshold) 214 | self.n_integration_step = 0 215 | self.init_joint_logp = init_joint_logp 216 | self.height = 0 217 | self.ave_hamiltonian_error = abs(init_joint_logp - joint_logp) 218 | self.ave_accept_prob = min(1, math.exp(joint_logp - init_joint_logp)) 219 | self.velocity_based_u_turn = (u_turn_criterion == 'velocity') 220 | 221 | @property 222 | def n_node(self): 223 | return 2 ** self.height 224 | 225 | @property 226 | def instability_detected(self): 227 | fluctuation_along_trajectory = self.max_hamiltonian - self.min_hamiltonian 228 | return fluctuation_along_trajectory > self.hamiltonian_error_tol 229 | 230 | def double_trajectory(self, height, direction): 231 | next_tree = self._build_next_tree( 232 | *self._get_states(direction), height, direction 233 | ) 234 | no_transition_to_next_tree_attempted \ 235 | = self._merge_next_tree(next_tree, direction, sampling_method='swap') 236 | return no_transition_to_next_tree_attempted 237 | 238 | def _build_next_tree(self, q, p, grad, height, direction): 239 | 240 | if height == 0: 241 | return self._build_next_singleton_tree(q, p, grad, direction) 242 | 243 | subtree = self._build_next_tree(q, p, grad, height - 1, direction) 244 | trajectory_terminated_within_subtree \ 245 | = subtree.u_turn_detected or subtree.instability_detected 246 | if not trajectory_terminated_within_subtree: 247 | next_subtree = self._build_next_tree( 248 | *subtree._get_states(direction), height - 1, direction 249 | ) 250 | subtree._merge_next_tree(next_subtree, direction, sampling_method='uniform') 251 | 252 | return subtree 253 | 254 | def _build_next_singleton_tree(self, q, p, grad, direction): 255 | q, p, logp, grad = \ 256 | self.dynamics.integrate(self.f, direction * self.dt, q, p, grad) 257 | self.n_integration_step += 1 258 | if math.isinf(logp): 259 | joint_logp = - float('inf') 260 | else: 261 | joint_logp = - self.dynamics.compute_hamiltonian(logp, p) 262 | return self._clone_tree(q, p, logp, grad, joint_logp) 263 | 264 | def _clone_tree(self, q, p, logp, grad, joint_logp): 265 | """ Construct a tree with shared dynamics and acceptance criteria. """ 266 | return _TrajectoryTree( 267 | self.dynamics, self.f, self.dt, q, p, logp, grad, joint_logp, self.init_joint_logp, 268 | self.joint_logp_threshold, self.hamiltonian_error_tol 269 | ) 270 | 271 | def _merge_next_tree(self, next_tree, direction, sampling_method): 272 | 273 | # Trajectory termination flags from the next tree must be propagated up 274 | # the call stack, but other states of the tree is updated only if the 275 | # next tree is accessible from the current tree (i.e. the trajectory 276 | # did not get terminated within the next tree). 
277 | 278 | self.u_turn_detected = self.u_turn_detected or next_tree.u_turn_detected 279 | self.min_hamiltonian = min(self.min_hamiltonian, next_tree.min_hamiltonian) 280 | self.max_hamiltonian = max(self.max_hamiltonian, next_tree.max_hamiltonian) 281 | trajectory_terminated_within_next_tree \ 282 | = next_tree.u_turn_detected or next_tree.instability_detected 283 | 284 | if not trajectory_terminated_within_next_tree: 285 | self._update_sample(next_tree, sampling_method) 286 | self.n_acceptable_state += next_tree.n_acceptable_state 287 | self._set_states(*next_tree._get_states(direction), direction) 288 | self.u_turn_detected \ 289 | = self.u_turn_detected or self._check_u_turn_at_front_and_rear_ends() 290 | weight = self.n_node / (self.n_node + next_tree.n_node) 291 | self.ave_hamiltonian_error \ 292 | = weight * self.ave_hamiltonian_error + (1 - weight) * next_tree.ave_hamiltonian_error 293 | self.ave_accept_prob \ 294 | = weight * self.ave_accept_prob + (1 - weight) * next_tree.ave_accept_prob 295 | self.height += 1 296 | 297 | return trajectory_terminated_within_next_tree 298 | 299 | def _update_sample(self, next_tree, method): 300 | """ 301 | Parameters 302 | ---------- 303 | method: {'uniform', 'swap'} 304 | """ 305 | if method == 'uniform': 306 | n_total = self.n_acceptable_state + next_tree.n_acceptable_state 307 | sampling_weight_on_next_tree \ 308 | = next_tree.n_acceptable_state / max(1, n_total) 309 | elif method == 'swap': 310 | sampling_weight_on_next_tree \ 311 | = next_tree.n_acceptable_state / self.n_acceptable_state 312 | if np.random.uniform() < sampling_weight_on_next_tree: 313 | self.sample = next_tree.sample 314 | 315 | def _check_u_turn_at_front_and_rear_ends(self): 316 | q_front, p_front, _ = self._get_states(1) 317 | q_rear, p_rear, _ = self._get_states(-1) 318 | dq = q_front - q_rear 319 | if self.velocity_based_u_turn: 320 | v_front = self.dynamics.convert_to_velocity(p_front) 321 | v_rear = self.dynamics.convert_to_velocity(p_rear) 322 | u_turned = (np.dot(dq, v_front) < 0) or (np.dot(dq, v_rear) < 0) 323 | else: 324 | u_turned = (np.dot(dq, p_front) < 0) or (np.dot(dq, p_rear) < 0) 325 | return u_turned 326 | 327 | def _set_states(self, q, p, grad, direction): 328 | if direction > 0: 329 | self.front_state = (q, p, grad) 330 | else: 331 | self.rear_state = (q, p, grad) 332 | 333 | def _get_states(self, direction): 334 | if direction > 0: 335 | return self.front_state 336 | else: 337 | return self.rear_state 338 | --------------------------------------------------------------------------------