├── tests ├── __init__.py ├── gpu_tests │ ├── __init__.py │ └── test_gibbs.py ├── design_matrix │ ├── __init__.py │ └── test_sparse_matrix.py ├── regression_tests │ ├── __init__.py │ ├── saved_outputs │ │ ├── cox_hmc_samples.npy │ │ ├── logit_cg_samples.npy │ │ ├── linear_cg_samples.npy │ │ └── logit_cholesky_samples.npy │ └── test_gibb.py ├── helper.py ├── derivative_tester.py ├── test_design_matrix.py ├── manual_tests │ ├── test_stable_distribution.ipynb │ ├── test_global_scale_prior_hyperparam.ipynb │ └── test_stepsize_adapter.ipynb ├── test_likelihood_models.py └── test_prior.py ├── bayesbridge ├── util │ ├── __init__.py │ └── simplify_warnings.py ├── random │ ├── normal │ │ ├── __init__.py │ │ ├── normal.pxd │ │ └── normal.pyx │ ├── uniform │ │ ├── __init__.py │ │ ├── uniform.pxd │ │ └── uniform.pyx │ ├── __init__.py │ ├── polya_gamma │ │ ├── __init__.py │ │ ├── scipy_ndtr.pxd │ │ ├── setup.py │ │ ├── test_polyagamma.ipynb │ │ ├── polya_gamma.pyx │ │ └── scipy_ndtr.c │ ├── tilted_stable │ │ ├── __init__.py │ │ ├── setup.py │ │ ├── test_tilted_stable.ipynb │ │ ├── compare_methods_speed.ipynb │ │ └── tilted_stable.pyx │ └── random.py ├── reg_coef_sampler │ ├── hamiltonian_monte_carlo │ │ ├── __init__.py │ │ ├── util.py │ │ ├── dynamics.py │ │ ├── hmc.py │ │ ├── stepsize_adapter.py │ │ └── nuts.py │ ├── __init__.py │ ├── direct_gaussian_sampler.py │ ├── reg_coef_posterior_summarizer.py │ └── cg_sampler.py ├── __init__.py ├── design_matrix │ ├── __init__.py │ ├── cython_matmal │ │ ├── setup.py │ │ └── binary_matmul.pyx │ ├── mkl_matvec.py │ ├── dense_matrix.py │ ├── abstract_matrix.py │ └── sparse_matrix.py ├── model │ ├── __init__.py │ ├── abstract_model.py │ ├── linear_model.py │ ├── factory.py │ └── logistic_model.py ├── gibbs_util.py └── prior.py ├── docs ├── demo.ipynb ├── requirements.txt ├── index.rst ├── class.rst ├── Makefile └── conf.py ├── util ├── __init__.py └── mcmc_summarizer.py ├── .gitattributes ├── requirements.txt ├── MANIFEST.in ├── pyproject.toml ├── .readthedocs.yml ├── .gitignore ├── .github └── workflows │ └── python-app.yml ├── setup.py ├── README.rst └── simulate_data.py /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /bayesbridge/util/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/demo.ipynb: -------------------------------------------------------------------------------- 1 | ../demo.ipynb -------------------------------------------------------------------------------- /tests/gpu_tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/design_matrix/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /bayesbridge/random/normal/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /bayesbridge/random/uniform/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
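The tree above shows the package layout: the Gibbs sampler and its models live under bayesbridge/, the Cython random-number extensions under bayesbridge/random/, and the test suite under tests/. For orientation, the public entry points re-exported by the __init__.py files listed below can be exercised end to end roughly as in the following sketch. It is illustrative only: the toy data and sampler settings are arbitrary, and the Cython extensions must be built first (e.g. `python setup.py build_ext --inplace`, as in the CI workflow later in this listing).

import numpy as np
from bayesbridge import BayesBridge, RegressionModel, RegressionCoefPrior

# Toy data, for illustration only.
X = np.random.randn(100, 10)
y = X @ np.random.randn(10) + np.random.randn(100)

model = RegressionModel(y, X, family='linear')
prior = RegressionCoefPrior(bridge_exponent=.5)
bridge = BayesBridge(model, prior)
samples, mcmc_info = bridge.gibbs(
    n_burnin=100, n_post_burnin=1000, thin=1, coef_sampler_type='cholesky'
)
coef_samples = samples['coef']  # posterior draws of the regression coefficients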
/tests/regression_tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /util/__init__.py: -------------------------------------------------------------------------------- 1 | from . import mcmc_summarizer -------------------------------------------------------------------------------- /bayesbridge/random/__init__.py: -------------------------------------------------------------------------------- 1 | from .random import BasicRandom -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | bayesbridge_demo.ipynb linguist-documentation 2 | -------------------------------------------------------------------------------- /bayesbridge/reg_coef_sampler/hamiltonian_monte_carlo/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx>=3.0 2 | nbsphinx 3 | numpy 4 | scipy 5 | -------------------------------------------------------------------------------- /bayesbridge/random/polya_gamma/__init__.py: -------------------------------------------------------------------------------- 1 | from .polya_gamma import PolyaGammaDist -------------------------------------------------------------------------------- /bayesbridge/random/tilted_stable/__init__.py: -------------------------------------------------------------------------------- 1 | from .tilted_stable import ExpTiltedStableDist -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | Cython 2 | numpy>=1.19 3 | scipy 4 | pytest 5 | setuptools 6 | wheel 7 | -------------------------------------------------------------------------------- /bayesbridge/reg_coef_sampler/__init__.py: -------------------------------------------------------------------------------- 1 | from .reg_coef_sampler import SparseRegressionCoefficientSampler -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include bayesbridge/random/polya_gamma/scipy_ndtr.c 2 | global-include *.pyx 3 | global-include *.pxd 4 | -------------------------------------------------------------------------------- /bayesbridge/random/polya_gamma/scipy_ndtr.pxd: -------------------------------------------------------------------------------- 1 | cdef extern from "scipy_ndtr.c": 2 | double log_ndtr(double a) 3 | -------------------------------------------------------------------------------- /bayesbridge/random/normal/normal.pxd: -------------------------------------------------------------------------------- 1 | from numpy.random.bit_generator cimport BitGenerator 2 | 3 | cdef double random_normal(BitGenerator) -------------------------------------------------------------------------------- /bayesbridge/random/uniform/uniform.pxd: -------------------------------------------------------------------------------- 1 | from numpy.random.bit_generator cimport BitGenerator 2 | 3 | cdef double random_uniform(BitGenerator) 
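# Hypothetical example of how another Cython module would consume this declaration;
# the module path mirrors the 'bayesbridge.random.uniform.uniform' extension name
# built by setup.py, and the implementation lives in uniform.pyx:
#
#     from bayesbridge.random.uniform.uniform cimport random_uniform
#     cdef double u = random_uniform(bit_generator)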
-------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools", "wheel", "numpy", "scipy", "Cython"] 3 | build-backend = "setuptools.build_meta" 4 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../README.rst 2 | 3 | .. toctree:: 4 | :caption: Documentation 5 | :maxdepth: 1 6 | 7 | demo 8 | class 9 | -------------------------------------------------------------------------------- /tests/regression_tests/saved_outputs/cox_hmc_samples.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/bayes-bridge/HEAD/tests/regression_tests/saved_outputs/cox_hmc_samples.npy -------------------------------------------------------------------------------- /tests/regression_tests/saved_outputs/logit_cg_samples.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/bayes-bridge/HEAD/tests/regression_tests/saved_outputs/logit_cg_samples.npy -------------------------------------------------------------------------------- /tests/regression_tests/saved_outputs/linear_cg_samples.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/bayes-bridge/HEAD/tests/regression_tests/saved_outputs/linear_cg_samples.npy -------------------------------------------------------------------------------- /tests/regression_tests/saved_outputs/logit_cholesky_samples.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/bayes-bridge/HEAD/tests/regression_tests/saved_outputs/logit_cholesky_samples.npy -------------------------------------------------------------------------------- /bayesbridge/__init__.py: -------------------------------------------------------------------------------- 1 | from .bayesbridge import BayesBridge 2 | from .gibbs_util import SamplerOptions 3 | from .prior import RegressionCoefPrior 4 | from .model import RegressionModel -------------------------------------------------------------------------------- /bayesbridge/design_matrix/__init__.py: -------------------------------------------------------------------------------- 1 | from .sparse_matrix import SparseDesignMatrix 2 | from .dense_matrix import DenseDesignMatrix 3 | from .abstract_matrix import AbstractDesignMatrix -------------------------------------------------------------------------------- /bayesbridge/model/__init__.py: -------------------------------------------------------------------------------- 1 | from .factory import RegressionModel 2 | from .linear_model import LinearModel 3 | from .logistic_model import LogisticModel 4 | from .cox_model import CoxModel -------------------------------------------------------------------------------- /bayesbridge/util/simplify_warnings.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | 4 | def simplified_format( 5 | message, category, filename, lineno, line=None): 6 | to_print = '{:s}:{:d}: {:s}: {:s}\n'.format( 7 | filename, lineno, category.__name__, str(message) 8 | ) 9 | return to_print 10 | 11 | warnings.formatwarning = simplified_format 12 | 
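# Illustration of the effect (file name and line number below are hypothetical):
# with this format installed, warnings.warn("predictors appear collinear") prints
# a single line such as
#     some_module.py:42: UserWarning: predictors appear collinear
# instead of the default multi-line output, which also echoes the offending source line.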
-------------------------------------------------------------------------------- /bayesbridge/random/polya_gamma/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup, Extension 2 | from Cython.Build import cythonize 3 | import numpy as np 4 | 5 | ext_modules = [ 6 | Extension( 7 | "polya_gamma", 8 | ["polya_gamma.pyx"], 9 | include_dirs=[np.get_include()] 10 | ) 11 | ] 12 | 13 | setup( 14 | ext_modules = cythonize(ext_modules) 15 | ) 16 | -------------------------------------------------------------------------------- /bayesbridge/random/tilted_stable/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from setuptools.extension import Extension 3 | from Cython.Build import cythonize 4 | import numpy as np 5 | 6 | ext_modules = [ 7 | Extension( 8 | "tilted_stable", 9 | ["tilted_stable.pyx"], 10 | include_dirs=[np.get_include()] 11 | ) 12 | ] 13 | 14 | setup( 15 | ext_modules = cythonize(ext_modules) 16 | ) 17 | -------------------------------------------------------------------------------- /docs/class.rst: -------------------------------------------------------------------------------- 1 | Main Class and Method 2 | ===================== 3 | 4 | BayesBridge and Gibbs Sampler 5 | ----------------------------- 6 | .. automodule:: bayesbridge 7 | 8 | .. autoclass:: BayesBridge(model, prior) 9 | :members: gibbs 10 | 11 | Model and Prior 12 | --------------- 13 | .. automodule:: bayesbridge 14 | 15 | .. autofunction:: RegressionModel 16 | 17 | .. autoclass:: RegressionCoefPrior 18 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | # Required 5 | version: 2 6 | 7 | # Build documentation in the docs/ directory with Sphinx 8 | sphinx: 9 | configuration: docs/conf.py 10 | 11 | # Optionally set the version of Python and requirements required to build your docs 12 | python: 13 | version: 3 14 | install: 15 | - requirements: docs/requirements.txt 16 | -------------------------------------------------------------------------------- /bayesbridge/reg_coef_sampler/hamiltonian_monte_carlo/util.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | 4 | def simplified_format( 5 | message, category, filename, lineno, line=None): 6 | to_print = '{:s}:{:d}: {:s}: {:s}\n'.format( 7 | filename, lineno, category.__name__, str(message) 8 | ) 9 | return to_print 10 | 11 | warnings.formatwarning = simplified_format 12 | 13 | def warn_message_only(message, category=UserWarning): 14 | warnings.warn(message, category, stacklevel=2) 15 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 
12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /bayesbridge/design_matrix/cython_matmal/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup, Extension 2 | from Cython.Build import cythonize 3 | import subprocess 4 | import os 5 | import numpy as np 6 | 7 | # Hack to include the numpy header file. 8 | cmd = 'export CFLAGS="-I ' + np.get_include() + ' $CFLAGS"' 9 | subprocess.run(cmd, shell=True, check=True) 10 | os.environ["CC"] = "clang++ -Xpreprocessor -fopenmp -lomp" # "gcc-6 -fopenmp" 11 | 12 | ext_modules = [ 13 | Extension( 14 | "binary_matmul", 15 | ["binary_matmul.pyx"], 16 | # extra_compile_args=['-Xpreprocessor -fopenmp -lomp'], 17 | # extra_link_args=['-Xpreprocessor -fopenmp -lomp'], 18 | ) 19 | ] 20 | 21 | setup( 22 | ext_modules = cythonize(ext_modules) 23 | ) 24 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | **/__pycache__ 2 | **/.DS_store 3 | **/.pytest_cache 4 | **/.ipynb_checkpoints 5 | 6 | bayesbridge/reg_coef_sampler/hamiltonian_monte_carlo/tests 7 | bayesbridge/random/polya_gamma/polya_gamma.c 8 | bayesbridge/random/polya_gamma/polya_gamma.*.so 9 | bayesbridge/random/polya_gamma/polya_gamma.html 10 | bayesbridge/random/tilted_stable/tilted_stable.c 11 | bayesbridge/random/tilted_stable/tilted_stable.*.so 12 | bayesbridge/random/tilted_stable/tilted_stable.html 13 | bayesbridge/random/normal/normal.c 14 | bayesbridge/random/normal/normal.*.so 15 | bayesbridge/random/normal/normal.html 16 | bayesbridge/random/uniform/uniform.c 17 | bayesbridge/random/uniform/uniform.*.so 18 | bayesbridge/random/uniform/uniform.html 19 | 20 | # C extensions 21 | *.so 22 | 23 | # Distribution / packaging 24 | **/build 25 | .eggs/ 26 | .idea/ 27 | env/ 28 | dist/ 29 | bayesbridge.egg-info/ 30 | 31 | # Environments 32 | venv/ 33 | 34 | # Sphinx documentation 35 | docs/_build/ -------------------------------------------------------------------------------- /bayesbridge/random/normal/normal.pyx: -------------------------------------------------------------------------------- 1 | from cpython.pycapsule cimport PyCapsule_GetPointer 2 | from numpy.random cimport bitgen_t 3 | from numpy.random.bit_generator cimport BitGenerator, bitgen_t 4 | from numpy.random.c_distributions cimport random_standard_normal 5 | 6 | 7 | cdef double random_normal(BitGenerator bit_generator): 8 | """ 9 | Generate a random value from a standard normal distribution. 10 | 11 | Parameters 12 | ---------- 13 | bit_generator : BitGenerator 14 | Numpy BitGenerator object. This object is *not* locked during generation since the 15 | sampling runs on a single thread and performance is much better without locking/releasing. 16 | 17 | Returns 18 | ------- 19 | double 20 | Random number. 
21 | """ 22 | cdef bitgen_t *rng 23 | cdef const char *capsule_name = "BitGenerator" 24 | capsule = bit_generator.capsule 25 | rng = PyCapsule_GetPointer(capsule, capsule_name) 26 | return random_standard_normal(rng) 27 | -------------------------------------------------------------------------------- /bayesbridge/random/uniform/uniform.pyx: -------------------------------------------------------------------------------- 1 | from cpython.pycapsule cimport PyCapsule_IsValid, PyCapsule_GetPointer 2 | from numpy.random cimport bitgen_t 3 | from numpy.random.bit_generator cimport BitGenerator, bitgen_t 4 | from numpy.random.c_distributions cimport random_standard_uniform 5 | 6 | 7 | cdef double random_uniform(BitGenerator bit_generator): 8 | """ 9 | Generate a random value from a uniform(0,1) distribution. 10 | 11 | Parameters 12 | ---------- 13 | bit_generator : BitGenerator 14 | Numpy BitGenerator object. This object is *not* locked during generation since the 15 | sampling runs on a single thread and performance is much better without locking/releasing. 16 | 17 | Returns 18 | ------- 19 | double 20 | Random number. 21 | """ 22 | cdef bitgen_t *rng 23 | cdef const char *capsule_name = "BitGenerator" 24 | capsule = bit_generator.capsule 25 | rng = PyCapsule_GetPointer(capsule, capsule_name) 26 | return random_standard_uniform(rng) 27 | -------------------------------------------------------------------------------- /bayesbridge/model/abstract_model.py: -------------------------------------------------------------------------------- 1 | import abc 2 | 3 | 4 | class AbstractModel(): 5 | 6 | @property 7 | def n_obs(self): 8 | return self.design.shape[0] 9 | 10 | @property 11 | def n_pred(self): 12 | return self.design.shape[1] 13 | 14 | @property 15 | def intercept_added(self): 16 | return self.design.intercept_added 17 | 18 | @abc.abstractmethod 19 | def compute_loglik_and_gradient(self, beta, loglik_only=False): 20 | pass 21 | 22 | @abc.abstractmethod 23 | def compute_hessian(self, beta): 24 | pass 25 | 26 | @abc.abstractmethod 27 | def get_hessian_matvec_operator(self, beta): 28 | pass 29 | 34 | @abc.abstractmethod 35 | def calc_intercept_mle(self): 36 | """ Calculate MLE for intercept assuming other coefficients are zero.
""" 37 | pass 38 | 39 | @staticmethod 40 | @abc.abstractmethod 41 | def simulate_outcome(): 42 | pass -------------------------------------------------------------------------------- /.github/workflows/python-app.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies and run tests with a single version of Python 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: Python application 5 | 6 | on: 7 | push: 8 | branches: [ master, test ] 9 | pull_request: 10 | branches: [ master, test ] 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ${{ matrix.os }} 16 | strategy: 17 | matrix: 18 | os: [ubuntu-latest, macos-latest, windows-latest] 19 | 20 | steps: 21 | - uses: actions/checkout@v2 22 | - name: Set up Python 3.12 23 | uses: actions/setup-python@v2 24 | with: 25 | python-version: 3.12 26 | - name: Install dependencies 27 | run: | 28 | python -m pip install --upgrade pip 29 | pip install -r requirements.txt 30 | - name: Cythonize and install package 31 | run: | 32 | python setup.py build_ext --inplace 33 | python setup.py install 34 | - name: Test with pytest 35 | run: | 36 | pytest tests 37 | -------------------------------------------------------------------------------- /bayesbridge/design_matrix/cython_matmal/binary_matmul.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport numpy as np 3 | import cython 4 | cimport cython 5 | from cython.parallel cimport prange 6 | 7 | ctypedef np.int32_t INT_t 8 | ctypedef np.float_t FLOAT_t 9 | FLOAT = np.float64 10 | 11 | def binary_matmul(X_csr, v): 12 | return c_binary_matmul_parallel(X_csr.indices, X_csr.indptr, v) 13 | 14 | @cython.boundscheck(False) 15 | @cython.wraparound(False) 16 | cdef c_binary_matmul(np.ndarray[INT_t, ndim=1] indices, np.ndarray[INT_t, ndim=1] indptr, np.ndarray[FLOAT_t, ndim=1] v): 17 | cdef int i, k 18 | cdef int m = indptr.shape[0] - 1 19 | cdef FLOAT_t val 20 | cdef np.ndarray[FLOAT_t, ndim=1] Xv = np.zeros(m, dtype=FLOAT) 21 | for i in range(m): 22 | val = 0 23 | for k in range(indptr[i], indptr[i + 1]): 24 | val += v[indices[k]] 25 | Xv[i] = val 26 | return Xv 27 | 28 | @cython.boundscheck(False) 29 | @cython.wraparound(False) 30 | cdef c_binary_matmul_parallel(np.ndarray[INT_t, ndim=1] indices, np.ndarray[INT_t, ndim=1] indptr, np.ndarray[FLOAT_t, ndim=1] v): 31 | cdef int i, k 32 | cdef int m = indptr.shape[0] - 1 33 | cdef FLOAT_t val 34 | cdef np.ndarray[FLOAT_t, ndim=1] Xv = np.zeros(m, dtype=FLOAT) 35 | for i in prange(m, nogil=True): 36 | for k in range(indptr[i], indptr[i + 1]): 37 | Xv[i] += v[indices[k]] 38 | return Xv 39 | -------------------------------------------------------------------------------- /bayesbridge/model/linear_model.py: -------------------------------------------------------------------------------- 1 | from .abstract_model import AbstractModel 2 | import math 3 | import numpy as np 4 | 5 | 6 | class LinearModel(AbstractModel): 7 | 8 | def __init__(self, y, design): 9 | self.y = y 10 | self.design = design 11 | self.name = 'linear' 12 | 13 | def compute_loglik_and_gradient(self, beta, obs_prec, loglik_only=False): 14 | X_beta = self.design.dot(beta) 15 | loglik = ( 16 | len(self.y) * math.log(obs_prec) / 2 17 | - obs_prec * np.sum((self.y - X_beta) ** 2) / 2 18 | ) 19 | if loglik_only: 20 | grad = None 21 | else: 22 | grad = obs_prec * self.design.Tdot(self.y 
- X_beta) 23 | return loglik, grad 24 | 25 | def compute_hessian(self, beta): 26 | pass 27 | 28 | def get_hessian_matvec_operator(self, beta, obs_prec): 29 | hessian_op = lambda v: - obs_prec * self.design.Tdot(self.design.dot(v)) 30 | return hessian_op 31 | 32 | def calc_intercept_mle(self): 33 | return self.y.mean() 34 | 35 | @staticmethod 36 | def simulate_outcome(X, beta, noise_sd, seed=None): 37 | """ 38 | Parameters 39 | ---------- 40 | X : DesignMatrix, numpy/scipy matrix 41 | Only needs to support the `dot()` operation 42 | """ 43 | if seed is not None: 44 | np.random.seed(seed) 45 | y = X.dot(beta) + noise_sd * np.random.randn(X.shape[0]) 46 | return y -------------------------------------------------------------------------------- /bayesbridge/random/random.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from .polya_gamma import PolyaGammaDist 3 | from .tilted_stable import ExpTiltedStableDist 4 | 5 | class BasicRandom(): 6 | """ 7 | Generators of random variables from the basic distributions used in 8 | Bayesian sparse regression. 9 | """ 10 | 11 | def __init__(self, seed=None): 12 | self.np_random = np.random 13 | self.pg = PolyaGammaDist() 14 | self.ts = ExpTiltedStableDist() 15 | self.set_seed(seed) 16 | 17 | def set_seed(self, seed): 18 | self.np_random.seed(seed) 19 | pg_seed = np.random.randint(1, 1 + np.iinfo(np.int32).max) 20 | ts_seed = np.random.randint(1, 1 + np.iinfo(np.int32).max) 21 | self.pg.set_seed(pg_seed) 22 | self.ts.set_seed(ts_seed) 23 | 24 | def get_state(self): 25 | rand_gen_state = { 26 | 'numpy' : self.np_random.get_state(), 27 | 'tilted_stable' : self.ts.get_state(), 28 | 'polya_gamma' : self.pg.get_state() 29 | } 30 | return rand_gen_state 31 | 32 | def set_state(self, rand_gen_state): 33 | self.np_random.set_state(rand_gen_state['numpy']) 34 | self.ts.set_state(rand_gen_state['tilted_stable']) 35 | self.pg.set_state(rand_gen_state['polya_gamma']) 36 | 37 | def polya_gamma(self, shape, tilt): 38 | return self.pg.rand_polyagamma(shape, tilt) 39 | 40 | def tilted_stable(self, char_exponent, tilt): 41 | return self.ts.sample(char_exponent, tilt) 42 | -------------------------------------------------------------------------------- /util/mcmc_summarizer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | from matplotlib.pylab import MaxNLocator 4 | 5 | 6 | def plot_conf_interval( 7 | coef_samples, conf_level=.95, n_coef_to_plot=None, 8 | sort_by_median_val=False, marker_scale=1.0, 9 | ): 10 | tail_prob = (1 - conf_level) / 2 11 | lower, median, upper = [ 12 | np.quantile(coef_samples, q, axis=-1) 13 | for q in [tail_prob, .5, 1 - tail_prob] 14 | ] 15 | 16 | if sort_by_median_val: 17 | sort_ind = np.argsort(median) 18 | else: 19 | sort_ind = np.arange(len(median)) # No sorting 20 | 21 | if n_coef_to_plot is None: 22 | n_coef_to_plot = len(median) 23 | coef_index = sort_ind[:n_coef_to_plot] 24 | 25 | plt.plot( 26 | coef_index, median[coef_index], 27 | 'x', color='tab:blue', ms=marker_scale * 10, 28 | label='Posterior median' 29 | ) 30 | plt.plot( 31 | coef_index, lower[coef_index], 32 | '_', color='tab:green', ms=marker_scale * 12, lw=marker_scale * 1.2, 33 | label='{:.1f}% credible interval'.format(100 * conf_level) 34 | ) 35 | plt.plot( 36 | coef_index, upper[coef_index], 37 | '_', color='tab:green', ms=marker_scale * 12, lw=marker_scale * 1.2 38 | ) 39 | 
plt.gca().get_xaxis().set_major_locator(MaxNLocator(integer=True)) 40 | 41 | plotted_quantity = { 42 | 'lower': lower[coef_index], 43 | 'median': median[coef_index], 44 | 'upper': upper[coef_index], 45 | 'coef_index': coef_index 46 | } 47 | return plotted_quantity -------------------------------------------------------------------------------- /tests/helper.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy as sp 3 | from bayesbridge.model import LinearModel, LogisticModel, CoxModel 4 | from bayesbridge.design_matrix import SparseDesignMatrix, DenseDesignMatrix 5 | from simulate_data import simulate_design 6 | 7 | 8 | def simulate_data(model, n_obs=100, n_pred=50, seed=None, 9 | return_design_mat=False): 10 | if seed is not None: 11 | np.random.seed(seed) 12 | 13 | X = simulate_design(n_obs, n_pred, binary_frac=.9) 14 | 15 | beta = np.random.randn(n_pred) 16 | n_trial = None 17 | if model == 'linear': 18 | y = LinearModel.simulate_outcome(X, beta, noise_sd=1.) 19 | elif model == 'logit': 20 | n_trial = 1 + np.random.binomial(np.arange(n_obs) + 1, .5) 21 | n_success = LogisticModel.simulate_outcome(n_trial, X, beta) 22 | y = (n_success, n_trial) 23 | elif model == 'cox': 24 | event_time, censoring_time = CoxModel.simulate_outcome(X, beta) 25 | event_time, censoring_time, X = \ 26 | CoxModel._permute_observations_by_event_and_censoring_time( 27 | event_time, censoring_time, X) 28 | event_time, censoring_time, X = \ 29 | CoxModel._drop_uninformative_observations(event_time, 30 | censoring_time, X) 31 | y = (event_time, censoring_time) 32 | else: 33 | raise NotImplementedError() 34 | 35 | if return_design_mat: 36 | if sp.sparse.issparse(X): 37 | X = SparseDesignMatrix(X, add_intercept=False) 38 | else: 39 | X = DenseDesignMatrix(X, add_intercept=False) 40 | 41 | return y, X, beta -------------------------------------------------------------------------------- /bayesbridge/reg_coef_sampler/direct_gaussian_sampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy as sp 3 | 4 | def generate_gaussian_with_weight(design, obs_prec, prior_prec_sqrt, z, rand_gen=None): 5 | """ 6 | Generate a multi-variate Gaussian with covariance Sigma 7 | Sigma^{-1} = X diag(obs_prec) X + diag(prior_prec_sqrt) ** 2 8 | and mean = Sigma z, where X is the `design` matrix. 
9 | 10 | Parameters 11 | ---------- 12 | obs_prec : 1-d numpy array 13 | prior_prec_sqrt : 1-d numpy array 14 | """ 15 | 16 | diag = prior_prec_sqrt ** 2 \ 17 | + design.compute_fisher_info(weight=obs_prec, diag_only=True) 18 | jacobi_precond_scale = 1 / np.sqrt(diag) 19 | Prec_precond = compute_precond_post_prec( 20 | design, obs_prec, prior_prec_sqrt, jacobi_precond_scale 21 | ) 22 | Prec_precond_chol = sp.linalg.cholesky(Prec_precond) 23 | mean_precond = sp.linalg.cho_solve( 24 | (Prec_precond_chol, False), jacobi_precond_scale * z 25 | ) 26 | if rand_gen is None: 27 | gaussian_vec = np.random.randn(design.shape[1]) 28 | else: 29 | gaussian_vec = rand_gen.np_random.randn(design.shape[1]) 30 | sample_precond = mean_precond 31 | sample_precond += sp.linalg.solve_triangular( 32 | Prec_precond_chol, gaussian_vec, lower=False 33 | ) 34 | sample = jacobi_precond_scale * sample_precond 35 | 36 | return sample 37 | 38 | def compute_precond_post_prec(design, obs_prec, prior_prec_sqrt, precond_scale): 39 | Prec_precond = \ 40 | precond_scale[:, np.newaxis] \ 41 | * design.compute_fisher_info(obs_prec) \ 42 | * precond_scale[np.newaxis, :] 43 | Prec_precond += np.diag((precond_scale * prior_prec_sqrt) ** 2) 44 | return Prec_precond 45 | -------------------------------------------------------------------------------- /bayesbridge/design_matrix/mkl_matvec.py: -------------------------------------------------------------------------------- 1 | import platform 2 | import numpy as np 3 | import scipy as sp 4 | import scipy.sparse 5 | import ctypes 6 | from ctypes import
POINTER, c_int, c_char, c_char_p, c_double, byref 7 | 8 | try: 9 | if platform.system() == 'Windows': 10 | mkl = ctypes.windll.LoadLibrary("mkl_rt.dll") 11 | else: 12 | mkl = ctypes.cdll.LoadLibrary("libmkl_rt.dylib") 13 | except: 14 | raise ImportError("Could not load Intel MKL Library.") 15 | 16 | 17 | def mkl_csr_matvec(A, x, transpose=False): 18 | """ 19 | Parameters 20 | ---------- 21 | A : scipy.sparse csr matrix 22 | x : numpy 1d array 23 | """ 24 | 25 | if not sp.sparse.isspmatrix_csr(A): 26 | raise TypeError("The matrix must be a scipy sparse CSR matrix.") 27 | 28 | if x.ndim != 1: 29 | raise TypeError("The vector to be multiplied must be a 1d array.") 30 | 31 | if x.dtype.type is not np.double: 32 | x = x.astype(np.double, copy=True) 33 | 34 | # Allocate the result of the matrix-vector multiplication. 35 | result = np.empty(A.shape[transpose]) 36 | 37 | # Set the parameters for simply computing A.dot(x) for a general matrix A. 38 | alpha = byref(c_double(1.0)) 39 | beta = byref(c_double(0.0)) 40 | matrix_description = c_char_p(bytes('G C ', 'utf-8')) 41 | 42 | # Get pointers to the numpy arrays. 43 | data_ptr = A.data.ctypes.data_as(POINTER(c_double)) 44 | indices_ptr = A.indices.ctypes.data_as(POINTER(c_int)) 45 | indptr_begin = A.indptr[:-1].ctypes.data_as(POINTER(c_int)) 46 | indptr_end = A.indptr[1:].ctypes.data_as(POINTER(c_int)) 47 | x_ptr = x.ctypes.data_as(POINTER(c_double)) 48 | result_ptr = result.ctypes.data_as(POINTER(c_double)) 49 | 50 | transpose_flag = byref(c_char(bytes(['n', 't'][transpose], 'utf-8'))) 51 | n_row, n_col = [byref(c_int(size)) for size in A.shape] 52 | mkl.mkl_dcsrmv( 53 | transpose_flag, n_row, n_col, alpha, matrix_description, 54 | data_ptr, indices_ptr, indptr_begin, indptr_end, x_ptr, beta, result_ptr 55 | ) 56 | return result 57 | -------------------------------------------------------------------------------- /bayesbridge/design_matrix/dense_matrix.py: -------------------------------------------------------------------------------- 1 | from warnings import warn 2 | 3 | import numpy as np 4 | from .abstract_matrix import AbstractDesignMatrix 5 | 6 | 7 | class DenseDesignMatrix(AbstractDesignMatrix): 8 | 9 | def __init__(self, X, center_predictor=False, add_intercept=True, 10 | copy_array=False): 11 | """ 12 | Params: 13 | ------ 14 | X : numpy array 15 | """ 16 | self.use_cupy = False 17 | if copy_array: 18 | X = X.copy() 19 | super().__init__() 20 | X = self.remove_intercept_indicator(X) 21 | if center_predictor: 22 | X -= np.mean(X, axis=0)[np.newaxis, :] 23 | if add_intercept: 24 | X = np.hstack((np.ones((X.shape[0], 1)), X)) 25 | self.X = X 26 | self.intercept_added = add_intercept 27 | self.centered = center_predictor 28 | 29 | @property 30 | def shape(self): 31 | return self.X.shape 32 | 33 | @property 34 | def is_sparse(self): 35 | return False 36 | 37 | def dot(self, v): 38 | 39 | if self.memoized and np.all(self.v_prev == v): 40 | return self.X_dot_v 41 | 42 | result = self.X.dot(v) 43 | if self.memoized: 44 | self.X_dot_v = result 45 | self.v_prev = v 46 | self.dot_count += 1 47 | 48 | return result 49 | 50 | def Tdot(self, v): 51 | self.Tdot_count += 1 52 | return self.X.T.dot(v) 53 | 54 | def compute_fisher_info(self, weight, diag_only=False): 55 | if diag_only: 56 | return np.sum(weight[:, np.newaxis] * self.X ** 2, 0) 57 | else: 58 | return self.X.T.dot(weight[:, np.newaxis] * self.X) 59 | 60 | def compute_transposed_fisher_info(self, weight): 61 | # TODO: Implement. 
62 | # Note: with current implementation of the class, `self.X` explicitly includes 63 | # the intercept when `self.intercept_added == True`. 64 | pass 65 | 66 | def toarray(self): 67 | return self.X 68 | 69 | def extract_matrix(self, order=None): 70 | return self.X 71 | -------------------------------------------------------------------------------- /tests/derivative_tester.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def numerical_grad_is_close( 4 | f, x, atol=10E-6, rtol=10E-6, dx=10E-6, 5 | subset_index=None, return_grad=False): 6 | """ 7 | Compare the computed gradient to a centered finite difference approximation. 8 | 9 | Params: 10 | ------- 11 | f : callable 12 | Returns a value of a function and its gradient 13 | """ 14 | if subset_index is None: 15 | subset_index = np.arange(len(x)) 16 | 17 | x = np.array(x, ndmin=1) 18 | grad_est = np.zeros(len(subset_index)) 19 | for i in range(len(subset_index)): 20 | x_minus = x.copy() 21 | x_minus[subset_index[i]] -= dx 22 | x_plus = x.copy() 23 | x_plus[subset_index[i]] += dx 24 | f_minus, _ = f(x_minus) 25 | f_plus, _ = f(x_plus) 26 | grad_est[i] = (f_plus - f_minus) / (2 * dx) 27 | 28 | _, grad = f(x) 29 | is_close = np.allclose(grad[subset_index], grad_est, atol=atol, rtol=rtol) 30 | 31 | if return_grad: 32 | return is_close, grad[subset_index], grad_est 33 | else: 34 | return is_close 35 | 36 | 37 | def numerical_direc_deriv_is_close( 38 | f, x, hess_matvec, n_direction=10, 39 | atol=10E-6, rtol=10E-6, dx=10E-6, seed=None): 40 | """ 41 | Compare analytically computed directional derivatives of the gradient of 'f' 42 | (i.e. the Hessian of 'f' applied to vectors) to its numerical approximations. 43 | 44 | Params: 45 | ------- 46 | f : callable 47 | Returns a value of a function and its gradient 48 | """ 49 | 50 | x = np.array(x, ndmin=1) 51 | 52 | np.random.seed(seed) 53 | all_matched = True 54 | 55 | for i in range(n_direction): 56 | 57 | v = np.random.randn(len(x)) 58 | v /= np.sqrt(np.sum(v ** 2)) 59 | _, grad_minus = f(x - dx * v) 60 | _, grad_plus = f(x + dx * v) 61 | direc_deriv_est = (grad_plus - grad_minus) / (2 * dx) 62 | direc_deriv = hess_matvec(v) 63 | 64 | if not np.allclose(direc_deriv, direc_deriv_est, atol=atol, rtol=rtol): 65 | all_matched = False 66 | break 67 | 68 | return all_matched 69 | -------------------------------------------------------------------------------- /tests/gpu_tests/test_gibbs.py: -------------------------------------------------------------------------------- 1 | """Tests for GPU functionality. 
Depends on cupy being installed, so not run on CI.""" 2 | from bayesbridge import BayesBridge, RegressionModel, RegressionCoefPrior 3 | import numpy as np 4 | import pytest 5 | pytest.importorskip("cupy") # Skip all the test in the module if cupy not found 6 | import cupy as cp 7 | 8 | from ..helper import simulate_data 9 | 10 | 11 | @pytest.fixture 12 | def bridge_gpu(): 13 | y, X, beta = simulate_data(model='logit', seed=1) 14 | return BayesBridge( 15 | RegressionModel(y, cp.sparse.csr_matrix(X), 'logit'), 16 | RegressionCoefPrior()) 17 | 18 | 19 | @pytest.fixture 20 | def bridge_cpu(): 21 | y, X, beta = simulate_data(model='logit', seed=1) 22 | return BayesBridge( 23 | RegressionModel(y, X, 'logit'), 24 | RegressionCoefPrior() 25 | ) 26 | 27 | 28 | def test_use_cupy(bridge_cpu, bridge_gpu): 29 | """Test use_cupy attribute is set appropriately.""" 30 | assert bridge_gpu.model.design.use_cupy 31 | assert not bridge_cpu.model.design.use_cupy 32 | 33 | 34 | def test_similar_output(bridge_cpu, bridge_gpu): 35 | """Test that the CPU and GPU results are the same.""" 36 | iters = 10 37 | seed = 1 38 | sampler = 'cg' 39 | init = {'coef': np.ones(bridge_gpu.model.n_pred)} 40 | samples_cpu, mcmc_info_cpu = bridge_cpu.gibbs( 41 | n_iter=iters, coef_sampler_type=sampler, init=init, seed=seed) 42 | samples_gpu, mcmc_info_gpu = bridge_gpu.gibbs( 43 | n_iter=iters, coef_sampler_type=sampler, init=init, seed=seed) 44 | assert np.allclose(samples_gpu['coef'], samples_cpu['coef'], atol=1e-5) 45 | 46 | 47 | def test_preferred_sampler(bridge_gpu): 48 | """Test default sampler for cupy matrices is 'cg'.""" 49 | samples_gpu, mcmc_info_gpu = bridge_gpu.gibbs(n_iter=1) 50 | assert mcmc_info_gpu['options']['coef_sampler_type'] == 'cg' 51 | 52 | 53 | def test_unsupported_sampler(bridge_gpu): 54 | """Test non-'cg' samplers raise errors.""" 55 | with pytest.raises(ValueError): 56 | bridge_gpu.gibbs(n_iter=1, coef_sampler_type='cholesky') 57 | with pytest.raises(ValueError): 58 | bridge_gpu.gibbs(n_iter=1, coef_sampler_type='hmc') 59 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from Cython.Build import cythonize 3 | from os.path import dirname, join, abspath 4 | from setuptools import setup, find_packages 5 | from setuptools.command.build_ext import build_ext 6 | from setuptools.extension import Extension 7 | 8 | path = dirname(__file__) 9 | src_dir = join(dirname(path), '..', 'src') 10 | defs = [('NPY_NO_DEPRECATED_API', 0)] 11 | inc_path = np.get_include() 12 | lib_path = [abspath(join(np.get_include(), '..', '..', 'random', 'lib'))] 13 | np_libs = ['npyrandom'] 14 | 15 | class CustomBuildExtCommand(build_ext): 16 | """ build_ext command when numpy headers are needed. 
""" 17 | def run(self): 18 | # Import numpy here, only when headers are needed 19 | import numpy as np 20 | self.include_dirs.append(np.get_include()) 21 | build_ext.run(self) 22 | 23 | 24 | ext_modules = [ 25 | Extension( 26 | 'bayesbridge.random.tilted_stable.tilted_stable', 27 | sources=['bayesbridge/random/tilted_stable/tilted_stable.pyx'], 28 | ), 29 | Extension( 30 | 'bayesbridge.random.polya_gamma.polya_gamma', 31 | sources=['bayesbridge/random/polya_gamma/polya_gamma.pyx'], 32 | ), 33 | Extension( 34 | 'bayesbridge.random.normal.normal', 35 | sources=['bayesbridge/random/normal/normal.pyx'], 36 | library_dirs=lib_path, 37 | libraries=np_libs, 38 | define_macros=defs, 39 | ), 40 | Extension( 41 | 'bayesbridge.random.uniform.uniform', 42 | sources=['bayesbridge/random/uniform/uniform.pyx'], 43 | library_dirs=lib_path, 44 | libraries=np_libs, 45 | define_macros=defs, 46 | ) 47 | ] 48 | 49 | setup( 50 | name='bayesbridge', 51 | version='0.2.6', 52 | description=\ 53 | 'Generates posterior samples under Bayesian sparse regression based on ' 54 | + 'the bridge prior using the CG-accelerated Gibbs sampler of Nishimura ' 55 | + 'et. al. (2018). The linear and logistic model are currently supported.', 56 | url='https://github.com/aki-nishimura/bayes-bridge', 57 | author='Akihiko (Aki) Nishimura', 58 | author_email='aki.nishimura@jhu.edu', 59 | license='MIT', 60 | packages=find_packages(exclude=['tests', 'tests.*']), 61 | cmdclass={'build_ext': CustomBuildExtCommand}, 62 | ext_modules=cythonize(ext_modules), 63 | setup_requires=['numpy>=1.19'], 64 | extras_require={ 65 | 'gpu': 'cupy>=9.4.0' 66 | }, 67 | install_requires=[ 68 | 'numpy>=1.19', 'scipy' 69 | ], 70 | zip_safe=False 71 | ) 72 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | import os 14 | import sys 15 | sys.path.insert(0, os.path.abspath('..')) 16 | # sys.path.insert(0, os.path.abspath('../bayesbridge/')) 17 | 18 | 19 | # -- Project information ----------------------------------------------------- 20 | 21 | project = 'bayesbridge' 22 | copyright = '2018, Aki Nishimura' 23 | author = 'Aki Nishimura' 24 | 25 | # The full version, including alpha/beta/rc tags 26 | release = '0.1' 27 | 28 | 29 | # -- General configuration --------------------------------------------------- 30 | 31 | # Add any Sphinx extension module names here, as strings. They can be 32 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 33 | # ones. 34 | extensions = ['sphinx.ext.autodoc', 'sphinx.ext.napoleon', 'nbsphinx'] 35 | 36 | # Add any paths that contain templates here, relative to this directory. 37 | templates_path = ['_templates'] 38 | 39 | # List of patterns, relative to source directory, that match files and 40 | # directories to ignore when looking for source files. 
41 | # This pattern also affects html_static_path and html_extra_path. 42 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 43 | 44 | # Set the master document to 'index' 45 | master_doc = 'index' 46 | 47 | # -- Options for HTML output ------------------------------------------------- 48 | 49 | # The theme to use for HTML and HTML Help pages. See the documentation for 50 | # a list of builtin themes. 51 | # 52 | html_theme = 'sphinx_rtd_theme' 53 | 54 | # Add any paths that contain custom static files (such as style sheets) here, 55 | # relative to this directory. They are copied after the builtin static files, 56 | # so a file named "default.css" will overwrite the builtin "default.css". 57 | html_static_path = ['_static'] 58 | 59 | 60 | # -- Autodoc configuration --------------------------------------------------- 61 | autoclass_content = 'both' 62 | 63 | # Dependency on C-extension modules breaks ReadTheDocs without the mock import. 64 | autodoc_mock_imports = ["bayesbridge.random"] 65 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | BayesBridge 2 | =========== 3 | 4 | Python package for Bayesian sparse regression, implementing the standard (Polya-Gamma augmented) Gibbs sampler as well as the CG-accelerated sampler of Nishimura and Suchard (2022). The latter algorithm can be orders of magnitude faster for a large and sparse design matrix. 5 | 6 | Installation 7 | ------------ 8 | .. code-block:: bash 9 | 10 | pip install bayesbridge 11 | 12 | Background 13 | ---------- 14 | The Bayesian bridge is based on the following prior on the regression coefficients :math:`\beta_j`'s: 15 | 16 | .. 17 | .. math:: 18 | \pi(\beta_j \, | \, \tau) \propto \tau^{-1} \exp \big(-|\beta_j / \tau|^\alpha \big) \ \text{ for } \ 0 < \alpha \leq 1 19 | 20 | .. raw:: html 21 | 22 | 23 | 24 | The Bayesian bridge recovers the Bayesian lasso when :math:`\alpha = 1` but can provide an improved separation of the significant coefficients from the rest when :math:`\alpha < 1`. 25 | 26 | Usage 27 | ----- 28 | 29 | .. code-block:: python 30 | 31 | from bayesbridge import BayesBridge, RegressionModel, RegressionCoefPrior 32 | 33 | model = RegressionModel(y, X, family='logit') 34 | prior = RegressionCoefPrior(bridge_exponent=.5) 35 | bridge = BayesBridge(model, prior) 36 | samples, mcmc_info = bridge.gibbs( 37 | n_burnin=100, n_post_burnin=1000, thin=1, 38 | coef_sampler_type='cholesky' # Try 'cg' for large and sparse X 39 | ) 40 | coef_samples = samples['coef'] 41 | 42 | where `y` is a 1-D numpy array and `X` is a 2-D numpy array or scipy sparse matrix. 43 | 44 | Currently the linear and logistic models (binomial outcomes) are supported. See `demo.ipynb` for a demonstration of further features. 45 | 46 | Citation 47 | -------- 48 | If you find this package useful, please consider citing: 49 | 50 | Akihiko Nishimura and Marc A. Suchard (2022). 51 | Prior-preconditioned conjugate gradient method for accelerated Gibbs sampling in "large *n*, large *p*" Bayesian sparse regression. *Journal of the American Statistical Association*. 52 | 53 | Akihiko Nishimura and Marc A. Suchard (2022). 54 | Shrinkage with shrunken shoulders: Gibbs sampling shrinkage model posteriors with guaranteed convergence rates. *Bayesian Analysis*.
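
For a quick visual check of the posterior draws, the top-level `util/mcmc_summarizer.py` module in this repository plots medians and credible intervals. A minimal sketch, assuming matplotlib is installed and the repository root is on the Python path:

.. code-block:: python

    from util.mcmc_summarizer import plot_conf_interval

    # Expects the coefficient draws stacked along the last axis.
    plot_conf_interval(coef_samples, conf_level=.95, n_coef_to_plot=20)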
-------------------------------------------------------------------------------- /bayesbridge/model/factory.py: -------------------------------------------------------------------------------- 1 | from warnings import warn 2 | import scipy as sp 3 | 4 | from .linear_model import LinearModel 5 | from .logistic_model import LogisticModel 6 | from .cox_model import CoxModel 7 | from ..design_matrix import DenseDesignMatrix, SparseDesignMatrix 8 | from ..design_matrix import AbstractDesignMatrix 9 | 10 | def RegressionModel( 11 | outcome, X, family='linear', 12 | add_intercept=None, center_predictor=True 13 | ): 14 | """ Prepare input data to BayesBridge, with pre-processings as needed. 15 | 16 | For the Cox model, the observations (rows of X) are reordered to optimize 17 | likelihood, gradient, and Hessian evaluations. 18 | 19 | Parameters 20 | ---------- 21 | outcome : 1-d numpy array, tuple of two 1-d numpy arrays 22 | n_success or (n_success, n_trial) if family == 'logistic'. If 23 | the input is a single array, then outcome is assumed binary. 24 | (event_time, censoring_time) if family == 'cox'. 25 | X : numpy array or scipy sparse matrix 26 | family : str, {'linear', 'logit', 'cox'} 27 | add_intercept : bool, None 28 | If None, add intercept except when family == 'cox' 29 | center_predictor : bool 30 | """ 31 | 32 | if add_intercept is None: 33 | add_intercept = (family != 'cox') 34 | 35 | if family == 'cox': 36 | if add_intercept: 37 | add_intercept = False 38 | warn("Intercept is not identifiable in Cox model and won't be added.") 39 | if AbstractDesignMatrix.is_cupy_matrix(X): 40 | raise ValueError("cupy matrix not yet supported for the Cox model.") 41 | event_time, censoring_time = outcome 42 | event_time, censoring_time, X = CoxModel.preprocess_data( 43 | event_time, censoring_time, X 44 | ) 45 | 46 | if AbstractDesignMatrix.is_cupy_dense(X): 47 | raise ValueError("cupy not yet supported for a dense design matrix.") 48 | is_sparse = sp.sparse.issparse(X) or SparseDesignMatrix.is_cupy_sparse(X) 49 | DesignMatrix = SparseDesignMatrix if is_sparse else DenseDesignMatrix 50 | design = DesignMatrix( 51 | X, add_intercept=add_intercept, center_predictor=center_predictor 52 | ) 53 | 54 | if family == 'linear': 55 | model = LinearModel(outcome, design) 56 | elif family == 'logit': 57 | if isinstance(outcome, tuple): 58 | n_success, n_trial = outcome 59 | else: 60 | n_success = outcome 61 | n_trial = None 62 | model = LogisticModel(n_success, n_trial, design) 63 | elif family == 'cox': 64 | model = CoxModel(event_time, censoring_time, design) 65 | else: 66 | raise NotImplementedError() 67 | 68 | return model 69 | -------------------------------------------------------------------------------- /bayesbridge/reg_coef_sampler/hamiltonian_monte_carlo/dynamics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import math 3 | 4 | 5 | """ 6 | Defines a (numerical) Hamiltonian dynamics based on a Gaussian momentum and the 7 | velocity Verlet integrator. The code is written so that other integrators & 8 | momentum distributions can also be employed straightwardly. 9 | """ 10 | 11 | class HamiltonianDynamics(): 12 | 13 | def __init__(self, mass=None): 14 | """ 15 | Parameters 16 | ---------- 17 | mass: None, numpy 1d array, or callable `mass(p, power)` 18 | If callable, should return a vector obtained by multiplying the 19 | vector p with matrix M ** power for power == -1 or power == 1/2. 
20 | The matrix L corresponding to M ** 1/2 only needs to satisfy L L' = M. 21 | Passing M = None defaults to a dynamics with the identity mass matrix. 22 | """ 23 | 24 | if mass is None: 25 | mass_operator = lambda p, power: p 26 | elif isinstance(mass, np.ndarray): 27 | sqrt_mass = np.sqrt(mass) 28 | inv_mass = 1 / mass 29 | def mass_operator(p, power): 30 | if power == -1: 31 | return inv_mass * p 32 | elif power == 1 / 2: 33 | return sqrt_mass * p 34 | elif callable(mass): 35 | mass_operator = mass 36 | else: 37 | raise ValueError("Unsupported type for the mass matrix.") 38 | 39 | self.integrator = velocity_verlet 40 | self.momentum = GaussianMomentum(mass_operator) 41 | 42 | def integrate(self, f, dt, q, p, grad): 43 | q, p, logp, grad \ 44 | = self.integrator(f, self.momentum.get_grad, dt, q, p, grad) 45 | return q, p, logp, grad 46 | 47 | def draw_momentum(self, n_param): 48 | return self.momentum.draw_random(n_param) 49 | 50 | def compute_hamiltonian(self, logp, p): 51 | potential = - logp 52 | kinetic = - self.momentum.get_logp(p) 53 | return potential + kinetic 54 | 55 | def convert_to_velocity(self, p): 56 | return - self.momentum.get_grad(p) 57 | 58 | 59 | def velocity_verlet( 60 | get_position_logp_and_grad, get_momentum_grad, dt, q, p, position_grad 61 | ): 62 | p = p + 0.5 * dt * position_grad 63 | q = q - dt * get_momentum_grad(p) 64 | position_logp, position_grad = get_position_logp_and_grad(q) 65 | if math.isfinite(position_logp): 66 | p += 0.5 * dt * position_grad 67 | return q, p, position_logp, position_grad 68 | 69 | 70 | class GaussianMomentum(): 71 | 72 | def __init__(self, mass=None): 73 | self.mass = mass 74 | 75 | def draw_random(self, n_param): 76 | p = self.mass(np.random.randn(n_param), 1/2) 77 | return p 78 | 79 | def get_grad(self, p): 80 | return - self.mass(p, -1) 81 | 82 | def get_logp(self, p): 83 | return - 0.5 * np.dot(p, self.mass(p, -1)) -------------------------------------------------------------------------------- /tests/test_design_matrix.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy as sp 3 | import scipy.sparse 4 | 5 | from bayesbridge.design_matrix import SparseDesignMatrix, DenseDesignMatrix 6 | from simulate_data import simulate_design 7 | 8 | atol = 10e-6 9 | rtol = 10e-6 10 | 11 | 12 | def test_sparse_design_intercept_and_centering(): 13 | 14 | n_obs, n_pred = (100, 10) 15 | X = simulate_design(n_obs, n_pred, binary_frac=.5, format_='sparse') 16 | X_design = SparseDesignMatrix(X, center_predictor=True, add_intercept=True) 17 | X_ndarray = center_and_add_intercept(X.toarray()) 18 | w, v = (np.random.randn(size) for size in X_design.shape) 19 | assert np.allclose( 20 | X_design.dot(v), X_ndarray.dot(v), atol=atol, rtol=rtol 21 | ) 22 | assert np.allclose( 23 | X_design.Tdot(w), X_ndarray.T.dot(w), atol=atol, rtol=rtol 24 | ) 25 | 26 | 27 | def test_sparse_design_centered_fisher_info(): 28 | 29 | n_obs, n_pred = (5, 3) 30 | X = simulate_design(n_obs, n_pred, binary_frac=.5, format_='sparse') 31 | X_design = SparseDesignMatrix( 32 | X, center_predictor=True, add_intercept=True, copy_array=True 33 | ) 34 | X_ndarray = center_and_add_intercept(X.toarray()) 35 | weight = np.random.exponential(size=n_obs) 36 | benchmark_fisher_info = X_ndarray.T.dot(weight[:, np.newaxis] * X_ndarray) 37 | assert np.allclose( 38 | X_design.compute_fisher_info(weight), 39 | benchmark_fisher_info, 40 | atol=atol, rtol=rtol 41 | ) 42 | assert np.allclose( 43 | 
X_design.compute_fisher_info(weight, diag_only=True), 44 | np.diag(benchmark_fisher_info), 45 | atol=atol, rtol=rtol 46 | ) 47 | 48 | 49 | def test_dense_design_intercept_and_centering(): 50 | 51 | n_obs, n_pred = (100, 10) 52 | X = simulate_design(n_obs, n_pred, binary_frac=.5, format_='dense') 53 | X_design = DenseDesignMatrix(X, center_predictor=True, add_intercept=True) 54 | X_ndarray = center_and_add_intercept(X) 55 | w, v = (np.random.randn(size) for size in X_design.shape) 56 | assert np.allclose( 57 | X_design.dot(v), X_ndarray.dot(v), atol=atol, rtol=rtol 58 | ) 59 | assert np.allclose( 60 | X_design.Tdot(w), X_ndarray.T.dot(w), atol=atol, rtol=rtol 61 | ) 62 | 63 | 64 | def center_and_add_intercept(X): 65 | X -= X.mean(axis=0)[np.newaxis, :] 66 | intercept_column = np.ones((X.shape[0], 1)) 67 | X = np.hstack((intercept_column, X)) 68 | return X 69 | 70 | 71 | def test_intercept_removal(): 72 | 73 | n_obs, n_pred = (100, 10) 74 | X = simulate_design(n_obs, n_pred, binary_frac=.5, format_='sparse') 75 | X_with_const_col = sp.sparse.hstack([ 76 | np.ones((n_obs, 1)), X[:, :5], -.5 * np.ones((n_obs, 1)), X[:, 5:] 77 | ]).tocsr() 78 | assert np.allclose( 79 | X.toarray(), 80 | SparseDesignMatrix.remove_intercept_indicator(X_with_const_col).toarray() 81 | ) 82 | assert np.allclose( 83 | X.toarray(), 84 | DenseDesignMatrix.remove_intercept_indicator(X_with_const_col.toarray()) 85 | ) -------------------------------------------------------------------------------- /tests/manual_tests/test_stable_distribution.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import sys\n", 10 | "sys.path.append('../')\n", 11 | "sys.path.append('../../')\n", 12 | "\n", 13 | "from bayesbridge.random.tilted_stable_dist.rand_exp_tilted_stable \\\n", 14 | " import ExpTiltedStableDist\n", 15 | "from random import normalvariate as norm_rv\n", 16 | "from math import sqrt, gamma\n", 17 | "import numpy as np\n", 18 | "\n", 19 | "import matplotlib.pyplot as plt\n", 20 | "%matplotlib inline" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "## Gibbs sample a powered-exponential distribution using the tilted stable sampler." 
28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "tilted_stable = ExpTiltedStableDist(seed=0)\n", 37 | "\n", 38 | "def powered_exp_dist_gibbs(n_sample, n_burnin=0, exponent=.5, scale=1.):\n", 39 | " beta = 0.\n", 40 | " samples = np.zeros(n_sample + n_burnin)\n", 41 | " for i in range(n_burnin + n_sample):\n", 42 | " lshrink_sq = .5 / tilted_stable.rv(exponent / 2, (beta / scale) ** 2)\n", 43 | " lshrink = sqrt(lshrink_sq)\n", 44 | " beta = lshrink * norm_rv(0., 1.)\n", 45 | " samples[i] = beta\n", 46 | " samples = samples[n_burnin:]\n", 47 | " return samples\n", 48 | "\n", 49 | "def powered_exp_pdf(x, exponent, scale=1., normed=True):\n", 50 | " pdf = np.exp(- np.abs(x / scale) ** exponent)\n", 51 | " if normed:\n", 52 | " pdf *= exponent / (2 * scale * gamma(exponent ** -1))\n", 53 | " return pdf" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "n_sample = 10 ** 6\n", 63 | "n_burnin = 10 ** 3\n", 64 | "exponent = 1 / 2\n", 65 | "samples = powered_exp_dist_gibbs(n_sample, n_burnin, exponent)" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "plt.figure(figsize=(8, 5))\n", 75 | "plt.rcParams['font.size'] = 20\n", 76 | "\n", 77 | "plot_range = 25 * np.array([-1, 1])\n", 78 | "bins = np.linspace(plot_range[0], plot_range[1], 100)\n", 79 | "x = np.linspace(plot_range[0], plot_range[1], 10001)\n", 80 | "plt.hist(samples, bins=bins, density=True)\n", 81 | "plt.plot(x, powered_exp_pdf(x, exponent), 'tab:green')\n", 82 | "plt.show()" 83 | ] 84 | } 85 | ], 86 | "metadata": { 87 | "kernelspec": { 88 | "display_name": "Python 3", 89 | "language": "python", 90 | "name": "python3" 91 | }, 92 | "language_info": { 93 | "codemirror_mode": { 94 | "name": "ipython", 95 | "version": 3 96 | }, 97 | "file_extension": ".py", 98 | "mimetype": "text/x-python", 99 | "name": "python", 100 | "nbconvert_exporter": "python", 101 | "pygments_lexer": "ipython3", 102 | "version": "3.7.3" 103 | } 104 | }, 105 | "nbformat": 4, 106 | "nbformat_minor": 2 107 | } 108 | -------------------------------------------------------------------------------- /bayesbridge/design_matrix/abstract_matrix.py: -------------------------------------------------------------------------------- 1 | import abc 2 | import numpy as np 3 | import scipy as sp 4 | import scipy.sparse 5 | import warnings 6 | try: 7 | import cupy as cp 8 | import cupyx as cpx 9 | except (ImportError, ModuleNotFoundError) as e: 10 | cp = None 11 | cupy_exception = e 12 | 13 | 14 | class AbstractDesignMatrix(): 15 | 16 | def __init__(self): 17 | self.dot_count = 0 18 | self.Tdot_count = 0 19 | self.memoized = False 20 | self.X_dot_v = None # For memoization 21 | self.v_prev = None # For memoization 22 | 23 | @property 24 | @abc.abstractmethod 25 | def shape(self): 26 | pass 27 | 28 | @abc.abstractmethod 29 | def dot(self, v): 30 | pass 31 | 32 | @abc.abstractmethod 33 | def Tdot(self, v): 34 | """ Multiply by the transpose of the matrix. 
""" 35 | pass 36 | 37 | @property 38 | @abc.abstractmethod 39 | def is_sparse(self): 40 | pass 41 | 42 | def memoize_dot(self, flag=True): 43 | self.memoized = flag 44 | if self.v_prev is None: 45 | self.v_prev = np.full(self.shape[1], float('nan')) 46 | if not flag: 47 | self.X_dot_v = None 48 | self.v_prev = None 49 | 50 | @abc.abstractmethod 51 | def compute_fisher_info(self, weight, diag_only): 52 | """ Computes X' diag(weight) X and returns it as a numpy array. """ 53 | pass 54 | 55 | @abc.abstractmethod 56 | def compute_transposed_fisher_info(self, weight, include_intrcpt): 57 | """ Computes X diag(weight) X' and returns it as a numpy array, where 58 | `X` is a design matrix. """ 59 | pass 60 | 61 | @property 62 | def n_matvec(self): 63 | return self.dot_count + self.Tdot_count 64 | 65 | def get_dot_count(self): 66 | return self.dot_count, self.Tdot_count 67 | 68 | def reset_matvec_count(self, count=0): 69 | if not hasattr(count, "__len__"): 70 | count = 2 * [count] 71 | self.dot_count = count[0] 72 | self.Tdot_count = count[1] 73 | 74 | @abc.abstractmethod 75 | def toarray(self): 76 | """ Returns a 2-dimensional numpy array. """ 77 | pass 78 | 79 | @staticmethod 80 | def is_cupy_matrix(X): 81 | return AbstractDesignMatrix.is_cupy_dense(X) \ 82 | or AbstractDesignMatrix.is_cupy_sparse(X) 83 | 84 | @staticmethod 85 | def is_cupy_dense(X): 86 | return (cp is not None) and isinstance(X, cp.ndarray) 87 | 88 | @staticmethod 89 | def is_cupy_sparse(X): 90 | return (cp is not None) and isinstance(X, cpx.scipy.sparse.spmatrix) 91 | 92 | @staticmethod 93 | def remove_intercept_indicator(X): 94 | squeeze, array, power = (cp.squeeze, cp.array, cp.power) if \ 95 | AbstractDesignMatrix.is_cupy_sparse(X) else (np.squeeze, np.array, np.power) 96 | if sp.sparse.issparse(X) or AbstractDesignMatrix.is_cupy_sparse(X): 97 | col_variance = squeeze(array(X.power(2).mean(axis=0) - power(X.mean(axis=0), 2))) 98 | else: 99 | col_variance = np.var(X, axis=0) 100 | has_zero_variance = (col_variance < X.shape[0] * 2 ** -52) 101 | if np.any(has_zero_variance): 102 | warnings.warn( 103 | "Intercept column (or numerically indistinguishable from " 104 | "such) detected. Do not add intercept manually. Removing...." 
105 | ) 106 | X = X[:, np.logical_not(has_zero_variance)] 107 | return X -------------------------------------------------------------------------------- /bayesbridge/random/tilted_stable/test_tilted_stable.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import sys\n", 10 | "sys.path.insert(0, '../')\n", 11 | "\n", 12 | "import numpy as np\n", 13 | "import matplotlib.pyplot as plt\n", 14 | "\n", 15 | "from tilted_stable import ExpTiltedStableDist" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "import rpy2.robjects as robjects\n", 25 | "import rpy2.robjects.packages as rpackages\n", 26 | "\n", 27 | "try:\n", 28 | " copula = rpackages.importr('copula')\n", 29 | "except:\n", 30 | " utils = rpackages.importr('utils')\n", 31 | " utils.install_packages('copula')\n", 32 | " copula = rpackages.importr('copula')" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "## Cross check Python module outputs with R package\n", 40 | "Generating the half-million samples via an external R call takes about 45 sec on iMac 2015." 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "def comparison_hist(samples_1, samples_2, use_log_scale=False):\n", 50 | " if use_log_scale:\n", 51 | " samples_1 = np.log(samples_1)\n", 52 | " samples_2 = np.log(samples_2)\n", 53 | " x_max = max(samples_1.max(), samples_2.max())\n", 54 | " x_min = min(samples_1.min(), samples_2.min())\n", 55 | " bins = np.linspace(x_min, x_max, 51)\n", 56 | " \n", 57 | " plt.hist(samples_1, alpha=.5, bins=bins, density=True)\n", 58 | " plt.hist(samples_2, alpha=.5, bins=bins, density=True)\n", 59 | " for side in ['left', 'top', 'right']:\n", 60 | " plt.gca().spines[side].set_visible(False)\n", 61 | " plt.yticks([])" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "plt.figure(figsize=(14, 4.5))\n", 71 | "plt.rcParams['font.size'] = 20\n", 72 | "\n", 73 | "n_samples = 5 * 10 ** 5\n", 74 | "char_exponent = [1. / 32, 1. 
/ 4]\n", 75 | "tilt = [.01, 100.]\n", 76 | "tilted_stable = ExpTiltedStableDist()\n", 77 | "\n", 78 | "for i in range(2):\n", 79 | " \n", 80 | " python_samples = tilted_stable.sample(\n", 81 | " char_exponent[i], np.tile(tilt[i], n_samples)\n", 82 | " )\n", 83 | " R_samples = np.array([\n", 84 | " copula.retstable(char_exponent[i], 1.0, tilt[i])[0]\n", 85 | " for _ in range(n_samples)\n", 86 | " ])\n", 87 | " \n", 88 | " plt.subplot(1, 2, i + 1)\n", 89 | " comparison_hist(R_samples, python_samples, use_log_scale=True)\n", 90 | " \n", 91 | " plt.xlabel('log(tilted stable)')\n", 92 | " if i == 0:\n", 93 | " plt.legend(['from R package', 'from Python'], frameon=False)\n", 94 | "\n", 95 | "plt.show()" 96 | ] 97 | } 98 | ], 99 | "metadata": { 100 | "kernelspec": { 101 | "display_name": "Python 3", 102 | "language": "python", 103 | "name": "python3" 104 | }, 105 | "language_info": { 106 | "codemirror_mode": { 107 | "name": "ipython", 108 | "version": 3 109 | }, 110 | "file_extension": ".py", 111 | "mimetype": "text/x-python", 112 | "name": "python", 113 | "nbconvert_exporter": "python", 114 | "pygments_lexer": "ipython3", 115 | "version": "3.6.10" 116 | } 117 | }, 118 | "nbformat": 4, 119 | "nbformat_minor": 4 120 | } 121 | -------------------------------------------------------------------------------- /tests/regression_tests/test_gibb.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy as sp 3 | import scipy.sparse 4 | import math 5 | import sys 6 | 7 | from bayesbridge import BayesBridge, RegressionModel, RegressionCoefPrior 8 | from bayesbridge.model import LinearModel, LogisticModel, CoxModel 9 | 10 | data_folder = 'saved_outputs' 11 | test_combo = [ 12 | ('linear', 'cg', 'dense', False), 13 | ('logit', 'cholesky', 'dense', False), 14 | ('logit', 'cholesky', 'dense', True), 15 | ('logit', 'cg', 'sparse', False), 16 | ('cox', 'hmc', 'sparse', False) 17 | ] 18 | 19 | def test_gibbs(request): 20 | 21 | test_dirname = request.fspath.dirname 22 | for model, sampling_method, matrix_format, restart_im_middle in test_combo: 23 | samples = run_gibbs(model, sampling_method, matrix_format, restart_im_middle) 24 | assert is_same_as_prev_output(samples, sampling_method, model, test_dirname) 25 | 26 | def run_gibbs(model_type, sampling_method, matrix_format, restart_in_middle=False): 27 | 28 | n_burnin = 0 29 | n_post_burnin = 10 30 | thin = 1 31 | bridge_exponent = 0.25 32 | 33 | outcome, X = simulate_data(model_type, matrix_format) 34 | prior = RegressionCoefPrior( 35 | sd_for_intercept=2., regularizing_slab_size=1., 36 | bridge_exponent=bridge_exponent 37 | ) 38 | model = RegressionModel(outcome, X, model_type) 39 | bridge = BayesBridge(model, prior) 40 | 41 | if restart_in_middle: 42 | n_total_post_burnin = n_post_burnin 43 | n_post_burnin = math.ceil(n_total_post_burnin / 2) 44 | 45 | init = { 46 | 'global_scale': 0.1, 47 | 'local_scale': np.ones(X.shape[1]), 48 | } 49 | samples, mcmc_info = bridge.gibbs( 50 | n_burnin + n_post_burnin, n_burnin, init=init, thin=thin, 51 | coef_sampler_type=sampling_method, seed=0, params_to_save='all' 52 | ) 53 | 54 | if restart_in_middle: 55 | reinit_bridge = BayesBridge(model, prior) 56 | samples, mcmc_info = reinit_bridge.gibbs_resume( 57 | mcmc_info, n_post_burnin, merge=True, prev_samples=samples 58 | ) 59 | 60 | return samples 61 | 62 | def simulate_data(model, matrix_format): 63 | 64 | np.random.seed(1) 65 | n = 100 66 | p = 50 67 | 68 | # True parameters 69 | sigma_true = 2 70 | 
beta_true = np.zeros(p) 71 | beta_true[:4] = 1 72 | beta_true[4:15] = 2 ** - np.linspace(0.0, 5, 11) 73 | 74 | X = np.random.randn(n, p) 75 | 76 | if model == 'linear': 77 | outcome = LinearModel.simulate_outcome(X, beta_true, sigma_true) 78 | elif model == 'logit': 79 | n_trial = np.ones(n, dtype=np.int32) 80 | n_success = LogisticModel.simulate_outcome(n_trial, X, beta_true) 81 | outcome = (n_success, n_trial) 82 | elif model == 'cox': 83 | outcome = CoxModel.simulate_outcome(X, beta_true) 84 | else: 85 | raise NotImplementedError() 86 | 87 | if matrix_format == 'sparse': 88 | X = sp.sparse.csr_matrix(X) 89 | 90 | return outcome, X 91 | 92 | def load_data(sampling_method, model, test_dirname): 93 | filepath = '/'.join([ 94 | test_dirname, data_folder, get_filename(sampling_method, model) 95 | ]) 96 | return np.load(filepath) 97 | 98 | def get_filename(sampling_method, model): 99 | return '_'.join([ 100 | model, sampling_method, 'samples.npy' 101 | ]) 102 | 103 | def save_data(samples, sampling_method, model): 104 | filepath = data_folder + '/' + get_filename(sampling_method, model) 105 | np.save(filepath, samples['coef'][:, -1]) 106 | 107 | def is_same_as_prev_output(samples, sampling_method, model, test_dirname): 108 | prev_sample = load_data(sampling_method, model, test_dirname) 109 | return np.allclose(samples['coef'][:, -1], prev_sample, rtol=.001, atol=10e-6) 110 | 111 | 112 | if __name__ == '__main__': 113 | option = sys.argv[-1] 114 | if option == 'update': 115 | for model, sampling_method, matrix_format, restart_im_middle in test_combo: 116 | samples = run_gibbs(model, sampling_method, matrix_format, restart_im_middle) 117 | save_data(samples, sampling_method, model) -------------------------------------------------------------------------------- /tests/manual_tests/test_global_scale_prior_hyperparam.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import math\n", 10 | "import numpy as np\n", 11 | "import matplotlib.pyplot as plt" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "import sys\n", 21 | "sys.path.insert(0, '../../')\n", 22 | "\n", 23 | "from bayesbridge import BayesBridge" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "## Specify mean and sd in log10 scale and find the matching prior." 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "bridge_exponent = 1 / 8\n", 40 | "log10_mean = - 4.\n", 41 | "log10_sd = 1.\n", 42 | "gscale_parametrization = ['raw', 'coefficient'][1]" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "# Dummy data just to initialize BayesBridge\n", 52 | "y = np.random.randn(10)\n", 53 | "X = np.random.randn(10, 2)\n", 54 | "bridge = BayesBridge(y, X, global_scale_parametrization=gscale_parametrization)\n", 55 | "bridge.set_global_scale_prior(log10_mean, log10_sd, bridge_exponent)\n", 56 | "prior_param = bridge.prior_param['gscale_neg_power']" 57 | ] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": {}, 62 | "source": [ 63 | "## Check that the prior indeed has the specified mean and sd." 
64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "shape = prior_param['shape']\n", 73 | "scale = prior_param['rate'] ** -1" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "n_sample = 10 ** 6\n", 83 | "samples = np.random.gamma(shape, scale, size=n_sample) ** (- 1 / bridge_exponent)\n", 84 | "if gscale_parametrization == 'coefficient':\n", 85 | " samples *= bridge.compute_power_exp_ave_magnitude(bridge_exponent, 1.)\n", 86 | "log10_gscale_samples = np.log10(samples)\n", 87 | "\n", 88 | "mean_est = np.mean(log10_gscale_samples)\n", 89 | "sd_est = np.std(log10_gscale_samples)\n", 90 | "\n", 91 | "rtol = .01\n", 92 | "mean_is_close = abs((mean_est - log10_mean) / log10_mean) < rtol\n", 93 | "sd_is_close = abs((sd_est - log10_sd) / log10_sd) < rtol\n", 94 | "if mean_is_close and sd_is_close:\n", 95 | " print(\"Monte Carlo estimates agree with theoretical values.\")\n", 96 | "else:\n", 97 | " print(\"Warning! Monte Carlo estimates do NOT agree with theoretical values.\")" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "metadata": {}, 104 | "outputs": [], 105 | "source": [ 106 | "plt.figure(figsize=(7, 5))\n", 107 | "plt.rcParams['font.size'] = 18\n", 108 | "\n", 109 | "plt.hist(\n", 110 | " log10_gscale_samples, bins=51, density=True,\n", 111 | " label='prior dist.'\n", 112 | ")\n", 113 | "plt.axvline(\n", 114 | " mean_est, linestyle='--', color='tab:orange',\n", 115 | " label='mean'\n", 116 | ")\n", 117 | "plt.axvline(\n", 118 | " mean_est + 2 * sd_est, linestyle='--', color='tab:olive',\n", 119 | " label=r'mean $\\pm$ 2 std'\n", 120 | ")\n", 121 | "plt.axvline(\n", 122 | " mean_est - 2 * sd_est, linestyle='--', color='tab:olive'\n", 123 | ")\n", 124 | "plt.xlabel(r'$\\log(\\tau)$')\n", 125 | "plt.yticks([])\n", 126 | "plt.legend(frameon=False)\n", 127 | "plt.tight_layout()" 128 | ] 129 | } 130 | ], 131 | "metadata": { 132 | "kernelspec": { 133 | "display_name": "Python 3", 134 | "language": "python", 135 | "name": "python3" 136 | }, 137 | "language_info": { 138 | "codemirror_mode": { 139 | "name": "ipython", 140 | "version": 3 141 | }, 142 | "file_extension": ".py", 143 | "mimetype": "text/x-python", 144 | "name": "python", 145 | "nbconvert_exporter": "python", 146 | "pygments_lexer": "ipython3", 147 | "version": "3.6.10" 148 | } 149 | }, 150 | "nbformat": 4, 151 | "nbformat_minor": 4 152 | } 153 | -------------------------------------------------------------------------------- /bayesbridge/reg_coef_sampler/reg_coef_posterior_summarizer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class RegressionCoeffficientPosteriorSummarizer(): 4 | 5 | def __init__(self, n_coef, n_unshrunk, regularizing_slab_size, 6 | pc_summary_method='average'): 7 | self.n_unshrunk = n_unshrunk 8 | self.coef_scaled_summarizer = OntheflySummarizer(n_coef) 9 | self.slab_size = regularizing_slab_size 10 | self.pc_summarizer = DirectionSummarizer(pc_summary_method) 11 | 12 | def scale_coef(self, coef, gscale, lscale): 13 | coef_scaled = coef.copy() 14 | coef_scaled[self.n_unshrunk:] \ 15 | /= self.compute_prior_scale(gscale, lscale) 16 | return coef_scaled 17 | 18 | def update(self, coef, gscale, lscale): 19 | coef_scaled = self.scale_coef(coef, gscale, lscale) 20 | self.coef_scaled_summarizer.update_stats(coef_scaled) 21 | 22 | def 
update_precond_hessian_pc(self, pc): 23 | self.pc_summarizer.update(pc) 24 | 25 | def extrapolate_coef_condmean(self, gscale, lscale): 26 | coef_condmean_guess = self.coef_scaled_summarizer.stats['mean'].copy() 27 | coef_condmean_guess[self.n_unshrunk:] \ 28 | *= self.compute_prior_scale(gscale, lscale) 29 | return coef_condmean_guess 30 | 31 | def estimate_coef_precond_scale_sd(self): 32 | return self.coef_scaled_summarizer.estimate_post_sd() 33 | 34 | def estimate_precond_hessian_pc(self): 35 | return self.pc_summarizer.get_mean() 36 | 37 | def compute_prior_scale(self, gscale, lscale): 38 | """ Compute the regularized prior scale in a numerically stable way. """ 39 | unreg_prior_scale = gscale * lscale 40 | return unreg_prior_scale \ 41 | / np.sqrt(1 + (unreg_prior_scale / self.slab_size) ** 2) 42 | 43 | class DirectionSummarizer(): 44 | 45 | def __init__(self, summary_method): 46 | """ 47 | Parameters 48 | ---------- 49 | summary_method: str, {'average', 'previous'} 50 | """ 51 | self.method = summary_method 52 | self.n_averaged = 0 53 | self.v = None 54 | 55 | def update(self, v): 56 | if self.n_averaged == 0 or self.method == 'previous': 57 | self.v = v 58 | else: 59 | v *= np.sign(np.inner(self.v, v)) 60 | weight = 1 / (1 + self.n_averaged) 61 | self.v = weight * v + (1 - weight) * self.v 62 | self.n_averaged += 1 63 | 64 | def get_mean(self): 65 | return self.v 66 | 67 | 68 | class OntheflySummarizer(): 69 | """ 70 | Carries out online updates of the mean, variance, and other statistics of a 71 | random sequence. 72 | """ 73 | 74 | def __init__(self, n_param, sd_prior_samplesize=5): 75 | """ 76 | 77 | Parameters 78 | ---------- 79 | sd_prior_samplesize: int 80 | Weight on the initial estimate of the posterior standard 81 | deviation; the estimate is treated as if it were an average of 82 | 'sd_prior_samplesize' previous values. 83 | 84 | """ 85 | self.sd_prior_samplesize = sd_prior_samplesize 86 | self.sd_prior_guess = np.ones(n_param) 87 | self.n_averaged = 0 88 | self.stats = { 89 | 'mean': np.zeros(n_param), 90 | 'square': np.ones(n_param) 91 | } 92 | 93 | def update_stats(self, theta): 94 | 95 | weight = 1 / (1 + self.n_averaged) 96 | self.stats['mean'] = ( 97 | weight * theta + (1 - weight) * self.stats['mean'] 98 | ) 99 | self.stats['square'] = ( 100 | weight * theta ** 2 101 | + (1 - weight) * self.stats['square'] 102 | ) 103 | self.n_averaged += 1 104 | 105 | def estimate_post_sd(self): 106 | 107 | # TODO: implement Welford's algorithm for better numerical accuracy.
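        # A hedged sketch of what that TODO could look like (not wired in here):
        # Welford's algorithm tracks the running mean and the running sum of
        # squared deviations M2, avoiding the cancellation in E[x^2] - E[x]^2:
        #     delta = theta - mean
        #     mean += delta / n
        #     M2 += delta * (theta - mean)   # uses the *updated* mean
        #     var = M2 / (n - 1)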
108 | mean = self.stats['mean'] 109 | sec_moment = self.stats['square'] 110 | 111 | if self.n_averaged > 1: 112 | var_estimator = self.n_averaged / (self.n_averaged - 1) * ( 113 | sec_moment - mean ** 2 114 | ) 115 | estimator_weight = (self.n_averaged - 1) \ 116 | / (self.n_averaged - 1 + self.sd_prior_samplesize) 117 | sd_estimator = np.sqrt( 118 | estimator_weight * var_estimator \ 119 | + (1 - estimator_weight) * self.sd_prior_guess ** 2 120 | ) 121 | else: 122 | sd_estimator = self.sd_prior_guess 123 | 124 | return sd_estimator -------------------------------------------------------------------------------- /tests/test_likelihood_models.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random 3 | import scipy as sp 4 | import scipy.sparse 5 | from functools import partial 6 | from .derivative_tester \ 7 | import numerical_grad_is_close, numerical_direc_deriv_is_close 8 | from .helper import simulate_data 9 | from bayesbridge.model import LinearModel, LogisticModel, CoxModel 10 | 11 | 12 | def test_linear_model_gradient_and_hessian(): 13 | y, X, beta = simulate_data(model='linear', seed=0, return_design_mat=True) 14 | obs_prec = 1. 15 | linear_model = LinearModel(y, X) 16 | f = partial(linear_model.compute_loglik_and_gradient, obs_prec=obs_prec) 17 | hessian_matvec = linear_model.get_hessian_matvec_operator(beta, obs_prec) 18 | assert numerical_grad_is_close(f, beta) 19 | assert numerical_direc_deriv_is_close(f, beta, hessian_matvec, seed=0) 20 | 21 | 22 | def test_logitstic_model_hessian_matvec(): 23 | y, X, beta = simulate_data(model='logit', seed=0, return_design_mat=True) 24 | n_success, n_trial = y 25 | logit_model = LogisticModel(n_success, n_trial, X) 26 | f = logit_model.compute_loglik_and_gradient 27 | hessian_matvec = logit_model.get_hessian_matvec_operator(beta) 28 | assert numerical_direc_deriv_is_close(f, beta, hessian_matvec, seed=0) 29 | 30 | 31 | def set_up_cox_model_test(seed=0): 32 | y, X, beta = simulate_data(model='cox', seed=seed, return_design_mat=True) 33 | event_order, censoring_time = y 34 | cox_model = CoxModel(event_order, censoring_time, X) 35 | return cox_model, beta 36 | 37 | 38 | def test_cox_model_observation_reordering_and_risk_set_counting(): 39 | 40 | event_time = np.array( 41 | [1, 5, np.inf, 2.5, 2.5, np.inf, 2] 42 | ) 43 | censoring_time = np.array( 44 | [np.inf, np.inf, 3, np.inf, np.inf, 2, np.inf] 45 | ) 46 | X = np.arange(len(event_time))[:, np.newaxis] 47 | event_time, censoring_time, X = \ 48 | CoxModel._permute_observations_by_event_and_censoring_time( 49 | event_time, censoring_time, X 50 | ) 51 | assert np.all( 52 | event_time == np.array([1, 2, 2.5, 2.5, 5, np.inf, np.inf]) 53 | ) 54 | assert np.all( 55 | censoring_time == np.array([np.inf, np.inf, np.inf, np.inf, np.inf, 3, 2]) 56 | ) 57 | assert np.all(X == np.array([0, 6, 3, 4, 1, 2, 5])[:, np.newaxis]) 58 | 59 | cox_model = CoxModel(event_time, censoring_time, X) 60 | assert np.all( 61 | cox_model.risk_set_start_index == np.array([0, 1, 2, 2, 4]) 62 | ) 63 | 64 | n_censored_before_event = np.array([0, 0, 1, 1, 2]) 65 | assert np.all( 66 | cox_model.risk_set_end_index \ 67 | == len(event_time) - 1 - n_censored_before_event 68 | ) 69 | assert np.all( 70 | cox_model.n_appearance_in_risk_set == np.array([1, 2, 4, 4, 5, 4, 2]) 71 | ) # Tied events are both considered to be in the risk set. 
72 | 73 | 74 | def test_cox_model_drop_uninformative(): 75 | event_time = np.array( 76 | [2, 4, np.inf, np.inf] 77 | ) 78 | censoring_time = np.array( 79 | [np.inf, np.inf, 3, 1] 80 | ) 81 | X = np.arange(4)[:, np.newaxis] 82 | event_time, censoring_time, X = \ 83 | CoxModel._drop_uninformative_observations(event_time, censoring_time, X) 84 | assert np.all(event_time == np.array([2, 4, np.inf])) 85 | assert np.all(censoring_time == np.array([np.inf, np.inf, 3])) 86 | assert np.all(X == np.array([0, 1, 2])[:, np.newaxis]) 87 | 88 | 89 | def test_cox_model_sum_over_risk_set(): 90 | arr = np.array([1, 3, 2]) 91 | start_index = np.array([0, 1]) 92 | end_index = np.array([2, 1]) 93 | assert np.all( 94 | CoxModel._sum_over_start_end(arr, start_index, end_index) == np.array([6, 3]) 95 | ) 96 | 97 | def test_cox_model_sum_over_events(): 98 | 99 | cox_model, beta = set_up_cox_model_test() 100 | _, hazard_increase, sum_over_risk_set \ 101 | = cox_model._compute_relative_hazard(beta) 102 | hazard_matrix = cox_model._HazardMultinomialProbMatrix( 103 | hazard_increase, sum_over_risk_set, 104 | cox_model.risk_set_start_index, 105 | cox_model.risk_set_end_index, 106 | cox_model.n_appearance_in_risk_set 107 | ) 108 | assert np.allclose( 109 | hazard_matrix.sum_over_events(), 110 | np.sum(hazard_matrix.compute_matrix(), 0) 111 | ) 112 | 113 | 114 | def test_cox_model_gradient(): 115 | cox_model, beta = set_up_cox_model_test() 116 | f = cox_model.compute_loglik_and_gradient 117 | assert numerical_grad_is_close(f, beta) 118 | 119 | 120 | def test_cox_model_hessian_matvec(): 121 | cox_model, beta = set_up_cox_model_test() 122 | f = cox_model.compute_loglik_and_gradient 123 | hessian_matvec = cox_model.get_hessian_matvec_operator(beta) 124 | assert numerical_direc_deriv_is_close(f, beta, hessian_matvec, seed=0) -------------------------------------------------------------------------------- /bayesbridge/model/logistic_model.py: -------------------------------------------------------------------------------- 1 | from .abstract_model import AbstractModel 2 | import numpy as np 3 | import numpy.random 4 | from warnings import warn 5 | 6 | class LogisticModel(AbstractModel): 7 | 8 | # TODO: Python crashes during the Gibbs sampling if n_success has a second 9 | # dimension (instead of being a vector). Add checks for the inputs. 10 | def __init__(self, n_success, n_trial, design): 11 | 12 | self.check_input_validity(n_success, n_trial, design) 13 | if n_trial is None: 14 | n_trial = np.ones(len(n_success)) 15 | warn( 16 | "The numbers of trials were not specified. Binary " 17 | "outcomes are assumed." 18 | ) 19 | 20 | self.n_trial = n_trial.astype('float64') 21 | self.n_success = n_success.astype('float64') 22 | self.design = design 23 | self.name = 'logit' 24 | 25 | def check_input_validity(self, n_success, n_trial, design): 26 | 27 | if n_trial is None: 28 | if np.max(n_success) > 1: 29 | raise ValueError( 30 | "If not binary, the number of trials must be specified.") 31 | if not len(n_success) == design.shape[0]: 32 | raise ValueError( 33 | "Incompatible sizes of the outcome and design matrix." 34 | ) 35 | return # No need to check the rest for the default initialization. 36 | 37 | if not len(n_trial) == len(n_success) == design.shape[0]: 38 | raise ValueError( 39 | "Incompatible sizes of the outcome vectors and design matrix."
40 | ) 41 | 42 | if np.any(n_trial <= 0): 43 | raise ValueError("Number of trials must be strictly positive.") 44 | 45 | if np.any(n_success > n_trial): 46 | raise ValueError( 47 | "Number of successes cannot be larger than that of trials.") 48 | 49 | def compute_loglik_and_gradient(self, beta, loglik_only=False): 50 | logit_prob = self.design.dot(beta) 51 | predicted_prob = LogisticModel.convert_to_probability_scale(logit_prob) 52 | loglik = np.sum( 53 | self.n_success * logit_prob \ 54 | - self.n_trial * np.logaddexp(0, logit_prob) 55 | ) 56 | if loglik_only: 57 | grad = None 58 | else: 59 | grad = self.design.Tdot(self.n_success - self.n_trial * predicted_prob) 60 | return loglik, grad 61 | 62 | def compute_hessian(self, beta): 63 | predicted_prob = LogisticModel.compute_predicted_prob(self.design, beta) 64 | weight = predicted_prob * (1 - predicted_prob) 65 | return - self.design.compute_fisher_info(weight) 66 | 67 | def get_hessian_matvec_operator(self, beta): 68 | predicted_prob = LogisticModel.compute_predicted_prob(self.design, beta) 69 | weight = predicted_prob * (1 - predicted_prob) 70 | hessian_op = lambda v: \ 71 | - self.design.Tdot(self.n_trial * weight * self.design.dot(v)) 72 | return hessian_op 73 | 74 | def calc_intercept_mle(self): 75 | binom_prob_mle = self.n_success.mean() / self.n_trial.mean() 76 | intercept = np.log(binom_prob_mle / (1 - binom_prob_mle)) 77 | return intercept 78 | 79 | @staticmethod 80 | def compute_polya_gamma_mean(shape, tilt): 81 | min_magnitude = 1e-5 82 | pg_mean = shape.copy() / 2 83 | is_nonzero = (np.abs(tilt) > min_magnitude) 84 | pg_mean[is_nonzero] \ 85 | *= 1 / tilt[is_nonzero] \ 86 | * (np.exp(tilt[is_nonzero]) - 1) / (np.exp(tilt[is_nonzero]) + 1) 87 | return pg_mean 88 | 89 | @staticmethod 90 | def compute_predicted_prob(X, beta, truncate=False): 91 | logit_prob = X.dot(beta) 92 | return LogisticModel.convert_to_probability_scale(logit_prob, truncate) 93 | 94 | @staticmethod 95 | def convert_to_probability_scale(logit_prob, truncate=False): 96 | # The flag 'truncate == True' guarantees 0 < prob < 1. 
97 | if truncate: 98 | upper_bd = 36.7 # approximately - log(2 ** -53) 99 | lower_bd = - 709 # approximately - log(2 ** 1023) 100 | logit_prob[logit_prob > upper_bd] = upper_bd 101 | logit_prob[logit_prob < lower_bd] = lower_bd 102 | prob = 1 / (1 + np.exp(-logit_prob)) 103 | return prob 104 | 105 | @staticmethod 106 | def simulate_outcome(n_trial, X, beta, seed=None): 107 | """ 108 | Parameters 109 | ---------- 110 | X : DesignMatrix, numpy/scipy matrix 111 | Only needs to support the `dot()` operation 112 | """ 113 | prob = LogisticModel.compute_predicted_prob(X, beta) 114 | if seed is not None: 115 | np.random.seed(seed) 116 | y = np.random.binomial(n_trial, prob) 117 | return y -------------------------------------------------------------------------------- /bayesbridge/random/tilted_stable/compare_methods_speed.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import sys\n", 10 | "sys.path.insert(0, '../')\n", 11 | "\n", 12 | "import time\n", 13 | "import numpy as np\n", 14 | "import matplotlib.pyplot as plt\n", 15 | "from tilted_stable import ExpTiltedStableDist" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "## Specify the range of parameters to run the samplers" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "bridge_exponent = 1. / 16\n", 32 | "divide_conquer_cost = 10 ** np.linspace(-1., 1., 101)\n", 33 | "\n", 34 | "char_exponent = bridge_exponent / 2\n", 35 | "tilt = divide_conquer_cost ** (1. / char_exponent)\n", 36 | "# For Bayesian bridge, tilt parameter is given by beta / global_scale\n", 37 | "tilt_power = tilt ** char_exponent" 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": {}, 43 | "source": [ 44 | "## Time the samplers at given parameter values" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "tilted_stable = ExpTiltedStableDist(seed=0)\n", 54 | "\n", 55 | "def time_method(char_exponent, tilt, method, n_rep=1000):\n", 56 | " start = time.time()\n", 57 | " tilted_stable.sample(\n", 58 | " char_exponent * np.ones(n_rep), tilt * np.ones(n_rep), \n", 59 | " method=method\n", 60 | " );\n", 61 | " elapsed = time.time() - start\n", 62 | " return elapsed" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "n_repetition = 1000\n", 72 | "\n", 73 | "exec_time = {\n", 74 | " method: \n", 75 | " np.array([\n", 76 | " time_method(char_exponent, tilt_i, method, n_repetition)\n", 77 | " for tilt_i in tilt\n", 78 | " ]) \n", 79 | " for method in ['double-rejection', 'divide-conquer']\n", 80 | "}" 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": {}, 86 | "source": [ 87 | "## Plot the result" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [ 96 | "plt.figure(figsize=(7, 4.5))\n", 97 | "plt.rcParams['font.size'] = 18\n", 98 | "\n", 99 | "for method in ['double-rejection', 'divide-conquer']:\n", 100 | " plt.plot(tilt_power, exec_time[method] / n_repetition)\n", 101 | "plt.xlabel('Cost of divide-conquer (= tilt ^ char-exponent)')\n", 102 | "plt.ylabel('Sec. 
per sample')\n", 103 | "plt.ticklabel_format(axis='y', scilimits=(0,0))\n", 104 | "plt.ylim(bottom=0)\n", 105 | "\n", 106 | "for side in ['top', 'right']:\n", 107 | " plt.gca().spines[side].set_visible(False)" 108 | ] 109 | }, 110 | { 111 | "cell_type": "markdown", 112 | "metadata": {}, 113 | "source": [ 114 | "## Run a basic check to confim the sampler chooses the faster method \n", 115 | "Call the method with randomly generated tilting parameters. If the sampler correctly chooses the faster method for each parameter setting, then it should run faster than using one fixed method for all the parameter settings." 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": null, 121 | "metadata": {}, 122 | "outputs": [], 123 | "source": [ 124 | "n_samples = 10 ** 3\n", 125 | "\n", 126 | "double_rejection_cost = 2.\n", 127 | "divide_conquer_cost \\\n", 128 | " = double_rejection_cost * np.random.exponential(size=n_samples)\n", 129 | "tilt = divide_conquer_cost ** (1. / char_exponent)" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": null, 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "tilted_stable.set_seed(0)\n", 139 | "%timeit -n 100 tilted_stable.sample(char_exponent, tilt)\n", 140 | "\n", 141 | "tilted_stable.set_seed(0)\n", 142 | "%timeit -n 100 tilted_stable.sample(char_exponent, tilt, 'double-rejection')\n", 143 | "\n", 144 | "tilted_stable.set_seed(0)\n", 145 | "%timeit -n 100 tilted_stable.sample(char_exponent, tilt, 'divide-conquer')" 146 | ] 147 | } 148 | ], 149 | "metadata": { 150 | "kernelspec": { 151 | "display_name": "Python 3", 152 | "language": "python", 153 | "name": "python3" 154 | }, 155 | "language_info": { 156 | "codemirror_mode": { 157 | "name": "ipython", 158 | "version": 3 159 | }, 160 | "file_extension": ".py", 161 | "mimetype": "text/x-python", 162 | "name": "python", 163 | "nbconvert_exporter": "python", 164 | "pygments_lexer": "ipython3", 165 | "version": "3.7.7" 166 | } 167 | }, 168 | "nbformat": 4, 169 | "nbformat_minor": 4 170 | } 171 | -------------------------------------------------------------------------------- /simulate_data.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | import scipy as sp 4 | import scipy.sparse 5 | from bayesbridge.model import CoxModel 6 | 7 | 8 | def simulate_outcome(X, beta, model, intercept=0., n_trial=None, seed=None): 9 | 10 | if seed is not None: 11 | np.random.seed(seed) 12 | 13 | if model == 'linear': 14 | sigma = 1. 
15 | outcome = intercept + X.dot(beta) + sigma * np.random.randn(X.shape[0]) 16 | elif model == 'logit': 17 | if n_trial is None: 18 | n_trial = np.ones(X.shape[0]) 19 | prob = 1 / (1 + np.exp(- intercept - X.dot(beta))) 20 | n_success = np.random.binomial(n_trial.astype(np.int32), prob) 21 | outcome = (n_success, n_trial) 22 | elif model == 'cox': 23 | outcome = CoxModel.simulate_outcome(X, beta, censoring_frac=.5) 24 | else: 25 | raise NotImplementedError() 26 | 27 | return outcome 28 | 29 | def simulate_design( 30 | n_obs, n_pred, binary_frac=0., categorical_frac=0., 31 | corr_dense_design=False, binary_pred_freq=.1, n_category=5, 32 | shuffle_columns=False, seed=None, format_='sparse' 33 | ): 34 | 35 | if seed is not None: 36 | np.random.seed(seed) 37 | 38 | n_dense_pred = int(n_pred * (1 - binary_frac - categorical_frac)) 39 | n_categorical_pred = int((n_pred * categorical_frac) / (n_category - 1)) 40 | n_binary_pred = n_pred - n_dense_pred - n_categorical_pred * (n_category - 1) 41 | 42 | X_dense = simulate_dense_design(n_obs, n_dense_pred, corr_dense_design) 43 | 44 | if n_binary_pred + n_categorical_pred == 0: 45 | X = X_dense 46 | else: 47 | X_binary = simulate_binary_design(n_obs, n_binary_pred, binary_pred_freq) 48 | X_categorical = simulate_categorical_design( 49 | n_obs, n_categorical_pred, n_category 50 | ) 51 | X = sp.sparse.hstack(( 52 | sp.sparse.csr_matrix(X_dense), X_binary, X_categorical 53 | )).tocsr() 54 | 55 | if shuffle_columns: 56 | X = X[:, np.random.permutation(n_pred)] 57 | 58 | if format_ == 'sparse': 59 | X = sp.sparse.csr_matrix(X) 60 | elif sp.sparse.issparse(X): 61 | X = X.toarray() 62 | 63 | return X 64 | 65 | def simulate_dense_design(n_obs, n_pred, corr_design, standardize=False): 66 | if corr_design: 67 | X = generate_corr_design(n_obs, n_pred) 68 | else: 69 | X = np.random.randn(n_obs, n_pred) 70 | if standardize: 71 | X = np_standardize(X) 72 | return X 73 | 74 | def np_standardize(X, divide_by='std'): 75 | X = X - np.mean(X, axis=0)[np.newaxis, :] 76 | if divide_by == 'max': 77 | X = X / np.max(X, axis=0)[np.newaxis, :] 78 | else: 79 | X = X / np.std(X, axis=0)[np.newaxis, :] 80 | return X 81 | 82 | def generate_corr_design(n_obs, n_pred, n_factor=None, max_sd=100, min_sd=1): 83 | """ 84 | Each column is drawn from a Gaussian with a covariance proportional to 85 | I + F L F' 86 | where F is an orthogonal matrix of size p by n_factor and L is diagonal. 87 | """ 88 | if n_factor is None: 89 | n_factor = min(100, int(n_pred / 2)) 90 | factor, _ = np.linalg.qr(np.random.randn(n_pred, n_factor)) 91 | principal_comp_sd = np.linspace(max_sd, min_sd, n_factor + 1) 92 | loading = principal_comp_sd[:n_factor] - min_sd 93 | X = np.dot( 94 | factor, 95 | loading[:, np.newaxis] * np.random.randn(n_factor, n_obs) 96 | ).T 97 | X += min_sd * np.random.randn(n_obs, n_pred) 98 | return X 99 | 100 | def simulate_binary_design(n_obs, n_binary_pred, sparsity, max_freq_per_col=.5): 101 | """ 102 | Returns a binary matrix where the non-zero frequency (on average) equals 103 | the value of 'sparsity'. Also, the non-zero frequency along each column is 104 | bounded by 'max_freq_per_col'. 105 | """ 106 | if n_binary_pred == 0: 107 | return None 108 | 109 | a = .5 110 | b = a * (max_freq_per_col / sparsity - 1) 111 | # Solve a / (a + b) = sparsity / max_freq_per_col for 'b'. 
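    # Added note: a Beta(a, b) draw has mean a / (a + b), so the choice of 'b'
    # above gives a / (a + b) = sparsity / max_freq_per_col, and the expected
    # non-zero frequency max_freq_per_col * a / (a + b) then equals 'sparsity'.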
112 | binary_freq = max_freq_per_col * np.random.beta(a, b, n_binary_pred) 113 | X = np.zeros((n_obs, n_binary_pred)) 114 | for j in range(n_binary_pred): 115 | nnz = math.ceil(n_obs * binary_freq[j]) 116 | X[np.random.choice(n_obs, nnz, replace=False), j] = 1. 117 | return X 118 | 119 | def simulate_categorical_design(n_obs, n_categorical_pred, n_category=5): 120 | if n_categorical_pred == 0: 121 | return None 122 | 123 | X = sp.sparse.hstack([ 124 | sp.sparse.csr_matrix(draw_categorical_pred(n_obs, n_category)) 125 | for dummy in range(n_categorical_pred) 126 | ]) 127 | return X 128 | 129 | def draw_categorical_pred(n_obs, n_category): 130 | # Returns a matrix of size n by (n_category - 1). 131 | category_freq = np.random.dirichlet(np.ones(n_category)) 132 | category_freq = np.sort(category_freq)[::-1][1:] 133 | # Use the most frequent category as baseline 134 | n_within_category = np.concatenate(( 135 | [0], np.floor(n_obs * np.cumsum(category_freq)) 136 | )).astype(np.int) 137 | X = np.zeros((n_obs, n_category - 1)) 138 | for j in range(n_category - 1): 139 | start = n_within_category[j] 140 | end = n_within_category[j + 1] 141 | X[start:end, j] = 1 142 | X = X[np.random.permutation(n_obs), :] 143 | return X 144 | -------------------------------------------------------------------------------- /tests/test_prior.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from .helper import simulate_data 3 | from bayesbridge.model import LinearModel, LogisticModel, CoxModel 4 | from bayesbridge import BayesBridge, RegressionModel, RegressionCoefPrior 5 | 6 | 7 | def test_clone(): 8 | 9 | kwargs = { 10 | 'bridge_exponent': 1. / 8, 11 | 'n_fixed_effect': 1, 12 | 'sd_for_fixed_effect': 1.11, 13 | 'regularizing_slab_size': 2.22, 14 | 'global_scale_prior_hyper_param': {'log10_mean': - 4., 'log10_sd': 1.} 15 | } 16 | 17 | prior = RegressionCoefPrior(**kwargs) 18 | 19 | changed_kw = { 20 | 'n_fixed_effect': 3, 21 | 'global_scale_prior_hyper_param': {'log10_mean': - 6., 'log10_sd': 1.5} 22 | } 23 | kwargs_alt = kwargs.copy() 24 | for key, val in changed_kw.items(): 25 | kwargs_alt[key] = val 26 | cloned = prior.clone(**changed_kw) 27 | changed_prior = RegressionCoefPrior(**kwargs_alt) 28 | 29 | assert np.all( 30 | cloned.__dict__.pop('sd_for_fixed') 31 | == changed_prior.__dict__.pop('sd_for_fixed') 32 | ) 33 | assert cloned.__dict__ == changed_prior.__dict__ 34 | 35 | 36 | def test_gscale_parametrization(): 37 | """ Check that the Gamma hyper-parameters do not depend on parametrization. """ 38 | 39 | gscale_hyper_param = {'log10_mean': - 4., 'log10_sd': 1.} 40 | bridge_exp = .25 41 | 42 | prior_coef_scale = RegressionCoefPrior( 43 | bridge_exponent=bridge_exp, 44 | global_scale_prior_hyper_param=gscale_hyper_param, 45 | _global_scale_parametrization='coef_magnitude' 46 | ) 47 | 48 | unit_bridge_magnitude \ 49 | = RegressionCoefPrior.compute_power_exp_ave_magnitude(bridge_exp) 50 | gscale_hyper_param['log10_mean'] -= np.log10(unit_bridge_magnitude) 51 | prior_raw_scale = RegressionCoefPrior( 52 | bridge_exponent=bridge_exp, 53 | global_scale_prior_hyper_param=gscale_hyper_param, 54 | _global_scale_parametrization='raw' 55 | ) 56 | assert ( 57 | prior_coef_scale.param['gscale_neg_power'] == prior_raw_scale.param['gscale_neg_power'] 58 | ) 59 | 60 | 61 | def test_gscale_paramet_invariance(): 62 | """ Check sampler outputs are invariant under global scale parametrization. 
""" 63 | 64 | y, X, beta = simulate_data(model='logit', seed=0) 65 | model = RegressionModel(y, X, family='logit') 66 | bridge_exp = .25 67 | bridge_magnitude \ 68 | = RegressionCoefPrior.compute_power_exp_ave_magnitude(bridge_exp) 69 | init_gscale = 0.1 70 | init_lscale = np.ones(X.shape[1]) 71 | init_raw_gscale = init_gscale / bridge_magnitude 72 | init_raw_lscale = bridge_magnitude * init_lscale 73 | init = { 74 | 'global_scale': init_gscale, 75 | 'local_scale': init_lscale 76 | } 77 | raw_init = { 78 | 'global_scale': init_raw_gscale, 79 | 'local_scale': init_raw_lscale 80 | } 81 | 82 | # Two samples should agree since the default prior is scale invariant. 83 | prior = RegressionCoefPrior( 84 | bridge_exponent=bridge_exp, 85 | regularizing_slab_size=1., 86 | _global_scale_parametrization='raw' 87 | ) 88 | bridge = BayesBridge(model, prior) 89 | coef_sample_raw_scaling = get_last_sample_from_gibbs(bridge, raw_init) 90 | 91 | prior = RegressionCoefPrior( 92 | bridge_exponent=bridge_exp, 93 | regularizing_slab_size=1., 94 | _global_scale_parametrization='coef_magnitude' 95 | ) 96 | bridge = BayesBridge(model, prior) 97 | coef_sample_expected_mag_scaling = get_last_sample_from_gibbs(bridge, init) 98 | 99 | assert np.allclose( 100 | coef_sample_raw_scaling, 101 | coef_sample_expected_mag_scaling, 102 | rtol=1e-10 103 | ) 104 | 105 | # Place a prior on the global scale; the two samples should *not* coincide. 106 | 107 | gscale_hyper_param = { 108 | 'log10_mean': -2. - np.log10(bridge_magnitude), 109 | 'log10_sd': 1., 110 | } 111 | prior = RegressionCoefPrior( 112 | bridge_exponent=bridge_exp, 113 | regularizing_slab_size=1., 114 | global_scale_prior_hyper_param=gscale_hyper_param, 115 | _global_scale_parametrization='raw' 116 | ) 117 | bridge = BayesBridge(model, prior) 118 | coef_sample_raw_scaling \ 119 | = get_last_sample_from_gibbs(bridge, raw_init) 120 | 121 | prior = RegressionCoefPrior( 122 | bridge_exponent=bridge_exp, 123 | regularizing_slab_size=1., 124 | global_scale_prior_hyper_param=gscale_hyper_param, 125 | _global_scale_parametrization='coef_magnitude' 126 | ) 127 | bridge = BayesBridge(model, prior) 128 | coef_sample_expected_mag_scaling \ 129 | = get_last_sample_from_gibbs(bridge, init) 130 | 131 | assert not np.allclose( 132 | coef_sample_raw_scaling, 133 | coef_sample_expected_mag_scaling, 134 | rtol=1e-10 135 | ) 136 | 137 | # After appropriately adjusting the hyper-parameter, the two samples 138 | # should agree. 
139 | gscale_hyper_param['log10_mean'] += np.log10(bridge_magnitude) 140 | prior = prior.clone(global_scale_prior_hyper_param=gscale_hyper_param) 141 | bridge = BayesBridge(model, prior) 142 | coef_sample_expected_mag_scaling \ 143 | = get_last_sample_from_gibbs(bridge, init) 144 | 145 | assert np.allclose( 146 | coef_sample_raw_scaling, 147 | coef_sample_expected_mag_scaling, 148 | rtol=1e-10 149 | ) 150 | 151 | 152 | def get_last_sample_from_gibbs(bridge, init, seed=0): 153 | samples, _ = bridge.gibbs( 154 | n_iter=10, n_burnin=0, init=init, 155 | coef_sampler_type='cholesky', 156 | seed=seed, n_status_update=0 157 | ) 158 | return samples['coef'][:, -1] -------------------------------------------------------------------------------- /bayesbridge/reg_coef_sampler/cg_sampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy as sp 3 | import scipy.sparse 4 | import scipy.linalg 5 | from warnings import warn 6 | 7 | try: 8 | import cupyx.scipy.sparse.linalg 9 | import cupyx as cpx 10 | import cupy as cp 11 | except (ImportError, ModuleNotFoundError) as e: 12 | cp = None 13 | cupy_exception = e 14 | 15 | class ConjugateGradientSampler(): 16 | 17 | def __init__(self, n_coef_wo_shrinkage): 18 | self.n_coef_wo_shrinkage = n_coef_wo_shrinkage 19 | 20 | def sample( 21 | self, design, obs_prec, prior_prec_sqrt, z, 22 | coef_cg_init=None, precond_by='prior', coef_scaled_sd=None, 23 | maxiter=None, atol=10e-6, seed=None): 24 | """ 25 | Generate a multi-variate Gaussian with mean mu and covariance Sigma of the form 26 | mu = Sigma z, 27 | Sigma^{-1} = X' diag(obs_prec) X + prior_prec_sqrt ** 2, 28 | For numerical stability, the code first sample from the scaled parameter 29 | coef / precond_scale. 30 | 31 | Param: 32 | ------ 33 | D : vector 34 | atol : float 35 | The absolute tolerance on the residual norm at the termination 36 | of CG iterations. 37 | coef_scaled_sd : vector of length design.shape[1] 38 | Used to estimate a good preconditioning scale for the coefficient 39 | without shrinkage. Used only if precond_by == 'prior'. 40 | precond_by : {'prior', 'diag'} 41 | """ 42 | if design.use_cupy: 43 | coef_cg_init = cp.asarray(coef_cg_init) 44 | coef_scaled_sd = cp.asarray(coef_scaled_sd) 45 | prior_prec_sqrt = cp.asarray(prior_prec_sqrt) 46 | cg = cpx.scipy.sparse.linalg.cg 47 | LinearOperator = cpx.scipy.sparse.linalg.LinearOperator 48 | else: 49 | cg = sp.sparse.linalg.cg 50 | LinearOperator = sp.sparse.linalg.LinearOperator 51 | if seed is not None: 52 | np.random.seed(seed) 53 | 54 | # Define a preconditioned linear operator. 55 | Prec_precond_op, precond_scale = \ 56 | self.precondition_linear_system( 57 | prior_prec_sqrt, obs_prec, design, precond_by, coef_scaled_sd, LinearOperator 58 | ) 59 | 60 | # Draw a target vector. 61 | randn_vec_1 = np.random.randn(design.shape[0]) 62 | randn_vec_2 = np.random.randn(design.shape[1]) 63 | if design.use_cupy: 64 | randn_vec_1 = cp.asarray(randn_vec_1) 65 | randn_vec_2 = cp.asarray(randn_vec_2) 66 | v = design.Tdot(obs_prec ** (1 / 2) * randn_vec_1) \ 67 | + prior_prec_sqrt * randn_vec_2 68 | b = precond_scale * (z + v) 69 | 70 | # Callback function to count the number of PCG iterations. 71 | cg_info = {'n_iter': 0} 72 | def cg_callback(x): cg_info['n_iter'] += 1 73 | 74 | # Run PCG. 
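        # Added note: the scipy/cupy `cg` solvers interpret `tol` relative to
        # norm(b), so dividing the desired absolute tolerance by norm(b) below
        # converts it into the equivalent relative tolerance.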
75 | rtol = atol / np.linalg.norm(b) 76 | coef_scaled_cg_init = coef_cg_init / precond_scale 77 | coef_scaled, info = cg( 78 | Prec_precond_op, b, x0=coef_scaled_cg_init, maxiter=maxiter, tol=rtol, 79 | callback=cg_callback 80 | ) 81 | 82 | if info != 0: 83 | warn( 84 | "The conjugate gradient algorithm did not achieve the requested " + 85 | "tolerance level. You may increase the maxiter or use the dense " + 86 | "linear algebra instead." 87 | ) 88 | 89 | coef = precond_scale * coef_scaled 90 | cg_info['valid_input'] = (info >= 0) 91 | cg_info['converged'] = (info == 0) 92 | if design.use_cupy: 93 | coef = cp.asnumpy(coef) 94 | return coef, cg_info 95 | 96 | def precondition_linear_system( 97 | self, prior_prec_sqrt, obs_prec, design, precond_by, coef_scaled_sd, LinearOperator): 98 | 99 | # Compute the preconditioners. 100 | precond_scale = self.choose_preconditioner( 101 | prior_prec_sqrt, obs_prec, design, precond_by, coef_scaled_sd 102 | ) 103 | 104 | # Define a preconditioned linear operator. 105 | precond_prior_prec = (precond_scale * prior_prec_sqrt) ** 2 106 | def Prec_precond(x): 107 | Prec_precond_x = precond_prior_prec * x \ 108 | + precond_scale * design.Tdot(obs_prec * design.dot(precond_scale * x)) 109 | return Prec_precond_x 110 | Prec_precond_op = LinearOperator( 111 | (design.shape[1], design.shape[1]), matvec=Prec_precond 112 | ) 113 | return Prec_precond_op, precond_scale 114 | 115 | def choose_preconditioner( 116 | self, prior_prec_sqrt, obs_prec, design, precond_by, beta_scaled_sd): 117 | 118 | precond_scale = self.choose_diag_preconditioner( 119 | prior_prec_sqrt, obs_prec, design, precond_by, beta_scaled_sd) 120 | 121 | return precond_scale 122 | 123 | def choose_diag_preconditioner( 124 | self, prior_prec_sqrt, obs_prec, design, precond_by='diag', 125 | beta_scaled_sd=None): 126 | # Compute the diagonal (sqrt) preconditioner. 127 | 128 | if precond_by == 'prior': 129 | precond_scale = cp.ones(len(prior_prec_sqrt)) if design.use_cupy \ 130 | else np.ones(len(prior_prec_sqrt)) 131 | precond_scale[self.n_coef_wo_shrinkage:] = \ 132 | prior_prec_sqrt[self.n_coef_wo_shrinkage:] ** -1 133 | if self.n_coef_wo_shrinkage > 0: 134 | target_sd_scale = 2. 135 | # Larger than 1 because it is better to err on the side 136 | # of introducing large precisions. 
137 | precond_scale[:self.n_coef_wo_shrinkage] = \ 138 | target_sd_scale * beta_scaled_sd[:self.n_coef_wo_shrinkage] 139 | 140 | elif precond_by == 'diag': 141 | diag = prior_prec_sqrt ** 2 \ 142 | + design.compute_fisher_info(weight=obs_prec, diag_only=True) 143 | precond_scale = 1 / np.sqrt(diag) 144 | 145 | elif precond_by is None: 146 | precond_scale = np.ones(design.shape[1]) 147 | 148 | else: 149 | raise NotImplementedError() 150 | 151 | return precond_scale -------------------------------------------------------------------------------- /bayesbridge/reg_coef_sampler/hamiltonian_monte_carlo/hmc.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import math 3 | import time 4 | from .stepsize_adapter import HamiltonianBasedStepsizeAdapter, initialize_stepsize 5 | from .util import warn_message_only 6 | from .dynamics import HamiltonianDynamics 7 | 8 | 9 | dynamics = HamiltonianDynamics() 10 | integrator = dynamics.integrate 11 | compute_hamiltonian = dynamics.compute_hamiltonian 12 | draw_momentum = dynamics.draw_momentum 13 | 14 | 15 | def generate_samples( 16 | f, q0, n_burnin, n_sample, nstep_range, dt_range=None, 17 | seed=None, n_update=0, adapt_stepsize=False, target_accept_prob=.9, 18 | final_adaptsize=.05): 19 | """ Run HMC and return samples and some additional info. """ 20 | 21 | if seed is not None: 22 | np.random.seed(seed) 23 | 24 | q = q0 25 | logp, grad = f(q) 26 | 27 | if np.isscalar(dt_range): 28 | dt_range = np.array(2 * [dt_range]) 29 | 30 | elif dt_range is None: 31 | p = draw_momentum(len(q)) 32 | logp_joint0 = - compute_hamiltonian(logp, p) 33 | dt = initialize_stepsize( 34 | lambda dt: compute_onestep_accept_prob(dt, f, q, p, grad, logp_joint0) 35 | ) 36 | dt_range = dt * np.array([.8, 1.0]) 37 | adapt_stepsize = True 38 | 39 | if np.isscalar(nstep_range): 40 | nstep_range = np.array(2 * [nstep_range]) 41 | 42 | max_stepsize_adapter = HamiltonianBasedStepsizeAdapter( 43 | init_stepsize=1., target_accept_prob=target_accept_prob, 44 | reference_iteration=n_burnin, adaptsize_at_reference=final_adaptsize 45 | ) 46 | 47 | if n_update > 0: 48 | n_per_update = math.ceil((n_burnin + n_sample) / n_update) 49 | else: 50 | n_per_update = float('inf') 51 | 52 | samples = np.zeros((len(q), n_sample + n_burnin)) 53 | logp_samples = np.zeros(n_sample + n_burnin) 54 | accept_prob = np.zeros(n_sample + n_burnin) 55 | 56 | tic = time.time() # Start clock 57 | use_averaged_stepsize = False 58 | for i in range(n_sample + n_burnin): 59 | dt = np.random.uniform(dt_range[0], dt_range[1]) 60 | dt *= max_stepsize_adapter.get_current_stepsize(use_averaged_stepsize) 61 | nstep = np.random.randint(nstep_range[0], nstep_range[1] + 1) 62 | q, info = generate_next_state( 63 | f, dt, nstep, q, logp0=logp, grad0=grad 64 | ) 65 | logp, grad, pathlen, accept_prob[i] = ( 66 | info[key] for key in ['logp', 'grad', 'n_grad_evals', 'accept_prob'] 67 | ) 68 | if i < n_burnin and adapt_stepsize: 69 | max_stepsize_adapter.adapt_stepsize(info['hamiltonian_error']) 70 | elif i == n_burnin - 1: 71 | use_averaged_stepsize = True 72 | samples[:, i] = q 73 | logp_samples[i] = logp 74 | if (i + 1) % n_per_update == 0: 75 | print('{:d} iterations have been completed.'.format(i + 1)) 76 | 77 | toc = time.time() 78 | time_elapsed = toc - tic 79 | 80 | return samples, logp_samples, accept_prob, time_elapsed 81 | 82 | 83 | def compute_onestep_accept_prob(dt, f, q0, p0, grad0, logp_joint0): 84 | _, p, logp, _ = integrator(f, dt, q0, p0, grad0) 85 | 
logp_joint = - compute_hamiltonian(logp, p) 86 | accept_prob = np.exp(logp_joint - logp_joint0) 87 | return accept_prob 88 | 89 | 90 | def generate_next_state( 91 | f, dt, n_step, q0, 92 | p0=None, logp0=None, grad0=None, hamiltonian_tol=100.): 93 | 94 | n_grad_evals = 0 95 | 96 | if (logp0 is None) or (grad0 is None): 97 | logp0, grad0 = f(q0) 98 | n_grad_evals += 1 99 | 100 | if p0 is None: 101 | p0 = draw_momentum(len(q0)) 102 | 103 | log_joint0 = - compute_hamiltonian(logp0, p0) 104 | 105 | q, p, logp, grad, simulation_info = simulate_dynamics( 106 | f, dt, n_step, q0, p0, logp0, grad0, hamiltonian_tol 107 | ) 108 | n_grad_evals += simulation_info['n_grad_evals'] 109 | instability_detected = simulation_info['instability_detected'] 110 | 111 | if instability_detected: 112 | acceptprob = 0. 113 | hamiltonian_error = - float('inf') 114 | else: 115 | log_joint = - compute_hamiltonian(logp, p) 116 | hamiltonian_error = log_joint - log_joint0 117 | acceptprob = min(1, np.exp(hamiltonian_error)) 118 | 119 | accepted = acceptprob > np.random.rand() 120 | if not accepted: 121 | q = q0 122 | logp = logp0 123 | grad = grad0 124 | 125 | info = { 126 | 'logp': logp, 127 | 'grad': grad, 128 | 'accepted': accepted, 129 | 'accept_prob': acceptprob, 130 | 'hamiltonian_error': hamiltonian_error, 131 | 'instability_detected': instability_detected, 132 | 'n_grad_evals': n_grad_evals 133 | } 134 | 135 | return q, info 136 | 137 | 138 | def simulate_dynamics(f, dt, n_step, q0, p0, logp0, grad0, hamiltonian_tol=float('inf')): 139 | 140 | n_grad_evals = 0 141 | instability_detected = False 142 | 143 | # Keep track of Hamiltonians along the trajectory. 144 | hamiltonians = np.full(n_step + 1, float('nan')) 145 | hamiltonian = compute_hamiltonian(logp0, p0) 146 | hamiltonians[0] = hamiltonian 147 | min_h, max_h = 2 * [hamiltonian] 148 | 149 | q, p, logp, grad = q0, p0, logp0, grad0 150 | if n_step == 0: 151 | warn_message_only("The number of integration steps was set to be 0.") 152 | 153 | for i in range(n_step): 154 | q, p, logp, grad \ 155 | = integrator(f, dt, q, p, grad) 156 | hamiltonian = compute_hamiltonian(logp, p) 157 | hamiltonians[i + 1] = hamiltonian 158 | min_h, max_h = update_running_minmax(min_h, max_h, hamiltonian) 159 | n_grad_evals += 1 160 | instability_detected \ 161 | = math.isinf(logp) or (max_h - min_h) > hamiltonian_tol 162 | if instability_detected: 163 | warn_message_only( 164 | "Numerical integration became unstable while simulating the " 165 | "HMC trajectory." 
166 | ) 167 | break 168 | 169 | info = { 170 | 'energy_trajectory': hamiltonians, 171 | 'n_grad_evals': n_grad_evals, 172 | 'instability_detected': instability_detected, 173 | } 174 | 175 | return q, p, logp, grad, info 176 | 177 | 178 | def update_running_minmax(running_min, running_max, curr_val): 179 | running_min = min(running_min, curr_val) 180 | running_max = max(running_max, curr_val) 181 | return running_min, running_max 182 | -------------------------------------------------------------------------------- /bayesbridge/random/polya_gamma/test_polyagamma.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import matplotlib.pyplot as plt\n", 10 | "import numpy as np\n", 11 | "\n", 12 | "from polya_gamma import PolyaGammaDist" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "import rpy2.robjects as robjects\n", 22 | "import rpy2.robjects.packages as rpackages\n", 23 | "import rpy2.robjects.numpy2ri\n", 24 | "\n", 25 | "try:\n", 26 | " bayeslogit = rpackages.importr('BayesLogit')\n", 27 | "except:\n", 28 | " utils = rpackages.importr('utils')\n", 29 | " utils.install_packages('BayesLogit')\n", 30 | " bayeslogit = rpackages.importr('BayesLogit')" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "## Compare Python output against that of R package" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "def comparison_hist(samples_1, samples_2, use_log_scale=False):\n", 47 | " if use_log_scale:\n", 48 | " samples_1 = np.log(samples_1)\n", 49 | " samples_2 = np.log(samples_2)\n", 50 | " x_max = max(samples_1.max(), samples_2.max())\n", 51 | " x_min = min(samples_1.min(), samples_2.min())\n", 52 | " bins = np.linspace(x_min, x_max, 51)\n", 53 | " \n", 54 | " plt.hist(samples_1, alpha=.5, bins=bins, density=True)\n", 55 | " plt.hist(samples_2, alpha=.5, bins=bins, density=True)\n", 56 | " for side in ['left', 'top', 'right']:\n", 57 | " plt.gca().spines[side].set_visible(False)\n", 58 | " plt.yticks([])" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "n_samples = 10 ** 6\n", 68 | "shape = [1., 2.] 
# BayesLogit apparently requires double\n", 69 | "tilt = [.01, 100.]\n", 70 | " # Sqrt of twice the negative tilting parameter, actually\n", 71 | " \n", 72 | "pg = PolyaGammaDist()" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "plt.figure(figsize=(14, 4.5))\n", 82 | "plt.rcParams['font.size'] = 20\n", 83 | "\n", 84 | "for i in range(2):\n", 85 | " \n", 86 | " python_samples = pg.rand_polyagamma(\n", 87 | " np.tile(shape[i], n_samples).astype(np.int), \n", 88 | " np.tile(tilt[i], n_samples)\n", 89 | " )\n", 90 | " \n", 91 | " # Sample via R package.\n", 92 | " try:\n", 93 | " rpy2.robjects.numpy2ri.activate()\n", 94 | " r_samples = np.array(\n", 95 | " bayeslogit.rpg(n_samples, shape[i], tilt[i])\n", 96 | " )\n", 97 | " except:\n", 98 | " # In case 'numpy2ri.activate()' fails\n", 99 | " r_samples = np.array([\n", 100 | " bayeslogit.rpg(1, shape[i], tilt[i])[0]\n", 101 | " for i in range(n_samples)\n", 102 | " ])\n", 103 | " \n", 104 | " plt.subplot(1, 2, i + 1)\n", 105 | " comparison_hist(r_samples, python_samples, use_log_scale=True)\n", 106 | " \n", 107 | " plt.xlabel('log(tilted stable)')\n", 108 | " if i == 0:\n", 109 | " plt.legend(['from R', 'from Python'], loc=[.65, .7], frameon=False)\n", 110 | "\n", 111 | "plt.show()" 112 | ] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "metadata": {}, 117 | "source": [ 118 | "## Compare against another R package" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": null, 124 | "metadata": {}, 125 | "outputs": [], 126 | "source": [ 127 | "import rpy2.robjects as robjects\n", 128 | "import rpy2.robjects.packages as rpackages\n", 129 | "import rpy2.robjects.numpy2ri\n", 130 | "\n", 131 | "try:\n", 132 | " pgdraw = rpackages.importr('pgdraw')\n", 133 | "except:\n", 134 | " utils = rpackages.importr('utils')\n", 135 | " utils.install_packages('pgdraw')\n", 136 | " pgdraw = rpackages.importr('pgdraw')" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": null, 142 | "metadata": {}, 143 | "outputs": [], 144 | "source": [ 145 | "plt.figure(figsize=(14, 4.5))\n", 146 | "plt.rcParams['font.size'] = 20\n", 147 | "\n", 148 | "for i in range(2):\n", 149 | " \n", 150 | " python_samples = pg.rand_polyagamma(\n", 151 | " np.tile(shape[i], n_samples).astype(np.int), \n", 152 | " np.tile(tilt[i], n_samples)\n", 153 | " )\n", 154 | " \n", 155 | " # Sample via R package.\n", 156 | " try:\n", 157 | " rpy2.robjects.numpy2ri.activate()\n", 158 | " r_samples = np.array(\n", 159 | " pgdraw.rcpp_pgdraw(shape[i], tilt[i] * np.ones(n_samples))\n", 160 | " )\n", 161 | " except:\n", 162 | " r_samples = np.array([\n", 163 | " pgdraw.rcpp_pgdraw(shape[i], tilt[i])[0]\n", 164 | " for i in range(n_samples)\n", 165 | " ])\n", 166 | " \n", 167 | " plt.subplot(1, 2, i + 1)\n", 168 | " comparison_hist(r_samples, python_samples, use_log_scale=True)\n", 169 | " \n", 170 | " plt.xlabel('log(tilted stable)')\n", 171 | " if i == 0:\n", 172 | " plt.legend(['from R', 'from Python'], loc=[.65, .7], frameon=False)\n", 173 | "\n", 174 | "plt.show()" 175 | ] 176 | }, 177 | { 178 | "cell_type": "markdown", 179 | "metadata": {}, 180 | "source": [ 181 | "## Make sure general and specialized method return same outputs" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": null, 187 | "metadata": {}, 188 | "outputs": [], 189 | "source": [ 190 | "n_samples = 10 ** 2\n", 191 | "tilt = .1\n", 192 | "shape = 1" 193 | ] 194 | }, 195 
| { 196 | "cell_type": "code", 197 | "execution_count": null, 198 | "metadata": {}, 199 | "outputs": [], 200 | "source": [ 201 | "seed = 0\n", 202 | "\n", 203 | "pg = PolyaGammaDist(seed)\n", 204 | "general_samples = pg.rand_polyagamma(\n", 205 | " np.ones(n_samples, dtype=np.int), \n", 206 | " tilt * np.ones(n_samples)\n", 207 | ")\n", 208 | "\n", 209 | "pg.set_seed(seed)\n", 210 | "simplified_samples = pg.rand_unit_shape_polyagamma(\n", 211 | " tilt * np.ones(n_samples)\n", 212 | ")\n", 213 | "\n", 214 | "assert np.all(general_samples == simplified_samples)" 215 | ] 216 | } 217 | ], 218 | "metadata": { 219 | "kernelspec": { 220 | "display_name": "Python 3", 221 | "language": "python", 222 | "name": "python3" 223 | }, 224 | "language_info": { 225 | "codemirror_mode": { 226 | "name": "ipython", 227 | "version": 3 228 | }, 229 | "file_extension": ".py", 230 | "mimetype": "text/x-python", 231 | "name": "python", 232 | "nbconvert_exporter": "python", 233 | "pygments_lexer": "ipython3", 234 | "version": "3.6.10" 235 | } 236 | }, 237 | "nbformat": 4, 238 | "nbformat_minor": 4 239 | } 240 | -------------------------------------------------------------------------------- /bayesbridge/design_matrix/sparse_matrix.py: -------------------------------------------------------------------------------- 1 | from warnings import warn 2 | 3 | import numpy as np 4 | import scipy.sparse as sparse 5 | 6 | from .abstract_matrix import AbstractDesignMatrix 7 | 8 | try: 9 | from .mkl_matvec import mkl_csr_matvec 10 | except: 11 | mkl_csr_matvec = None 12 | try: 13 | import cupy as cp 14 | except (ImportError, ModuleNotFoundError) as e: 15 | cp = None 16 | cupy_exception = e 17 | 18 | 19 | class SparseDesignMatrix(AbstractDesignMatrix): 20 | 21 | def __init__(self, X, use_mkl=True, center_predictor=False, add_intercept=True, 22 | copy_array=False, dot_format='csr', Tdot_format='csr'): 23 | """ 24 | Params: 25 | ------ 26 | X : scipy sparse matrix 27 | """ 28 | if copy_array: 29 | X = X.copy() 30 | super().__init__() 31 | if dot_format == 'csc' or Tdot_format == 'csc': 32 | raise NotImplementedError( 33 | "Current dot operations are only implemented for the CSR format." 34 | ) 35 | self.use_cupy = self.is_cupy_sparse(X) 36 | if use_mkl and (mkl_csr_matvec is None) and (not self.use_cupy): 37 | warn("Could not load MKL Library. Will use Scipy's 'dot'.") 38 | use_mkl = False 39 | self.centered = center_predictor 40 | self.intercept_added = add_intercept 41 | self.use_mkl = (not self.use_cupy) and use_mkl 42 | X = self.remove_intercept_indicator(X) 43 | squeeze, array, zeros = (cp.squeeze, cp.array, cp.zeros) if self.use_cupy \ 44 | else (np.squeeze, np.array, np.zeros) 45 | if center_predictor: 46 | self.column_offset = squeeze(array(X.mean(axis=0))) 47 | else: 48 | self.column_offset = zeros(X.shape[1]) 49 | self.X_main = cp.sparse.csr_matrix(X) if self.use_cupy else X.tocsr() 50 | 51 | @property 52 | def shape(self): 53 | shape = self.X_main.shape 54 | return shape[0], shape[1] + int(self.intercept_added) 55 | 56 | @property 57 | def is_sparse(self): 58 | return True 59 | 60 | @property 61 | def nnz(self): 62 | """ Currently exists only to estimate the cost of matrix-matrix and 63 | matrix-vector operations. Does not correspond to the actual nnz of the 64 | represented design matrix. 
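(The intercept column and the dense column offsets from centering are handled separately and therefore are not counted here.)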
65 | """ 66 | return self.X_main.nnz 67 | 68 | def dot(self, v): 69 | if self.memoized: 70 | if np.all(self.v_prev == v): 71 | return self.X_dot_v 72 | self.v_prev = v.copy() 73 | 74 | input_is_cupy = (cp is not None) and isinstance(v, cp.ndarray) 75 | if self.use_cupy and not input_is_cupy: 76 | v = cp.asarray(v) 77 | intercept_effect = 0. 78 | if self.intercept_added: 79 | intercept_effect += v[0] 80 | v = v[1:] 81 | result = intercept_effect + self.main_dot(v) 82 | if self.use_cupy and not input_is_cupy: 83 | result = cp.asnumpy(result) 84 | if self.memoized: 85 | self.X_dot_v = result 86 | self.dot_count += 1 87 | 88 | return result 89 | 90 | def main_dot(self, v): 91 | """ Multiply by the main effect part of the design matrix. """ 92 | X = self.X_main 93 | if self.use_mkl: 94 | result = mkl_csr_matvec(X, v) 95 | else: 96 | result = X.dot(v) 97 | inner = cp.inner if self.use_cupy else np.inner 98 | result -= inner(self.column_offset, v) 99 | if self.memoized: 100 | self.X_dot_v = result 101 | return result 102 | 103 | def Tdot(self, v): 104 | input_is_cupy = (cp is not None) and isinstance(v, cp.ndarray) 105 | if self.use_cupy and not input_is_cupy: 106 | v = cp.asarray(v) 107 | 108 | result = self.main_Tdot(v) 109 | 110 | if self.intercept_added: 111 | if self.use_cupy: 112 | result = cp.concatenate((cp.asarray([cp.sum(v)]), result)) 113 | else: 114 | result = np.concatenate(([np.sum(v)], result)) 115 | self.Tdot_count += 1 116 | 117 | if self.use_cupy and not input_is_cupy: 118 | result = cp.asnumpy(result) 119 | return result 120 | 121 | def main_Tdot(self, v): 122 | X = self.X_main 123 | if self.use_mkl: 124 | result = mkl_csr_matvec(X, v, transpose=True) 125 | else: 126 | result = X.T.dot(v) 127 | sum = cp.sum if self.use_cupy else np.sum 128 | result -= sum(v) * self.column_offset 129 | return result 130 | 131 | def compute_fisher_info(self, weight, diag_only=False): 132 | """ Compute $X^T W X$ where W is the diagonal matrix of a given weight.""" 133 | 134 | if diag_only: 135 | return self.compute_fisher_diag(weight) 136 | 137 | weight_mat = self.create_diag_matrix(weight) 138 | X = self.X_main 139 | X_T = X.T 140 | weighted_X = weight_mat.dot(X).tocsc() 141 | 142 | n_pred = self.shape[1] 143 | fisher_info = np.zeros((n_pred, n_pred)) 144 | if self.intercept_added: 145 | fisher_info[0, 0] = np.sum(weight) 146 | fisher_info[0, 1:] \ 147 | = weighted_X.sum(0) - np.sum(weight) * self.column_offset 148 | fisher_info[1:, 0] = fisher_info[0, 1:] 149 | fisher_info_wo_intercept = fisher_info[1:, 1:] 150 | else: 151 | fisher_info_wo_intercept = fisher_info 152 | 153 | fisher_info_wo_intercept += X_T.dot(weighted_X).toarray() 154 | if self.centered: 155 | outer_prod_term = np.outer( 156 | self.column_offset, weighted_X.sum(0) 157 | ) 158 | fisher_info_wo_intercept -= outer_prod_term + outer_prod_term.T 159 | fisher_info_wo_intercept \ 160 | += np.sum(weight) * np.outer(self.column_offset, self.column_offset) 161 | 162 | return fisher_info 163 | 164 | def compute_fisher_diag(self, weight): 165 | 166 | weight_mat = self.create_diag_matrix(weight) 167 | diag = weight_mat.dot(self.X_main.power(2)).sum(0) 168 | if self.centered: 169 | weighted_X = weight_mat.dot(self.X_main).tocsc() 170 | diag -= 2 * self.column_offset \ 171 | * np.squeeze(np.asarray(weighted_X.sum(0))) 172 | diag += np.sum(weight) * self.column_offset ** 2 173 | diag = np.squeeze(np.asarray(diag)) 174 | if self.intercept_added: 175 | diag = np.concatenate(([np.sum(weight)], diag)) 176 | 177 | return diag 178 | 179 | def 
create_diag_matrix(self, v): 180 | return sparse.dia_matrix((v, 0), (len(v), len(v))) 181 | 182 | def compute_transposed_fisher_info(self, weight, include_intrcpt=False): 183 | X = self.X_main 184 | weight_ex_intrcpt = weight[1:] if include_intrcpt else weight 185 | weight_mat = self.create_diag_matrix(weight_ex_intrcpt) 186 | weighted_X_T = weight_mat.dot(X.T).tocsc() 187 | transposed_fisher_info = X.dot(weighted_X_T).toarray() 188 | offset_weight_X = self.column_offset @ weighted_X_T 189 | if self.centered: 190 | transposed_fisher_info -= offset_weight_X[np.newaxis, :] 191 | transposed_fisher_info -= offset_weight_X[:, np.newaxis] 192 | transposed_fisher_info \ 193 | += np.sum(weight_ex_intrcpt * self.column_offset ** 2) 194 | if include_intrcpt: 195 | transposed_fisher_info += weight[0] 196 | return transposed_fisher_info 197 | 198 | def toarray(self): 199 | X = self.X_main.toarray() - self.column_offset[np.newaxis, :] 200 | if self.intercept_added: 201 | X = np.hstack((np.ones(X.shape[0]), X)) 202 | return X 203 | 204 | def extract_matrix(self, order=None): 205 | pass 206 | -------------------------------------------------------------------------------- /bayesbridge/random/polya_gamma/polya_gamma.pyx: -------------------------------------------------------------------------------- 1 | # cython: cdivision = True 2 | from libc.math cimport exp, log, sqrt, fabs, M_PI 3 | import random 4 | import cython 5 | import numpy as np 6 | cimport numpy as np 7 | from numpy.random import PCG64 8 | from numpy.random.bit_generator cimport BitGenerator 9 | from .scipy_ndtr cimport log_ndtr as normal_logcdf 10 | from bayesbridge.random.normal.normal cimport random_normal 11 | from bayesbridge.random.uniform.uniform cimport random_uniform 12 | 13 | 14 | 15 | cdef class PolyaGammaDist(): 16 | # Threshold below (and above) which the target density is bounded by inverse 17 | # Gaussian (and exponential) and have different analytical series expressions. 18 | cdef double THRESHOLD 19 | # Number of terms in the infinite alternating series beyond which to truncate. 20 | cdef int MAX_SERIES_TERMS 21 | cdef BitGenerator bitgen 22 | 23 | def __init__(self, seed=None): 24 | self.set_seed(seed) 25 | self.THRESHOLD = 2.0 / M_PI 26 | self.MAX_SERIES_TERMS = 100 27 | self.bitgen = PCG64(seed) 28 | 29 | def set_seed(self, seed): 30 | self.bitgen = PCG64(seed) 31 | 32 | def get_state(self): 33 | return self.bitgen.state 34 | 35 | def set_state(self, state): 36 | self.bitgen.state = state 37 | 38 | @cython.boundscheck(False) 39 | @cython.wraparound(False) 40 | def rand_polyagamma(self, shape, tilt): 41 | """ 42 | Sample from exponentially tilted Polya-Gamma distribution 43 | p(x | shape, tilt) \propto \exp(- tilt^2 / 2 * x) p(x | shape, tilt=0) 44 | via Devroye's alternatig series method. 45 | 46 | Parameters 47 | ---------- 48 | shape : numpy array of integers 49 | Distribution is defined for non-integer values but the implemented 50 | algorithm only support integer values. 51 | tilt : numpy array of doubles 52 | Sqrt of twice the negative tilting parameter, actually. 
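        Example (illustrative sketch; argument values are arbitrary):
            pg = PolyaGammaDist(seed=0)
            shape = np.ones(1000, dtype=int)
            tilt = 2. * np.ones(1000)
            draws = pg.rand_polyagamma(shape, tilt)  # array of 1000 tilted draws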
53 | """ 54 | 55 | if not isinstance(shape, np.ndarray) and isinstance(tilt, np.ndarray): 56 | raise TypeError('Input must be numpy arrays.') 57 | if not shape.size == tilt.size: 58 | raise ValueError('Input arrays must be of the same length.') 59 | if not np.issubdtype(shape.dtype, np.integer): 60 | raise ValueError('Shape parameter must be integers.') 61 | shape = shape.astype(np.intc) 62 | tilt = tilt.astype(np.double) 63 | result = np.zeros(shape.size, dtype=np.double) 64 | 65 | cdef int[:] shape_view = shape 66 | cdef double[:] tilt_view = tilt 67 | cdef double[:] result_view = result 68 | cdef long n_samples = shape_view.size 69 | cdef Py_ssize_t index, j 70 | for index in range(n_samples): 71 | for j in range(shape_view[index]): 72 | result_view[index] \ 73 | += self.rand_scalar_unit_shape_polyagamma(tilt_view[index]) 74 | return result 75 | 76 | @cython.boundscheck(False) 77 | @cython.wraparound(False) 78 | def rand_unit_shape_polyagamma(self, tilt): 79 | 80 | if not isinstance(tilt, np.ndarray): 81 | raise TypeError('Input must be numpy arrays.') 82 | tilt = tilt.astype(np.double) 83 | result = np.empty(tilt.size, dtype=np.double) 84 | 85 | cdef double[:] tilt_view = tilt 86 | cdef double[:] result_view = result 87 | cdef long n_samples = tilt_view.size 88 | cdef Py_ssize_t index 89 | for index in range(n_samples): 90 | result_view[index] \ 91 | = self.rand_scalar_unit_shape_polyagamma(tilt_view[index]) 92 | return result 93 | 94 | cdef double rand_scalar_unit_shape_polyagamma(self, double tilt): 95 | return .25 * self.rand_tilted_jocobi(.5 * fabs(tilt)) 96 | 97 | cdef double rand_tilted_jocobi(self, double tilt): 98 | """ 99 | Sample from tilted Jacobi distribution 100 | p(x | tilt) \propto \exp(- tilt^2 / 2 * x) p(x | 0) 101 | via Devroye's alternatig series method. 102 | """ 103 | cdef double X, U, proposal_density 104 | cdef bint accepted = False 105 | 106 | # Main sampling loop page 130 of the Windle PhD thesis 107 | while not accepted: 108 | X, proposal_density = self.rand_proposal(tilt) 109 | U = random_uniform(self.bitgen) * proposal_density 110 | accepted = self.decide_acceptability(U, X, proposal_density) 111 | 112 | return X 113 | 114 | cdef (double, double) rand_proposal(self, double tilt): 115 | # Many quantities here can be cached and reused in case of rejection, but 116 | # the acceptance rate is so high that it does not matter. 117 | cdef double exp_rate = .5 * tilt ** 2 + .125 * M_PI ** 2 118 | cdef double prob_to_right = self.calc_prob_to_right(tilt, exp_rate) 119 | if random_uniform(self.bitgen) < prob_to_right: 120 | X = self.rand_left_truncated_exp(1. / exp_rate, self.THRESHOLD) 121 | else: 122 | X = self.rand_right_truncated_unit_shape_invgauss(tilt, self.THRESHOLD) 123 | proposal_density = self.calc_next_term_in_series(0, X) 124 | return X, proposal_density 125 | 126 | cdef double calc_prob_to_right(self, double tilt, double exp_rate): 127 | cdef double log_mass_expo \ 128 | = - log(exp_rate) - exp_rate * self.THRESHOLD + log(.25 * M_PI) 129 | cdef double log_mass_invg_1 \ 130 | = - tilt + normal_logcdf( 131 | (self.THRESHOLD * tilt - 1.) / sqrt(self.THRESHOLD)) 132 | cdef double log_mass_invg_2 \ 133 | = tilt + normal_logcdf( 134 | - (self.THRESHOLD * tilt + 1.) 
/ sqrt(self.THRESHOLD)) 135 | cdef double mass_ratio = ( 136 | exp(log_mass_invg_1 - log_mass_expo) 137 | + exp(log_mass_invg_2 - log_mass_expo) 138 | ) 139 | return 1.0 / (1.0 + mass_ratio) 140 | 141 | # Equations (12) and (13) of Polson, Scott, and Windle (2013) 142 | cdef double calc_next_term_in_series(self, int n, double x): 143 | cdef double log_result = log(M_PI * (n + 0.5)) 144 | if x <= self.THRESHOLD: 145 | log_result += - 1.5 * log(.5 * x * M_PI) - 2 * (n + 0.5) ** 2 / x 146 | else: 147 | log_result += - 0.5 * x * M_PI ** 2 * (n + 0.5) ** 2 148 | return exp(log_result) 149 | 150 | cdef bint decide_acceptability(self, double U, double X, double zeroth_term): 151 | 152 | cdef double partial_sum = zeroth_term 153 | cdef int n_summed = 1 154 | cdef int sign = -1 # Sign of the next term in the alternating sequence 155 | cdef bint acceted 156 | cdef bint is_determinate = False 157 | 158 | while not is_determinate: 159 | partial_sum += sign * self.calc_next_term_in_series(n_summed, X) 160 | n_summed += 1 161 | if sign == -1: 162 | if U <= partial_sum: 163 | accepted = True 164 | is_determinate = True 165 | else: # sign == 1 166 | if U > partial_sum: 167 | accepted = False 168 | is_determinate = True 169 | elif n_summed >= self.MAX_SERIES_TERMS: 170 | acceted = True # Take the partial sum lower-bound as the target 171 | is_determinate = True 172 | sign = - sign 173 | 174 | return accepted 175 | 176 | cdef double rand_left_truncated_exp(self, double scale, double trunc): 177 | return trunc - scale * log(1.0 - random_uniform(self.bitgen)) 178 | 179 | # Ref: "Simulation of truncated gamma variables" by Younshik Chung 180 | # Korean Journal of Computational & Applied Mathematics, 1998 181 | cdef double rand_left_truncated_chisq(self, double trunc): 182 | cdef double X, density_ratio 183 | cdef bint accepted = False 184 | while not accepted: 185 | X = self.rand_left_truncated_exp(2., trunc) 186 | density_ratio = sqrt(0.5 * M_PI / X) 187 | accepted = (random_uniform(self.bitgen) <= density_ratio) 188 | return X 189 | 190 | 191 | cdef double rand_right_truncated_unit_shape_invgauss(self, double rate, double trunc): 192 | # Shape parameter is assumed to be one. 193 | cdef double X 194 | cdef double mean = 1. 
/ rate 195 | cdef bint accepted = False 196 | 197 | # Choose a better sampler depending on the input parameters 198 | if mean > trunc: 199 | # Algorithm 3 in Windle's PhD thesis, page 128 200 | while not accepted: 201 | X = 1.0 / self.rand_left_truncated_chisq(.5 * M_PI) 202 | accepted = (log(random_uniform(self.bitgen)) < - 0.5 * X * rate ** 2) 203 | else: 204 | while not accepted: 205 | X = self.rand_unit_shape_invgauss(mean) 206 | accepted = (X < trunc) 207 | return X 208 | 209 | cdef double rand_unit_shape_invgauss(self, double mean): 210 | cdef double V = random_normal(self.bitgen) ** 2 211 | cdef double X = mean + 0.5 * mean * ( 212 | mean * V - sqrt(4.0 * mean * V + mean ** 2 * V ** 2) 213 | ) 214 | if random_uniform(self.bitgen) > mean / (mean + X): 215 | X = mean ** 2 / X 216 | return X 217 | 218 | -------------------------------------------------------------------------------- /bayesbridge/gibbs_util.py: -------------------------------------------------------------------------------- 1 | import math 2 | import time 3 | from warnings import warn 4 | import numpy as np 5 | 6 | 7 | class SamplerOptions(): 8 | 9 | def __init__(self, coef_sampler_type, 10 | global_scale_update='sample', 11 | hmc_curvature_est_stabilized=False): 12 | """ 13 | Parameters 14 | ---------- 15 | coef_sampler_type : {'cholesky', 'cg', 'hmc'} 16 | global_scale_update : str, {'sample', 'optimize', None} 17 | hmc_curvature_est_stabilized : bool 18 | """ 19 | if coef_sampler_type not in ('cholesky', 'cg', 'hmc'): 20 | raise ValueError("Unsupported regression coefficient sampler.") 21 | self.coef_sampler_type = coef_sampler_type 22 | self.gscale_update = global_scale_update 23 | self.curvature_est_stabilized = hmc_curvature_est_stabilized 24 | 25 | def get_info(self): 26 | return { 27 | 'coef_sampler_type': self.coef_sampler_type, 28 | 'global_scale_update': self.gscale_update, 29 | 'hmc_curvature_est_stabilized': self.curvature_est_stabilized 30 | } 31 | 32 | @staticmethod 33 | def pick_default_and_create(coef_sampler_type, options, model_name, design): 34 | """ Initialize class with, if unspecified, an appropriate default 35 | sampling method based on the type and size of model. 36 | """ 37 | if options is None: 38 | options = {} 39 | 40 | if 'coef_sampler_type' in options: 41 | if coef_sampler_type is not None: 42 | warn("Duplicate specification of method for sampling " 43 | "regression coefficient. Will use the dictionary one.") 44 | coef_sampler_type = options['coef_sampler_type'] 45 | 46 | if coef_sampler_type not in (None, 'cholesky', 'cg', 'hmc'): 47 | raise ValueError("Unsupported sampler type.") 48 | 49 | if coef_sampler_type not in (None, 'cg') and design.use_cupy: 50 | raise ValueError("Only 'cg' sampler supported with cupy matrices.") 51 | 52 | if model_name in ('linear', 'logit'): 53 | 54 | n_obs, n_pred = design.shape 55 | if not design.is_sparse: 56 | preferred_method = 'cholesky' 57 | elif design.use_cupy: 58 | preferred_method = 'cg' 59 | else: 60 | # TODO: Make more informed choice between Cholesky and CG. 61 | frac = design.nnz / (n_obs * n_pred) 62 | fisher_info_cost = frac ** 2 * n_obs * n_pred ** 2 63 | cg_cost = design.nnz * 100. 64 | preferred_method = 'cg' if cg_cost < fisher_info_cost \ 65 | else 'cholesky' 66 | 67 | # TODO: Implement Woodbury-based Gaussian sampler. 
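        # Explanatory note (not current behavior): a Woodbury-identity-based
        # sampler would reduce the conditional Gaussian update to an
        # n_obs-by-n_obs solve rather than an n_pred-by-n_pred one, which is
        # what the warning below refers to.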
68 | if n_pred > n_obs: 69 | warn("Sampler has not been optimized for 'small n' problem.") 70 | 71 | if coef_sampler_type is None: 72 | coef_sampler_type = preferred_method 73 | elif coef_sampler_type not in ('hmc', preferred_method): 74 | warn("Specified sampler may not be optimal. Worth experimenting " 75 | "with the '{:s}' option.".format(preferred_method)) 76 | 77 | else: 78 | if coef_sampler_type != 'hmc': 79 | warn("Specified sampler type is not supported for the {:s} " 80 | "model. Will use HMC instead.".format(model_name)) 81 | coef_sampler_type = 'hmc' 82 | 83 | options['coef_sampler_type'] = coef_sampler_type 84 | return SamplerOptions(**options) 85 | 86 | 87 | class MarkovChainManager(): 88 | 89 | def __init__(self, n_obs, n_pred, n_unshrunk, model_name): 90 | self.n_obs = n_obs 91 | self.n_pred = n_pred 92 | self.n_unshrunk = n_unshrunk 93 | self.model_name = model_name 94 | self._prev_timestamp = None # For status update during Gibbs 95 | self._curr_timestamp = None 96 | 97 | def merge_outputs(self, prev_samples, prev_mcmc_info, new_samples, new_mcmc_info): 98 | 99 | new_samples = { 100 | key: np.concatenate( 101 | (prev_samples[key], new_samples[key]), axis=-1 102 | ) for key in new_samples.keys() 103 | } 104 | 105 | for output_key in ['_reg_coef_sampling_info']: 106 | prev_output = prev_mcmc_info[output_key] 107 | next_output = new_mcmc_info[output_key] 108 | new_mcmc_info[output_key] = { 109 | key : np.concatenate( 110 | (prev_output[key], next_output[key]), axis=-1 111 | ) for key in prev_output.keys() 112 | } 113 | 114 | new_mcmc_info['n_iter'] += prev_mcmc_info['n_iter'] 115 | new_mcmc_info['runtime'] += prev_mcmc_info['runtime'] 116 | 117 | for output_key in ['_init_optim_info', 'seed']: 118 | new_mcmc_info[output_key] = prev_mcmc_info[output_key] 119 | 120 | return new_samples, new_mcmc_info 121 | 122 | def pre_allocate(self, samples, sampling_info, n_post_burnin, thin, params_to_save, sampling_method): 123 | 124 | n_sample = math.floor(n_post_burnin / thin) # Number of samples to keep 125 | 126 | if 'coef' in params_to_save: 127 | samples['coef'] = np.zeros((self.n_pred, n_sample)) 128 | 129 | if 'local_scale' in params_to_save: 130 | samples['local_scale'] = np.zeros((self.n_pred - self.n_unshrunk, n_sample)) 131 | 132 | if 'global_scale' in params_to_save: 133 | samples['global_scale'] = np.zeros(n_sample) 134 | 135 | if 'obs_prec' in params_to_save: 136 | if self.model_name == 'linear': 137 | samples['obs_prec'] = np.zeros(n_sample) 138 | elif self.model_name == 'logit': 139 | samples['obs_prec'] = np.zeros((self.n_obs, n_sample)) 140 | 141 | if 'logp' in params_to_save: 142 | samples['logp'] = np.zeros(n_sample) 143 | 144 | for key in self.get_sampling_info_keys(sampling_method): 145 | sampling_info[key] = np.zeros(n_sample) 146 | 147 | def get_sampling_info_keys(self, sampling_method): 148 | if sampling_method == 'cg': 149 | keys = ['n_cg_iter'] 150 | elif sampling_method in ['hmc', 'nuts']: 151 | keys = [ 152 | 'stepsize', 'n_hessian_matvec', 'n_grad_evals', 153 | 'stability_limit_est', 'stability_adjustment_factor', 154 | 'instability_detected' 155 | ] 156 | if sampling_method == 'hmc': 157 | keys += ['n_integrator_step', 'accepted', 'accept_prob'] 158 | else: 159 | keys += ['tree_height', 'ave_accept_prob'] 160 | else: 161 | keys = [] 162 | return keys 163 | 164 | def store_current_state( 165 | self, samples, mcmc_iter, n_burnin, thin, coef, lscale, 166 | gscale, obs_prec, logp, params_to_save): 167 | 168 | if mcmc_iter <= n_burnin or (mcmc_iter - n_burnin) % 
thin != 0: 169 | return 170 | 171 | index = math.floor((mcmc_iter - n_burnin) / thin) - 1 172 | 173 | if 'coef' in params_to_save: 174 | samples['coef'][:, index] = coef 175 | 176 | if 'local_scale' in params_to_save: 177 | samples['local_scale'][:, index] = lscale 178 | 179 | if 'global_scale' in params_to_save: 180 | samples['global_scale'][index] = gscale 181 | 182 | if 'obs_prec' in params_to_save: 183 | if self.model_name == 'linear': 184 | samples['obs_prec'][index] = obs_prec 185 | elif self.model_name == 'logit': 186 | samples['obs_prec'][:, index] = obs_prec 187 | 188 | if 'logp' in params_to_save: 189 | samples['logp'][index] = logp 190 | 191 | def store_sampling_info( 192 | self, sampling_info, info, mcmc_iter, n_burnin, thin, sampling_method): 193 | 194 | if mcmc_iter <= n_burnin or (mcmc_iter - n_burnin) % thin != 0: 195 | return 196 | 197 | index = math.floor((mcmc_iter - n_burnin) / thin) - 1 198 | for key in self.get_sampling_info_keys(sampling_method): 199 | sampling_info[key][index] = info[key] 200 | 201 | def pack_parameters(self, coef, obs_prec, lscale, gscale): 202 | state = { 203 | 'coef': coef, 204 | 'local_scale': lscale, 205 | 'global_scale': gscale, 206 | } 207 | if self.model_name in ('linear', 'logit'): 208 | state['obs_prec'] = obs_prec 209 | return state 210 | 211 | def stamp_time(self, curr_time): 212 | self._prev_timestamp = curr_time 213 | 214 | def print_status(self, n_status_update, mcmc_iter, n_iter, 215 | time_format='minute'): 216 | 217 | if n_status_update == 0: 218 | return 219 | n_iter_per_update = int(n_iter / n_status_update) 220 | if mcmc_iter % n_iter_per_update != 0: 221 | return 222 | 223 | self._curr_timestamp = time.time() 224 | 225 | time_elapsed = self._curr_timestamp - self._prev_timestamp 226 | if time_format == 'second': 227 | time_str = "{:.3g} seconds".format(time_elapsed) 228 | elif time_format == 'minute': 229 | time_str = "{:.3g} minutes".format(time_elapsed / 60) 230 | else: 231 | raise ValueError() 232 | 233 | msg = " ".join(( 234 | "{:d} Gibbs iterations complete:".format(mcmc_iter), 235 | time_str, "has elasped since the last update." 236 | )) 237 | print(msg) 238 | self._prev_timestamp = self._curr_timestamp -------------------------------------------------------------------------------- /bayesbridge/random/polya_gamma/scipy_ndtr.c: -------------------------------------------------------------------------------- 1 | /* Scipy port of a Cephes Library component, suitably modified to be stand-alone. */ 2 | 3 | /* 4 | * 5 | * Normal distribution function 6 | * 7 | * 8 | * 9 | * SYNOPSIS: 10 | * 11 | * double x, y, ndtr(); 12 | * 13 | * y = ndtr( x ); 14 | * 15 | * 16 | * 17 | * DESCRIPTION: 18 | * 19 | * Returns the area under the Gaussian probability density 20 | * function, integrated from minus infinity to x: 21 | * 22 | * x 23 | * - 24 | * 1 | | 2 25 | * ndtr(x) = --------- | exp( - t /2 ) dt 26 | * sqrt(2pi) | | 27 | * - 28 | * -inf. 29 | * 30 | * = ( 1 + erf(z) ) / 2 31 | * = erfc(z) / 2 32 | * 33 | * where z = x/sqrt(2). Computation is via the functions 34 | * erf and erfc. 
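 * For orientation (reference values, not part of the original Cephes text):
 *   ndtr(0.0) = 0.5, ndtr(1.0) ~ 0.8413, ndtr(-1.0) ~ 0.1587.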
35 | * 36 | * 37 | * ACCURACY: 38 | * 39 | * Relative error: 40 | * arithmetic domain # trials peak rms 41 | * IEEE -13,0 30000 3.4e-14 6.7e-15 42 | * 43 | * 44 | * ERROR MESSAGES: 45 | * 46 | * message condition value returned 47 | * erfc underflow x > 37.519379347 0.0 48 | * 49 | */ 50 | /* erf.c 51 | * 52 | * Error function 53 | * 54 | * 55 | * 56 | * SYNOPSIS: 57 | * 58 | * double x, y, erf(); 59 | * 60 | * y = erf( x ); 61 | * 62 | * 63 | * 64 | * DESCRIPTION: 65 | * 66 | * The integral is 67 | * 68 | * x 69 | * - 70 | * 2 | | 2 71 | * erf(x) = -------- | exp( - t ) dt. 72 | * sqrt(pi) | | 73 | * - 74 | * 0 75 | * 76 | * For 0 <= |x| < 1, erf(x) = x * P4(x**2)/Q5(x**2); otherwise 77 | * erf(x) = 1 - erfc(x). 78 | * 79 | * 80 | * 81 | * ACCURACY: 82 | * 83 | * Relative error: 84 | * arithmetic domain # trials peak rms 85 | * IEEE 0,1 30000 3.7e-16 1.0e-16 86 | * 87 | */ 88 | /* erfc.c 89 | * 90 | * Complementary error function 91 | * 92 | * 93 | * 94 | * SYNOPSIS: 95 | * 96 | * double x, y, erfc(); 97 | * 98 | * y = erfc( x ); 99 | * 100 | * 101 | * 102 | * DESCRIPTION: 103 | * 104 | * 105 | * 1 - erf(x) = 106 | * 107 | * inf. 108 | * - 109 | * 2 | | 2 110 | * erfc(x) = -------- | exp( - t ) dt 111 | * sqrt(pi) | | 112 | * - 113 | * x 114 | * 115 | * 116 | * For small x, erfc(x) = 1 - erf(x); otherwise rational 117 | * approximations are computed. 118 | * 119 | * 120 | * 121 | * ACCURACY: 122 | * 123 | * Relative error: 124 | * arithmetic domain # trials peak rms 125 | * IEEE 0,26.6417 30000 5.7e-14 1.5e-14 126 | */ 127 | 128 | 129 | /* 130 | * Cephes Math Library Release 2.2: June, 1992 131 | * Copyright 1984, 1987, 1988, 1992 by Stephen L. Moshier 132 | * Direct inquiries to 30 Frost Street, Cambridge, MA 02140 133 | */ 134 | 135 | #include 136 | 137 | static double P[] = { 138 | 2.46196981473530512524E-10, 139 | 5.64189564831068821977E-1, 140 | 7.46321056442269912687E0, 141 | 4.86371970985681366614E1, 142 | 1.96520832956077098242E2, 143 | 5.26445194995477358631E2, 144 | 9.34528527171957607540E2, 145 | 1.02755188689515710272E3, 146 | 5.57535335369399327526E2 147 | }; 148 | 149 | static double Q[] = { 150 | /* 1.00000000000000000000E0, */ 151 | 1.32281951154744992508E1, 152 | 8.67072140885989742329E1, 153 | 3.54937778887819891062E2, 154 | 9.75708501743205489753E2, 155 | 1.82390916687909736289E3, 156 | 2.24633760818710981792E3, 157 | 1.65666309194161350182E3, 158 | 5.57535340817727675546E2 159 | }; 160 | 161 | static double R[] = { 162 | 5.64189583547755073984E-1, 163 | 1.27536670759978104416E0, 164 | 5.01905042251180477414E0, 165 | 6.16021097993053585195E0, 166 | 7.40974269950448939160E0, 167 | 2.97886665372100240670E0 168 | }; 169 | 170 | static double S[] = { 171 | /* 1.00000000000000000000E0, */ 172 | 2.26052863220117276590E0, 173 | 9.39603524938001434673E0, 174 | 1.20489539808096656605E1, 175 | 1.70814450747565897222E1, 176 | 9.60896809063285878198E0, 177 | 3.36907645100081516050E0 178 | }; 179 | 180 | static double T[] = { 181 | 9.60497373987051638749E0, 182 | 9.00260197203842689217E1, 183 | 2.23200534594684319226E3, 184 | 7.00332514112805075473E3, 185 | 5.55923013010394962768E4 186 | }; 187 | 188 | static double U[] = { 189 | /* 1.00000000000000000000E0, */ 190 | 3.35617141647503099647E1, 191 | 5.21357949780152679795E2, 192 | 4.59432382970980127987E3, 193 | 2.26290000613890934246E4, 194 | 4.92673942608635921086E4 195 | }; 196 | 197 | #define UTHRESH 37.519379347 198 | #define DBL_EPSILON 2.2204460492503131e-16 199 | #define NPY_SQRT1_2 0.707106781186547524400844362104849039 /* 
1/sqrt(2) */ 200 | #define MAXLOG 7.09782712893383996843E2 /* log(2**1024) */ 201 | #define M_PI 3.14159265358979323846264338327950288 202 | 203 | 204 | double ndtr(double a); 205 | double erf(double x); 206 | double polevl(double x, const double coef[], int N); 207 | double p1evl(double x, const double coef[], int N); 208 | 209 | 210 | double ndtr(double a) 211 | { 212 | double x, y, z; 213 | 214 | // if (cephes_isnan(a)) { 215 | // sf_error("ndtr", SF_ERROR_DOMAIN, NULL); 216 | // return (NPY_NAN); 217 | // } 218 | 219 | x = a * NPY_SQRT1_2; 220 | z = fabs(x); 221 | 222 | if (z < NPY_SQRT1_2) 223 | y = 0.5 + 0.5 * erf(x); 224 | 225 | else { 226 | y = 0.5 * erfc(z); 227 | 228 | if (x > 0) 229 | y = 1.0 - y; 230 | } 231 | 232 | return (y); 233 | } 234 | 235 | 236 | double erfc(double a) 237 | { 238 | double p, q, x, y, z; 239 | 240 | // if (cephes_isnan(a)) { 241 | // sf_error("erfc", SF_ERROR_DOMAIN, NULL); 242 | // return (NPY_NAN); 243 | // } 244 | 245 | if (a < 0.0) 246 | x = -a; 247 | else 248 | x = a; 249 | 250 | if (x < 1.0) 251 | return (1.0 - erf(a)); 252 | 253 | z = -a * a; 254 | 255 | if (z < -MAXLOG) { 256 | under: 257 | // sf_error("erfc", SF_ERROR_UNDERFLOW, NULL); 258 | if (a < 0) 259 | return (2.0); 260 | else 261 | return (0.0); 262 | } 263 | 264 | z = exp(z); 265 | 266 | if (x < 8.0) { 267 | p = polevl(x, P, 8); 268 | q = p1evl(x, Q, 8); 269 | } 270 | else { 271 | p = polevl(x, R, 5); 272 | q = p1evl(x, S, 6); 273 | } 274 | y = (z * p) / q; 275 | 276 | if (a < 0) 277 | y = 2.0 - y; 278 | 279 | if (y == 0.0) 280 | goto under; 281 | 282 | return (y); 283 | } 284 | 285 | 286 | double erf(double x) 287 | { 288 | double y, z; 289 | 290 | // if (cephes_isnan(x)) { 291 | // sf_error("erf", SF_ERROR_DOMAIN, NULL); 292 | // return (NPY_NAN); 293 | // } 294 | 295 | if (x < 0.0) { 296 | // original implementation used -x instead of fabs(), but led to breaking behavior on some platforms. 297 | // see https://github.com/aki-nishimura/bayes-bridge/pull/7 for more info. 298 | return -erf(fabs(x)); 299 | } 300 | 301 | if (fabs(x) > 1.0) 302 | return (1.0 - erfc(x)); 303 | z = x * x; 304 | 305 | y = x * polevl(z, T, 4) / p1evl(z, U, 5); 306 | return (y); 307 | 308 | } 309 | 310 | 311 | double polevl(double x, const double coef[], int N) 312 | { 313 | double ans; 314 | int i; 315 | const double *p; 316 | 317 | p = coef; 318 | ans = *p++; 319 | i = N; 320 | 321 | do 322 | ans = ans * x + *p++; 323 | while (--i); 324 | 325 | return (ans); 326 | } 327 | 328 | /* p1evl() */ 329 | /* N 330 | * Evaluate polynomial when coefficient of x is 1.0. 331 | * Otherwise same as polevl. 332 | */ 333 | 334 | double p1evl(double x, const double coef[], int N) 335 | { 336 | double ans; 337 | const double *p; 338 | int i; 339 | 340 | p = coef; 341 | ans = x + *p++; 342 | i = N - 1; 343 | 344 | do 345 | ans = ans * x + *p++; 346 | while (--i); 347 | 348 | return (ans); 349 | } 350 | 351 | /* 352 | * double log_ndtr(double a) 353 | * 354 | * For a > -20, use the existing ndtr technique and take a log. 355 | * for a <= -20, we use the Taylor series approximation of erf to compute 356 | * the log CDF directly. The Taylor series consists of two parts which we will name "left" 357 | * and "right" accordingly. The right part involves a summation which we compute until the 358 | * difference in terms falls below the machine-specific EPSILON. 
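 * (Added summary of the branches implemented below: a > 6 returns -ndtr(-a),
 *  using log(1 + x) ~ x; -20 < a <= 6 returns log(ndtr(a)) directly; and
 *  a <= -20 uses the series expansion given below.)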
359 | * 360 | * \Phi(z) &=& 361 | * \frac{e^{-z^2/2}}{-z\sqrt{2\pi}} * [1 + \sum_{n=1}^{N-1} (-1)^n \frac{(2n-1)!!}{(z^2)^n}] 362 | * + O(z^{-2N+2}) 363 | * = [\mbox{LHS}] * [\mbox{RHS}] + \mbox{error}. 364 | * 365 | */ 366 | 367 | double log_ndtr(double a) 368 | { 369 | 370 | double log_LHS, /* we compute the left hand side of the approx (LHS) in one shot */ 371 | last_total = 0, /* variable used to check for convergence */ 372 | right_hand_side = 1, /* includes first term from the RHS summation */ 373 | numerator = 1, /* numerator for RHS summand */ 374 | denom_factor = 1, /* use reciprocal for denominator to avoid division */ 375 | denom_cons = 1.0 / (a * a); /* the precomputed division we use to adjust the denominator */ 376 | long sign = 1, i = 0; 377 | 378 | if (a > 6) { 379 | return -ndtr(-a); /* log(1+x) \approx x */ 380 | } 381 | if (a > -20) { 382 | return log(ndtr(a)); 383 | } 384 | log_LHS = -0.5 * a * a - log(-a) - 0.5 * log(2 * M_PI); 385 | 386 | while (fabs(last_total - right_hand_side) > DBL_EPSILON) { 387 | i += 1; 388 | last_total = right_hand_side; 389 | sign = -sign; 390 | denom_factor *= denom_cons; 391 | numerator *= 2 * i - 1; 392 | right_hand_side += sign * numerator * denom_factor; 393 | 394 | } 395 | return log_LHS + log(right_hand_side); 396 | } 397 | -------------------------------------------------------------------------------- /bayesbridge/prior.py: -------------------------------------------------------------------------------- 1 | import math 2 | from warnings import warn 3 | import numpy as np 4 | import scipy as sp 5 | from scipy.special import polygamma as scipy_polygamma 6 | 7 | class RegressionCoefPrior(): 8 | 9 | def __init__( 10 | self, 11 | bridge_exponent=.5, 12 | n_fixed_effect=0, 13 | sd_for_intercept=float('inf'), 14 | sd_for_fixed_effect=float('inf'), 15 | regularizing_slab_size=float('inf'), 16 | global_scale_prior_hyper_param=None, 17 | _global_scale_parametrization='coef_magnitude' 18 | ): 19 | """ Encapisulate prior information for BayesBridge. 20 | 21 | Parameters 22 | ---------- 23 | bridge_exponent : float < 2 24 | Exponent of the bridge prior on regression coefficients. For example, 25 | the value of 2 (albeit unsupported) would correspond to Gaussian prior 26 | and of 1 double-exponential as in Bayesian Lasso. 27 | n_fixed_effect : int 28 | Number of predictors --- other than intercept and placed at the 29 | first columns of the design matrices --- whose coefficients are 30 | estimated with Gaussian priors of pre-specified standard 31 | deviation(s). 32 | sd_for_intercept : float 33 | Standard deviation of Gaussian prior on the intercept. `Inf` 34 | corresponds to an uninformative flat prior. 35 | sd_for_fixed_effect : float, numpy array 36 | Standard deviation(s) of Gaussian prior(s) on fixed effects. 37 | If an array, the length must be the same as `n_fixed_effect`. 38 | `Inf` corresponds to an uninformative flat prior. 39 | regularizing_slab_size : float 40 | Standard deviation of the Gaussian tail-regularizer on 41 | the bridge prior. Used to impose soft prior constraints on a 42 | range of regression coefficients in case the data provides limited 43 | information (e.g. when complete separation occurs). One may, for 44 | example, set the slab size by first choosing a value which 45 | regression coefficients are very unlikely to exceed in magnitude and 46 | then dividing the value by 1.96. 
47 | global_scale_prior_hyper_param : dict, None 48 | Should contain pair of keys 'log10_mean' and 'log10_sd', 49 | specifying the prior mean and standard deviation of 50 | log10(global_scale). If None, the default reference prior for a 51 | scale parameter is used. 52 | 53 | Other Parameters 54 | ---------------- 55 | _global_scale_parametrization: str, {'raw', 'coef_magnitude'} 56 | If 'coef_magnitude', scale the local and global scales so that the 57 | global scale parameter coincide with the prior expected 58 | magnitude of regression coefficients. 59 | """ 60 | if not (np.isscalar(sd_for_fixed_effect) 61 | or n_fixed_effect == len(sd_for_fixed_effect)): 62 | raise ValueError( 63 | "Prior sd for fixed effects must be specified either by a " 64 | "scalar or array of the same length as n_fixed_effect." 65 | ) 66 | if bridge_exponent > 2: 67 | raise ValueError("Exponent larger than 2 is unsupported.") 68 | 69 | if np.isscalar(sd_for_fixed_effect): 70 | sd_for_fixed_effect = sd_for_fixed_effect * np.ones(n_fixed_effect) 71 | self.sd_for_intercept = sd_for_intercept 72 | self.sd_for_fixed = sd_for_fixed_effect 73 | self.slab_size = regularizing_slab_size 74 | self.n_fixed = n_fixed_effect 75 | self.bridge_exp = bridge_exponent 76 | self._gscale_paramet = _global_scale_parametrization 77 | if global_scale_prior_hyper_param is None: 78 | self.param = { 79 | 'gscale_neg_power': {'shape': 0., 'rate': 0.}, 80 | # Reference prior for a scale family. 81 | 'gscale': None 82 | } 83 | 84 | else: 85 | keys = global_scale_prior_hyper_param.keys() 86 | if not ({'log10_mean', 'log10_sd'} <= keys): 87 | raise ValueError( 88 | "Dictionary should contain keys 'log10_mean' and 'log10_sd.'" 89 | ) 90 | log10_mean = global_scale_prior_hyper_param['log10_mean'] 91 | log10_sd = global_scale_prior_hyper_param['log10_sd'] 92 | shape, rate = self.solve_for_gscale_prior_hyperparam( 93 | log10_mean, log10_sd, bridge_exponent, self._gscale_paramet 94 | ) 95 | self.param = { 96 | 'gscale_neg_power': {'shape': shape, 'rate': rate}, 97 | 'gscale': {'log10_mean': log10_mean, 'log10_sd': log10_sd} 98 | } # Hyper-parameters on the negative power are specified in 99 | # terms of the 'raw' parametrization. 100 | 101 | def get_info(self): 102 | sd_for_fixed = self.sd_for_fixed 103 | if len(sd_for_fixed) > 0 and np.all(sd_for_fixed == sd_for_fixed[0]): 104 | sd_for_fixed = sd_for_fixed[0] 105 | info = { 106 | 'bridge_exponent': self.bridge_exp, 107 | 'n_fixed_effect': self.n_fixed, 108 | 'sd_for_intercept': self.sd_for_intercept, 109 | 'sd_for_fixed_effect': sd_for_fixed, 110 | 'regularizing_slab_size': self.slab_size, 111 | 'global_scale_prior_hyper_param': self.param['gscale'], 112 | '_global_scale_parametrization': self._gscale_paramet 113 | } 114 | return info 115 | 116 | def clone(self, **kwargs): 117 | """ Make a clone with only specified attributes modified. """ 118 | info = self.get_info() 119 | if '_global_scale_parametrization' in kwargs: 120 | raise ValueError("Change of parametrization is not supported.") 121 | for key in kwargs.keys(): 122 | if key in info: 123 | info[key] = kwargs[key] 124 | else: 125 | warn("'{:s} is not a valid keyward argument.".format(key)) 126 | return RegressionCoefPrior(**info) 127 | 128 | def adjust_scale(self, gscale, lscale, to): 129 | unit_bridge_magnitude \ 130 | = self.compute_power_exp_ave_magnitude(self.bridge_exp, 1.) 
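        # Reference values (added comment; the magnitude is Gamma(2/p) / Gamma(1/p)
        # for a density proportional to exp(-|x|^p)):
        #   bridge_exp = 0.5 -> Gamma(4) / Gamma(2) = 6
        #   bridge_exp = 1.0 -> Gamma(2) / Gamma(1) = 1 (double-exponential case)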
131 | if to == 'raw': 132 | gscale /= unit_bridge_magnitude 133 | lscale *= unit_bridge_magnitude 134 | elif to == 'coef_magnitude': 135 | gscale *= unit_bridge_magnitude 136 | lscale /= unit_bridge_magnitude 137 | else: 138 | raise ValueError() 139 | return gscale, lscale 140 | 141 | def solve_for_gscale_prior_hyperparam( 142 | self, log10_mean, log10_sd, bridge_exp, gscale_paramet): 143 | log_mean = self.change_log_base(log10_mean, from_=10., to=math.e) 144 | log_sd = self.change_log_base(log10_sd, from_=10., to=math.e) 145 | if gscale_paramet == 'coef_magnitude': 146 | unit_bridge_magnitude \ 147 | = self.compute_power_exp_ave_magnitude(bridge_exp, 1.) 148 | log_mean -= math.log(unit_bridge_magnitude) 149 | shape, rate = self.solve_for_gamma_param( 150 | log_mean, log_sd, bridge_exp 151 | ) 152 | return shape, rate 153 | 154 | @staticmethod 155 | def compute_power_exp_ave_magnitude(exponent, scale=1.): 156 | """ Returns the expected absolute value of a random variable with 157 | density proportional to exp( - |x / scale|^exponent ). 158 | """ 159 | return scale * math.gamma(2 / exponent) / math.gamma(1 / exponent) 160 | 161 | @staticmethod 162 | def change_log_base(val, from_=math.e, to=10.): 163 | return val * math.log(from_) / math.log(to) 164 | 165 | def solve_for_gamma_param(self, log_mean, log_sd, bridge_exp): 166 | """ Find hyper-parameters matching specified mean and sd in log scale. 167 | 168 | Determine the shape and rate parameters of a Gamma prior on 169 | phi = gscale ** (- 1 / bridge_exp) 170 | so that the mean and sd of log(phi) coincide with log_mean and log_sd. 171 | The calculations are done in the 'raw' parametrization of gscale, 172 | as opposed to the 'coef_magnitude' parametrization. 173 | """ 174 | 175 | f = lambda log_shape: ( 176 | math.sqrt(self._polygamma(1, math.exp(log_shape))) / bridge_exp 177 | - log_sd 178 | ) # Function whose root coincides with the desired log-shape parameter. 179 | lower_lim = -10. # Any sufficiently small number is fine. 180 | if log_sd < 0: 181 | raise ValueError("Variance has to be positive.") 182 | elif log_sd > 10 ** 8: 183 | raise ValueError("Specified prior variance is too large.") 184 | lower, upper = self._find_root_bounds(f, lower_lim) 185 | 186 | try: 187 | log_shape = sp.optimize.brentq(f, lower, upper) 188 | except BaseException as error: 189 | print('Solving for the global scale gamma prior hyper-parameters ' 190 | 'failed; {}'.format(error)) 191 | shape = math.exp(log_shape) 192 | rate = math.exp( 193 | self._polygamma(0, shape) + bridge_exp * log_mean 194 | ) 195 | return shape, rate 196 | 197 | @staticmethod 198 | def _polygamma(n, x): 199 | """ Wrap the scipy function so that it returns a scalar. """ 200 | return scipy_polygamma([n], x)[0] 201 | 202 | @staticmethod 203 | def _find_root_bounds(f, init_lower_lim, increment=5., max_lim=None): 204 | if max_lim is None: 205 | max_lim = init_lower_lim + 10 ** 4 206 | if f(init_lower_lim) < 0: 207 | raise ValueError( 208 | "Objective function must have positive value " 209 | "at the lower limit." 210 | ) 211 | lower_lim = init_lower_lim 212 | while f(lower_lim + increment) > 0 and lower_lim < max_lim: 213 | lower_lim += increment 214 | if lower_lim >= max_lim: 215 | raise Exception() # Replace with a warning. 
216 | upper_lim = lower_lim + increment 217 | return (lower_lim, upper_lim) -------------------------------------------------------------------------------- /tests/manual_tests/test_stepsize_adapter.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import sys\n", 10 | "sys.path.append('..')\n", 11 | "sys.path.append('../..')" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "import numpy as np\n", 21 | "import scipy as sp\n", 22 | "import matplotlib.pyplot as plt\n", 23 | "%matplotlib inline" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "from distributions import BivariateGaussian, BivariateSkewNormal\n", 33 | "from bayesbridge.reg_coef_sampler.stepsize_adapter \\\n", 34 | " import StepsizeAdapter, RobbinsMonroStepsizer, DualAverageStepsizeAdapter" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "## Define an adaptive RWMH operator and the target distribution." 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "def random_walk_MH_step(f, theta0, logp0, prop_sd):\n", 51 | " \"\"\"\n", 52 | " Params\n", 53 | " ------\n", 54 | " f : function\n", 55 | " Computes the log density of the target density\n", 56 | " prop_sd : scalar or vector\n", 57 | " Proposal standard deviation\n", 58 | " \"\"\"\n", 59 | " theta = theta0.copy()\n", 60 | " theta += prop_sd * np.random.randn(len(theta0))\n", 61 | " logp = f(theta)\n", 62 | " accept_prob = min(1, np.exp(logp - logp0))\n", 63 | " accepted = accept_prob > np.random.uniform()\n", 64 | " if not accepted:\n", 65 | " theta = theta0\n", 66 | " logp = logp0\n", 67 | " return theta, logp, accept_prob, accepted\n", 68 | "\n", 69 | "def adaptive_random_walk_MH(\n", 70 | " f, n_iter, x0, stepsize0, target_accept_prob=.9,\n", 71 | " adapt_method='robbins-monro'\n", 72 | " ):\n", 73 | " \n", 74 | " logp = f(x0)\n", 75 | " x = x0.copy()\n", 76 | " stepsize = stepsize0\n", 77 | " \n", 78 | " # Pre-allocate\n", 79 | " samples = np.zeros((len(x0), n_iter))\n", 80 | " stepsizes = np.zeros(n_iter)\n", 81 | " ave_stepsizes = np.zeros(n_iter)\n", 82 | " accept_probs = np.zeros(n_iter)\n", 83 | " \n", 84 | " if adapt_method == 'dual-average':\n", 85 | " adapter = DualAverageStepsizeAdapter(\n", 86 | " stepsize0, target_accept_prob\n", 87 | " )\n", 88 | " else:\n", 89 | " adapter = StepsizeAdapter(\n", 90 | " stepsize0, target_accept_prob, \n", 91 | " reference_iteration=n_iter, \n", 92 | " adaptsize_at_reference=0.05\n", 93 | " )\n", 94 | " for i in range(n_iter):\n", 95 | " \n", 96 | " x, logp, accept_prob, _ = \\\n", 97 | " random_walk_MH_step(f, x, logp, stepsize)\n", 98 | " stepsize = adapter.adapt_stepsize(accept_prob)\n", 99 | " \n", 100 | " samples[:, i] = x\n", 101 | " stepsizes[i] = stepsize\n", 102 | " ave_stepsizes[i] = adapter.get_current_stepsize(averaged=True)\n", 103 | " accept_probs[i] = accept_prob\n", 104 | " \n", 105 | " return samples, stepsizes, ave_stepsizes, accept_probs" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [ 114 | "bi_skewnorm = 
BivariateSkewNormal()\n", 115 | "def f(x):\n", 116 | " return bi_skewnorm.compute_logp_and_gradient(x, logp_only=True)[0]" 117 | ] 118 | }, 119 | { 120 | "cell_type": "markdown", 121 | "metadata": {}, 122 | "source": [ 123 | "## Compare different adaptation schedule for Robbins-Monro algorithm." 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": null, 129 | "metadata": {}, 130 | "outputs": [], 131 | "source": [ 132 | "plt.figure(figsize=(7, 4.5))\n", 133 | "plt.rcParams['font.size'] = 18\n", 134 | "\n", 135 | "for decay_exponent in (1., 2 / 3, 1 / 2):\n", 136 | " rm_stepsizer = RobbinsMonroStepsizer(\n", 137 | " init=1., decay_exponent=decay_exponent,\n", 138 | " reference_iteration=100, size_at_reference=.05)\n", 139 | " adaptation_sizes = rm_stepsizer.calculate_stepsize(np.arange(500))\n", 140 | " plt.plot(np.log10(adaptation_sizes), label='Exponent = {:.2f}'.format(decay_exponent))\n", 141 | " \n", 142 | "plt.ylabel(r'$\\log_{10}$(adaptation size)')\n", 143 | "plt.xlabel('Number of adaptation steps')\n", 144 | "plt.legend()\n", 145 | "plt.show()" 146 | ] 147 | }, 148 | { 149 | "cell_type": "markdown", 150 | "metadata": {}, 151 | "source": [ 152 | "## Run an adaptive MCMC with Robbins-Monro adaptation." 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": null, 158 | "metadata": {}, 159 | "outputs": [], 160 | "source": [ 161 | "stepsize0 = 1\n", 162 | "x0 = np.array([0., 0.])\n", 163 | "n_iter = 5 * 10 ** 4\n", 164 | "\n", 165 | "samples, stepsizes, ave_stepsizes, accept_probs = adaptive_random_walk_MH(\n", 166 | " f, n_iter, x0, stepsize0, target_accept_prob=.9, \n", 167 | " adapt_method='robbins-monro',\n", 168 | ")" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": null, 174 | "metadata": {}, 175 | "outputs": [], 176 | "source": [ 177 | "print('The average acceptance probability is {:.2f}.'.format(np.mean(accept_probs)))" 178 | ] 179 | }, 180 | { 181 | "cell_type": "markdown", 182 | "metadata": {}, 183 | "source": [ 184 | "#### Take a look at the empirical distribution: stationary distribution may be perturbed a bit due to adaptation." 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": null, 190 | "metadata": {}, 191 | "outputs": [], 192 | "source": [ 193 | "plt.figure(figsize=(7, 4.5))\n", 194 | "plt.rcParams['font.size'] = 20\n", 195 | "\n", 196 | "grid = np.linspace(-4, 4, 101)\n", 197 | "marginal_pdf = bi_skewnorm.compute_marginal_pdf(grid, grid)\n", 198 | "\n", 199 | "for axis in range(2):\n", 200 | " color = 'C' + str(axis)\n", 201 | " plt.hist(samples[axis, int(n_iter / 2):], \n", 202 | " alpha=.5, bins=21, normed=True,\n", 203 | " color=color)\n", 204 | " plt.plot(grid, marginal_pdf[axis], color=color)\n", 205 | "plt.show()" 206 | ] 207 | }, 208 | { 209 | "cell_type": "markdown", 210 | "metadata": {}, 211 | "source": [ 212 | "#### Plot the sequence of stepsizes used at each MCMC iteration as well as the average." 
213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": null, 218 | "metadata": {}, 219 | "outputs": [], 220 | "source": [ 221 | "plt.figure(figsize=(14, 4.5))\n", 222 | "\n", 223 | "plt.plot(np.log10(stepsizes), label='M-H stepsize')\n", 224 | "plt.plot(np.log10(ave_stepsizes), label='averaged stepsize')\n", 225 | "plt.ylim([-2.1, -.4])\n", 226 | "\n", 227 | "plt.legend()\n", 228 | "plt.show()" 229 | ] 230 | }, 231 | { 232 | "cell_type": "markdown", 233 | "metadata": {}, 234 | "source": [ 235 | "## Run an adaptive MCMC with dual-averaging algorithm." 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": null, 241 | "metadata": {}, 242 | "outputs": [], 243 | "source": [ 244 | "stepsize0 = .1\n", 245 | "x0 = np.array([0., 0.])\n", 246 | "n_iter = 5 * 10 ** 4\n", 247 | "samples, stepsizes, ave_stepsizes, accept_probs = adaptive_random_walk_MH(\n", 248 | " f, n_iter, x0, stepsize0, target_accept_prob=.9, \n", 249 | " adapt_method='dual-average',\n", 250 | ")" 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": null, 256 | "metadata": {}, 257 | "outputs": [], 258 | "source": [ 259 | "print('The average acceptance probability is {:.2f}.'.format(np.mean(accept_probs)))" 260 | ] 261 | }, 262 | { 263 | "cell_type": "markdown", 264 | "metadata": {}, 265 | "source": [ 266 | "#### Take a look at the empirical distribution: stationary distribution may be perturbed a bit due to adaptation." 267 | ] 268 | }, 269 | { 270 | "cell_type": "code", 271 | "execution_count": null, 272 | "metadata": {}, 273 | "outputs": [], 274 | "source": [ 275 | "plt.figure(figsize=(7, 4.5))\n", 276 | "plt.rcParams['font.size'] = 20\n", 277 | "\n", 278 | "grid = np.linspace(-4, 4, 101)\n", 279 | "marginal_pdf = bi_skewnorm.compute_marginal_pdf(grid, grid)\n", 280 | "\n", 281 | "for axis in range(2):\n", 282 | " color = 'C' + str(axis)\n", 283 | " plt.hist(samples[axis, int(n_iter / 2):], \n", 284 | " alpha=.5, bins=21, normed=True,\n", 285 | " color=color)\n", 286 | " plt.plot(grid, marginal_pdf[axis], color=color)\n", 287 | "plt.show()" 288 | ] 289 | }, 290 | { 291 | "cell_type": "markdown", 292 | "metadata": {}, 293 | "source": [ 294 | "#### Plot the sequence of stepsizes used at each MCMC iteration as well as the average." 
295 | ] 296 | }, 297 | { 298 | "cell_type": "code", 299 | "execution_count": null, 300 | "metadata": {}, 301 | "outputs": [], 302 | "source": [ 303 | "plt.figure(figsize=(14, 4.5))\n", 304 | "\n", 305 | "plt.plot(np.log10(stepsizes), label='M-H stepsize')\n", 306 | "plt.plot(np.log10(ave_stepsizes), label='averaged stepsize')\n", 307 | "plt.ylim([-2.1, -.4])\n", 308 | "\n", 309 | "plt.legend()\n", 310 | "plt.show()" 311 | ] 312 | } 313 | ], 314 | "metadata": { 315 | "kernelspec": { 316 | "display_name": "Python 3", 317 | "language": "python", 318 | "name": "python3" 319 | }, 320 | "language_info": { 321 | "codemirror_mode": { 322 | "name": "ipython", 323 | "version": 3 324 | }, 325 | "file_extension": ".py", 326 | "mimetype": "text/x-python", 327 | "name": "python", 328 | "nbconvert_exporter": "python", 329 | "pygments_lexer": "ipython3", 330 | "version": "3.6.3" 331 | } 332 | }, 333 | "nbformat": 4, 334 | "nbformat_minor": 1 335 | } 336 | -------------------------------------------------------------------------------- /bayesbridge/reg_coef_sampler/hamiltonian_monte_carlo/stepsize_adapter.py: -------------------------------------------------------------------------------- 1 | from math import exp, log, log10, sqrt, copysign 2 | from .util import warn_message_only 3 | import scipy.stats as stats 4 | 5 | 6 | class HamiltonianBasedStepsizeAdapter(): 7 | """ 8 | Updates the stepsize of an HMC integrator so that the average Hamiltonian 9 | error matches a pre-specified target value. 10 | """ 11 | 12 | def __init__(self, init_stepsize, target_accept_prob=.9, 13 | init_adaptsize=1., adapt_decay_exponent=1., 14 | reference_iteration=500, adaptsize_at_reference=.05): 15 | """ 16 | Parameters 17 | ---------- 18 | reference_iteration & adaptsize_at_reference: 19 | Stepsize sequence of Robbins-Monro algorithm will be set so that it 20 | decreases to `adaptsize_at_refrence` after `reference_iteration`. 21 | """ 22 | if init_stepsize <= 0: 23 | raise ValueError("The initial stepsize must be positive.") 24 | log_init_stepsize = log(init_stepsize) 25 | self.log_stepsize = log_init_stepsize 26 | self.log_stepsize_averaged = log_init_stepsize 27 | self.n_averaged = 0 28 | self.target_accept_prob = target_accept_prob 29 | self.target_log10_hamiltonian_error \ 30 | = self.convert_to_log_hamiltonian_error(target_accept_prob) 31 | 32 | self.rm_stepsizer = RobbinsMonroStepsizer( 33 | init=init_adaptsize, 34 | decay_exponent=adapt_decay_exponent, 35 | reference_iteration=reference_iteration, 36 | size_at_reference=adaptsize_at_reference 37 | ) 38 | 39 | @staticmethod 40 | def convert_to_log_hamiltonian_error(target_accept_prob): 41 | """ Calculate the target squared Hamiltonian error in the log scale. 42 | 43 | Under a high-dimensional limit of i.i.d. parameters, the Hamiltonian 44 | error is distributed as 45 | Normal(mean = - delta / 2, var = delta), 46 | and the corresponding average acceptance rate is 47 | 2 GausssianCDF(- sqrt(delta) / 2). 48 | So we solve for `delta` that theoretically achieves the target acceptance 49 | rate and try to calibrate the average square error of the Hamiltonian 50 | to be the theoretical value (delta^2 / 4 + delta). 
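For instance (a worked example of the formula above; numbers are approximate): a target acceptance probability of 0.9 gives delta = 4 * norm.ppf(0.45) ** 2, roughly 0.063, so this method returns about .5 * log10(0.063 + 0.063 ** 2 / 4), or roughly -0.60, corresponding to a typical absolute Hamiltonian error of about 0.25.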
51 | """ 52 | if target_accept_prob <= 0 or target_accept_prob >= 1: 53 | raise ValueError("Target probability must be within (0, 1).") 54 | delta = 4 * stats.norm.ppf(target_accept_prob / 2) ** 2 55 | target_log10_hamiltonian_error = .5 * log10(delta + delta ** 2 / 4) 56 | return target_log10_hamiltonian_error 57 | 58 | def get_current_stepsize(self, averaged=False): 59 | if averaged: 60 | return exp(self.log_stepsize_averaged) 61 | else: 62 | return exp(self.log_stepsize) 63 | 64 | def reinitialize(self, init_stepsize): 65 | log_init_stepsize = log(init_stepsize) 66 | self.log_stepsize = log_init_stepsize 67 | self.log_stepsize_averaged = log_init_stepsize 68 | self.n_averaged = 0 69 | 70 | def adapt_stepsize(self, hamiltonian_error): 71 | rm_stepsize = self.rm_stepsizer.calculate_stepsize(self.n_averaged) 72 | self.n_averaged += 1 73 | adaptsize = self.transform_to_adaptsize(hamiltonian_error) 74 | self.log_stepsize += rm_stepsize * adaptsize 75 | weight = 1 / self.n_averaged 76 | self.log_stepsize_averaged = ( 77 | weight * self.log_stepsize 78 | + (1 - weight) * self.log_stepsize_averaged 79 | ) 80 | return exp(self.log_stepsize) 81 | 82 | def transform_to_adaptsize( 83 | self, error, upper_bound=1., trans_type='piecewise'): 84 | """ 85 | Parameters 86 | ---------- 87 | trans_type: str, {'log-linear', 'sign', 'piecewise'} 88 | """ 89 | 90 | if trans_type == 'probability': 91 | accept_prob = min(1, exp(error)) 92 | adapt_size = accept_prob - self.target_accept_prob 93 | return adapt_size 94 | 95 | if error == 0.: 96 | log10_error = - float('inf') 97 | else: 98 | log10_error = log10(abs(error)) 99 | 100 | target = self.target_log10_hamiltonian_error 101 | if trans_type == 'log-linear': 102 | adapt_size = target - log10_error 103 | 104 | elif trans_type == 'sign': 105 | adapt_size = copysign(1., target - log10_error) 106 | 107 | elif trans_type == 'piecewise': 108 | # Increase the adjustment when the error is larger than the target. 109 | if log10_error > target: 110 | adapt_size = (target - log10_error) / .301 # Convert to log2 scale. 111 | else: 112 | adapt_size = (target - log10_error) / 3 # Convert to log1000 scale. 113 | 114 | else: 115 | raise NotImplementedError() 116 | 117 | if abs(adapt_size) > upper_bound: 118 | adapt_size = copysign(1., adapt_size) 119 | 120 | return adapt_size 121 | 122 | 123 | def initialize_stepsize(compute_acceptprob, dt=1.0): 124 | """ Heuristic for choosing an initial value of dt 125 | 126 | Parameters 127 | ---------- 128 | compute_acceptprob: callable 129 | Computes the acceptance probability of the proposal one-step HMC proposal. 130 | """ 131 | 132 | # Figure out what direction we should be moving dt. 133 | acceptprob = compute_acceptprob(dt) 134 | direc = 2 * int(acceptprob > 0.5) - 1 135 | 136 | # Keep moving dt in that direction until acceptprob crosses 0.5. 137 | while acceptprob == 0 or (2 * acceptprob) ** direc > 1: 138 | dt = dt * (2 ** direc) 139 | acceptprob = compute_acceptprob(dt) 140 | if acceptprob == 0 and direc == 1: 141 | # The last doubling of stepsize was too much. 
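# (Doubling continues while the acceptance probability stays above 0.5 and halving continues while it stays below 0.5; an acceptance probability of exactly zero during the doubling phase means the integrator diverged, so back off by one halving before returning.)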
142 | dt /= 2 143 | break 144 | 145 | return dt 146 | 147 | 148 | class RobbinsMonroStepsizer(): 149 | 150 | def __init__(self, init=1., decay_exponent=1., 151 | reference_iteration=None, size_at_reference=None): 152 | self.init = init 153 | self.exponent = decay_exponent 154 | self.scale = self.determine_decay_scale( 155 | init, decay_exponent, reference_iteration, size_at_reference 156 | ) 157 | 158 | def determine_decay_scale(self, init, decay_exponent, ref_iter, size_at_ref): 159 | 160 | if (ref_iter is not None) and (size_at_ref is not None): 161 | decay_scale = \ 162 | ref_iter / ((init / size_at_ref) ** (1 / decay_exponent) - 1) 163 | else: 164 | warn_message_only( 165 | 'The default stepsize sequence tends to decay too quickly; ' 166 | 'consider manually setting the decay scale.' 167 | ) 168 | decay_scale = 1. 169 | 170 | return decay_scale 171 | 172 | def __iter__(self): 173 | self.n_iter = 0 174 | return self 175 | 176 | def __next__(self): 177 | stepsize = self.calculate_stepsize(self.n_iter) 178 | self.n_iter += 1 179 | return stepsize 180 | 181 | def calculate_stepsize(self, n_iter): 182 | stepsize = self.init / (1 + n_iter / self.scale) ** self.exponent 183 | return stepsize 184 | 185 | 186 | class RobbinsMonroStepsizeAdapter(): 187 | 188 | def __init__(self, init_stepsize, target_accept_prob=.9, 189 | init_adaptsize=1., adapt_decay_exponent=1., 190 | reference_iteration=100, adaptsize_at_reference=.05): 191 | """ 192 | Parameters 193 | ---------- 194 | reference_iteration & adaptsize_at_reference: 195 | Stepsize sequence of Robbins-Monro algorithm will be set so that it 196 | decreases to `adaptsize_at_reference` after `reference_iteration`. 197 | """ 198 | if init_stepsize <= 0: 199 | raise ValueError("The initial stepsize must be positive.") 200 | log_init_stepsize = log(init_stepsize) 201 | self.log_stepsize = log_init_stepsize 202 | self.log_stepsize_averaged = log_init_stepsize 203 | self.n_averaged = 0 204 | self.target_accept_prob = target_accept_prob 205 | 206 | self.rm_stepsizer = iter(RobbinsMonroStepsizer( 207 | init=init_adaptsize, 208 | decay_exponent=adapt_decay_exponent, 209 | reference_iteration=reference_iteration, 210 | size_at_reference=adaptsize_at_reference 211 | )) 212 | 213 | def get_current_stepsize(self, averaged=False): 214 | if averaged: 215 | return exp(self.log_stepsize_averaged) 216 | else: 217 | return exp(self.log_stepsize) 218 | 219 | def adapt_stepsize(self, accept_prob): 220 | self.n_averaged += 1 221 | rm_stepsize = next(self.rm_stepsizer) 222 | adaptsize = \ 223 | self.transform_to_adaptsize(accept_prob, self.target_accept_prob) 224 | self.log_stepsize += rm_stepsize * adaptsize 225 | weight = 1 / self.n_averaged 226 | self.log_stepsize_averaged = ( 227 | weight * self.log_stepsize 228 | + (1 - weight) * self.log_stepsize_averaged 229 | ) 230 | return exp(self.log_stepsize) 231 | 232 | def transform_to_adaptsize( 233 | self, accept_prob, target, trans_type='linear'): 234 | """ 235 | Parameters 236 | ---------- 237 | trans_type: str, {'linear', 'sign', 'penalize-high-prob'} 238 | """ 239 | 240 | if trans_type == 'linear': 241 | adapt_size = accept_prob - target 242 | 243 | elif trans_type == 'sign': 244 | adapt_size = copysign(1., accept_prob - target) 245 | 246 | elif trans_type == 'penalize-high-prob': 247 | # Transforms accept_prob -> adapt_size so that it roughly interpolates 248 | # the points (0, -1), (target, 0), and (1, 1).
Transformation is 249 | # linear near accept_prob = target but quickly goes up to 250 | # adapt_size = 1 as (1 - accecpt_prob) becomes an order of manitude 251 | # smaller than (1 - target). 252 | if accept_prob <= target: 253 | adapt_size = (accept_prob - target) / target 254 | else: 255 | epsilon = 2. ** -52 256 | magnitude_diff = log10( 257 | (1. - (accept_prob - epsilon)) / (1 - target) 258 | ) 259 | if magnitude_diff == 0: 260 | w = 0. 261 | else: 262 | w = exp(magnitude_diff ** - 1) 263 | adapt_size = ( 264 | (1 - w) * (accept_prob - target) / target 265 | - w * magnitude_diff 266 | ) 267 | adapt_size = min(1., adapt_size) 268 | 269 | else: 270 | raise NotImplementedError() 271 | 272 | return adapt_size 273 | 274 | 275 | class DualAverageStepsizeAdapter(): 276 | 277 | def __init__(self, init_stepsize, target_accept_prob=.9): 278 | 279 | if init_stepsize <= 0: 280 | raise ValueError("The initial stepsize must be positive.") 281 | log_init_stepsize = log(init_stepsize) 282 | self.log_stepsize = log_init_stepsize 283 | self.log_stepsize_averaged = log_init_stepsize 284 | self.n_averaged = 0 285 | self.target_accept_prob = target_accept_prob 286 | self.latent_stat = 0. # Used for dual-averaging. 287 | 288 | # Parameters for the dual-averaging algorithm. 289 | self.stepsize_averaging_log_decay_rate = 0.75 290 | self.latent_prior_samplesize = 10 291 | multiplier = 2. # > 1 to err on the side of shrinking toward a larger value. 292 | self.log_stepsize_shrinkage_mean = log(multiplier) + log_init_stepsize 293 | self.log_stepsize_shrinkage_strength = 0.05 294 | # Variable name is not quite accurate since this parameter interacts with latent_prior_samplesize. 295 | 296 | def get_current_stepsize(self, averaged=False): 297 | if averaged: 298 | return exp(self.log_stepsize_averaged) 299 | else: 300 | return exp(self.log_stepsize) 301 | 302 | def adapt_stepsize(self, accept_prob): 303 | self.n_averaged += 1 304 | self.latent_stat = self.update_latent_stat( 305 | accept_prob, self.target_accept_prob, self.latent_stat 306 | ) 307 | self.log_stepsize, self.log_stepsize_averaged = self.dual_average_stepsize( 308 | self.latent_stat, self.log_stepsize_averaged 309 | ) 310 | return exp(self.log_stepsize) 311 | 312 | def update_latent_stat(self, accept_prob, target_accept_prob, latent_stat): 313 | weight_latent = (self.n_averaged + self.latent_prior_samplesize) ** -1 314 | latent_stat = (1 - weight_latent) * latent_stat \ 315 | + weight_latent * (target_accept_prob - accept_prob) 316 | return latent_stat 317 | 318 | def dual_average_stepsize(self, latent_stat, log_stepsize_optimized): 319 | log_stepsize = ( 320 | self.log_stepsize_shrinkage_mean 321 | - sqrt(self.n_averaged) / self.log_stepsize_shrinkage_strength * latent_stat 322 | ) 323 | weight = self.n_averaged ** - self.stepsize_averaging_log_decay_rate 324 | log_stepsize_optimized = \ 325 | (1 - weight) * log_stepsize_optimized + weight * log_stepsize 326 | return log_stepsize, log_stepsize_optimized -------------------------------------------------------------------------------- /bayesbridge/random/tilted_stable/tilted_stable.pyx: -------------------------------------------------------------------------------- 1 | cimport cython 2 | from libc.math cimport exp as exp_c 3 | from libc.math cimport fabs, pow, log, sqrt, sin, floor, INFINITY, M_PI 4 | import random 5 | import numpy as np 6 | cimport numpy as np 7 | from numpy.random import PCG64 8 | from numpy.random.bit_generator cimport BitGenerator 9 | from bayesbridge.random.normal.normal cimport 
random_normal 10 | from bayesbridge.random.uniform.uniform cimport random_uniform 11 | 12 | 13 | cdef double MAX_EXP_ARG = 709 # ~ log(2 ** 1024) 14 | ctypedef np.uint8_t np_uint8 15 | ctypedef double (*rand_generator)() 16 | 17 | 18 | cdef double exp(double x): 19 | if x > MAX_EXP_ARG: 20 | val = INFINITY 21 | elif x < - MAX_EXP_ARG: 22 | val = 0. 23 | else: 24 | val = exp_c(x) 25 | return val 26 | 27 | 28 | @cython.cdivision(True) 29 | cdef double sinc(double x): 30 | cdef double x_sq 31 | if fabs(x) < .01: 32 | x_sq = x * x 33 | val = 1. - x_sq / 6. * (1 - x_sq / 20.) 34 | # Taylor approximation with an error bounded by 2e-16 35 | else: 36 | val = sin(x) / x 37 | return val 38 | 39 | 40 | cdef double python_builtin_next_double(): 41 | return random.random() 42 | 43 | 44 | cdef class ExpTiltedStableDist(): 45 | cdef rand_generator next_double 46 | cdef double TILT_POWER_THRESHOLD # For deciding the faster of two algorithms 47 | cdef BitGenerator bitgen 48 | 49 | def __init__(self, seed=None): 50 | self.set_seed(seed) 51 | self.bitgen = PCG64(seed) 52 | self.TILT_POWER_THRESHOLD = 2. 53 | 54 | def set_seed(self, seed): 55 | self.bitgen = PCG64(seed) 56 | 57 | def get_state(self): 58 | return self.bitgen.state 59 | 60 | def set_state(self, state): 61 | self.bitgen.state = state 62 | 63 | @cython.boundscheck(False) 64 | @cython.wraparound(False) 65 | def sample(self, char_exponent, tilt, method=None): 66 | """ 67 | Generate a random variable from a stable distribution with 68 | characteristic exponent = char_exponent < 1 69 | skewness = 1 70 | scale = cos(char_exponent * pi / 2) ** (1 / char_exponent) 71 | location = 0 72 | exponential tilting = tilt 73 | (The density p(x) is tilted by exp(- tilt * x).) 74 | 75 | The cost of the divide-conquer algorithm increases as a function of 76 | 'tilt ** char_exp'. While the cost of double-rejection algorithm is 77 | bounded, the divide-conquer algorithm is simpler and faster for small 78 | 'tilt ** char_exp'. 79 | 80 | References: 81 | ----------- 82 | Implementation is mostly based on the algorithm descriptions in 83 | 'Sampling Exponentially Tilted Stable Distributions' by Hofert (2011) 84 | Ideas behind and details on the double-rejection sampling is better 85 | described in 86 | 'Random variate generation for exponentially and polynomially tilted 87 | stable distributions' by Devroye (2009) 88 | """ 89 | if not isinstance(tilt, np.ndarray): 90 | raise TypeError('Tilt parameter must be a numpy array.') 91 | if isinstance(char_exponent, (np.floating, float)): 92 | char_exponent = np.tile(char_exponent, tilt.size) 93 | elif isinstance(char_exponent, np.ndarray): 94 | if not char_exponent.size == tilt.size: 95 | raise ValueError('Input arrays must be of the same length.') 96 | else: 97 | raise TypeError('Characteristic exponent must be float or numpy array.') 98 | if not np.all(char_exponent < 1): 99 | raise ValueError('Characteristic exponent must be smaller than 1.') 100 | if not np.all(tilt > 0): 101 | raise ValueError('Tilting parameter must be positive.') 102 | 103 | if method is None: 104 | # Choose a likely faster method. 105 | divide_conquer_cost = tilt ** char_exponent 106 | double_rejection_cost = self.TILT_POWER_THRESHOLD 107 | # The relative costs are implementation & architecture dependent. 
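# The divide-and-conquer sampler splits a draw into roughly floor(tilt ** char_exp) partial sums (see sample_by_divide_and_conquer), so its cost grows with tilt ** char_exp, whereas the expected cost of the double-rejection sampler is bounded; the fixed threshold below simply picks whichever method is likely cheaper.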
108 | use_divide_conquer = (divide_conquer_cost < double_rejection_cost) 109 | elif method in ['divide-conquer', 'double-rejection']: 110 | use_divide_conquer = np.tile(method == 'divide-conquer', tilt.size) 111 | else: 112 | raise ValueError("Unrecognized method name.") 113 | 114 | char_exponent = char_exponent.astype(np.double) 115 | tilt = tilt.astype(np.double) 116 | use_divide_conquer = use_divide_conquer.astype(np.uint8) 117 | result = np.zeros(tilt.size, dtype=np.double) 118 | 119 | cdef double[:] char_exponent_view = char_exponent 120 | cdef double[:] tilt_view = tilt 121 | cdef np_uint8[:] use_divide_conquer_view = use_divide_conquer 122 | cdef double[:] result_view = result 123 | cdef long n_sample = tilt.size 124 | cdef Py_ssize_t i 125 | 126 | for i in range(n_sample): 127 | if use_divide_conquer_view[i]: 128 | result_view[i] = self.sample_by_divide_and_conquer( 129 | char_exponent_view[i], tilt_view[i] 130 | ) 131 | else: 132 | result_view[i] = self.sample_by_double_rejection( 133 | char_exponent_view[i], tilt_view[i] 134 | ) 135 | return result 136 | 137 | cdef double sample_by_divide_and_conquer(self, double char_exp, double tilt): 138 | cdef double X, c 139 | cdef long partition_size = max(1, floor(pow(tilt, char_exp))) 140 | X = 0. 141 | c = pow(1. / partition_size, 1. / char_exp) 142 | for i in range(partition_size): 143 | X += self.sample_divided_rv(char_exp, tilt, c) 144 | return X 145 | 146 | cdef double sample_divided_rv(self, double char_exp, double tilt, double c): 147 | cdef bint accepted = False 148 | while not accepted: 149 | S = c * self.sample_non_tilted_rv(char_exp) 150 | accept_prob = exp(- tilt * S) 151 | accepted = (random_uniform(self.bitgen) < accept_prob) 152 | return S 153 | 154 | cdef double sample_non_tilted_rv(self, double char_exp): 155 | cdef double S = pow( 156 | - self.zolotarev_function(M_PI * random_uniform(self.bitgen), char_exp) 157 | / log(random_uniform(self.bitgen)), 158 | (1. - char_exp) / char_exp 159 | ) 160 | return S 161 | 162 | cdef double sample_by_double_rejection(self, double char_exp, double tilt): 163 | 164 | cdef double U, V, X, z, log_accept_prob 165 | cdef double tilt_power = pow(tilt, char_exp) 166 | 167 | # Start double-rejection sampling. 168 | cdef bint accepted = False 169 | while not accepted: 170 | U, V, z = self.sample_aux_rv(char_exp, tilt_power) 171 | X, log_accept_prob = \ 172 | self.sample_reference_rv(U, char_exp, tilt_power, z) 173 | accepted = (log_accept_prob > log(V)) 174 | 175 | return pow(X, - (1. - char_exp) / char_exp) 176 | 177 | cdef (double, double, double) \ 178 | sample_aux_rv(self, double char_exp, double tilt_power): 179 | """ 180 | Samples an auxiliary random variable for the double-rejection algorithm. 181 | Returns: 182 | U : auxiliary random variable for the double-rejection algorithm 183 | V : uniform random variable independent of U, X 184 | z : scalar quantity used later 185 | """ 186 | cdef double U, V, z, accept_prob 187 | cdef double gamma, xi, psi 188 | # Intermediate quantities; could be computed outside the funciton 189 | # and reused in case of rejection 190 | gamma = tilt_power * char_exp * (1. - char_exp) 191 | xi = (1. + sqrt(2. * gamma) * (2. + sqrt(.5 * M_PI))) / M_PI 192 | psi = sqrt(gamma / M_PI) * (2. + sqrt(.5 * M_PI)) \ 193 | * exp(- gamma * M_PI * M_PI / 8.) 
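# Outer rejection loop: propose the auxiliary variable U from the envelope in sample_aux2_rv, accept or reject it, and return V alongside U and z so that sample_by_double_rejection can carry out the second rejection step.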
194 | cdef bint accepted = False 195 | while not accepted: 196 | U = self.sample_aux2_rv(xi, psi, gamma) 197 | if U > M_PI: 198 | continue 199 | 200 | zeta = sqrt(self.zolotarev_pdf_exponentiated(U, char_exp)) 201 | z = 1. / (1. - pow(1. + char_exp * zeta / sqrt(gamma), -1. / char_exp)) 202 | accept_prob = self.compute_aux2_accept_prob( 203 | U, xi, psi, zeta, z, tilt_power, gamma 204 | ) 205 | if accept_prob > 0.: 206 | V = random_uniform(self.bitgen) / accept_prob 207 | accepted = (U < M_PI and V <= 1.) 208 | 209 | return U, V, z 210 | 211 | cdef double sample_aux2_rv(self, 212 | double xi, double psi, double gamma): 213 | """ 214 | Sample the 2nd level auxiliary random variable (i.e. the additional 215 | auxiliary random variable used to sample the auxiliary variable for the 216 | double-rejection algorithm.) 217 | """ 218 | 219 | w1 = sqrt(.5 * M_PI / gamma) * xi 220 | w2 = 2. * sqrt(M_PI) * psi 221 | w3 = xi * M_PI 222 | V = random_uniform(self.bitgen) 223 | if gamma >= 1: 224 | if V < w1 / (w1 + w2): 225 | U = fabs(random_normal(self.bitgen)) / sqrt(gamma) 226 | else: 227 | W = random_uniform(self.bitgen) 228 | U = M_PI * (1. - W * W) 229 | else: 230 | W = random_uniform(self.bitgen) 231 | if V < w3 / (w2 + w3): 232 | U = M_PI * W 233 | else: 234 | U = M_PI * (1. - W * W) 235 | 236 | return U 237 | 238 | cdef double compute_aux2_accept_prob(self, 239 | double U, double xi, double psi, double zeta, double z, 240 | double tilt_power, double gamma 241 | ): 242 | inverse_accept_prob = M_PI * exp(-tilt_power * (1. - 1. / (zeta * zeta))) \ 243 | / ((1. + sqrt(.5 * M_PI)) * sqrt(gamma) / zeta + z) 244 | d = 0. 245 | if U >= 0. and gamma >= 1: 246 | d += xi * exp(-gamma * U * U / 2.) 247 | if U > 0. and U < M_PI: 248 | d += psi / sqrt(M_PI - U) 249 | if U >= 0. and U <= M_PI and gamma < 1.: 250 | d += xi 251 | inverse_accept_prob *= d 252 | accept_prob = 1 / inverse_accept_prob 253 | return accept_prob 254 | 255 | cdef (double, double) sample_reference_rv(self, 256 | double U, double char_exp, double tilt_power, double z): 257 | """ 258 | Generate a sample from the reference (augmented) distribution conditional 259 | on U for the double-rejection algorithm. The algorithm uses a rejection 260 | sampler with half-Gaussian, uniform, and truncated exponential proposals on the 261 | left, middle, and right pieces of a partitioned real line. 262 | 263 | Returns: 264 | -------- 265 | X : random variable from the reference distribution 266 | log_accept_prob : log of the acceptance probability for the outer rejection step 267 | """ 268 | cdef double a, left_thresh, right_thresh, expo_scale, \ 269 | mass_left, mass_mid, mass_right, mass_total, X, V, N, E 270 | a = self.zolotarev_function(U, char_exp) 271 | left_thresh = pow((1. - char_exp) / char_exp / a, char_exp) * tilt_power 272 | right_thresh = left_thresh + sqrt(left_thresh * char_exp / a) 273 | expo_scale = z / a 274 | mass_left = (right_thresh - left_thresh) * sqrt(.5 * M_PI) 275 | mass_mid = (right_thresh - left_thresh) 276 | mass_right = expo_scale 277 | mass_total = mass_left + mass_mid + mass_right 278 | V = random_uniform(self.bitgen) 279 | N = 0. 280 | E = 0.
281 | # Divided into three pieces at left_thresh and (left_thresh + mid_width) 282 | if V < mass_left / mass_total: 283 | N = random_normal(self.bitgen) 284 | X = left_thresh - (right_thresh - left_thresh) * fabs(N) 285 | elif V < (mass_left + mass_mid) / mass_total: 286 | X = left_thresh + (right_thresh - left_thresh) * random_uniform(self.bitgen) 287 | else: 288 | E = - log(random_uniform(self.bitgen)) 289 | X = right_thresh + E * mass_right 290 | 291 | log_accept_prob = self.compute_log_accept_prob( 292 | X, N, E, left_thresh, right_thresh, a, char_exp, tilt_power 293 | ) 294 | return X, log_accept_prob 295 | 296 | cdef double compute_log_accept_prob(self, 297 | double X, double N, double E, double left_thresh, double right_thresh, 298 | double a, double char_exp, double tilt_power 299 | ): 300 | cdef double char_exp_odds = (1. - char_exp) / char_exp 301 | if X < 0: 302 | log_accept_prob = - INFINITY 303 | else: 304 | log_accept_prob = - ( 305 | a * (X - left_thresh) 306 | + exp(log(tilt_power) / char_exp - char_exp_odds * log(left_thresh)) 307 | * (pow(left_thresh / X, char_exp_odds) - 1.) 308 | ) 309 | if X < left_thresh: 310 | log_accept_prob += N * N / 2. 311 | elif X > right_thresh: 312 | log_accept_prob += E 313 | 314 | return log_accept_prob 315 | 316 | cdef double zolotarev_pdf_exponentiated(self, double x, double char_exp): 317 | """ 318 | Evaluates a function proportional to a power of the Zolotarev density. 319 | """ 320 | cdef double denominator, numerator 321 | denominator = pow(sinc(char_exp * x), char_exp) \ 322 | * pow(sinc((1. - char_exp) * x), (1. - char_exp)) 323 | numerator = sinc(x) 324 | return numerator / denominator 325 | 326 | cdef double zolotarev_function(self, double x, double char_exp): 327 | cdef double val = pow( 328 | pow((1. - char_exp) * sinc((1. - char_exp) * x), (1. - char_exp)) 329 | * pow(char_exp * sinc(char_exp * x), char_exp) 330 | / sinc(x) 331 | , 1. / (1. - char_exp)) 332 | return val 333 | -------------------------------------------------------------------------------- /bayesbridge/reg_coef_sampler/hamiltonian_monte_carlo/nuts.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import math 3 | import time 4 | from .stepsize_adapter import HamiltonianBasedStepsizeAdapter, initialize_stepsize 5 | from .dynamics import HamiltonianDynamics 6 | from .util import warn_message_only 7 | 8 | 9 | class NoUTurnSampler(): 10 | 11 | def __init__(self, f, mass=None, warning_requested=True): 12 | """ 13 | Parameters 14 | ---------- 15 | f: callable 16 | Return the log probability and gradient evaluated at q. 17 | mass: None, numpy 1d array, or callable 18 | """ 19 | self.f = f 20 | self.dynamics = HamiltonianDynamics(mass) 21 | self.warning_requested = warning_requested 22 | 23 | def generate_samples( 24 | self, q0, n_burnin, n_sample, dt_range=None, seed=None, n_update=0, 25 | adapt_stepsize=False, target_accept_prob=.9, final_adaptsize=.05): 26 | """ 27 | Implements the No-U-Turn Sampler (NUTS) of Hoffman and Gelman (2011). 28 | 29 | Parameters: 30 | ----------- 31 | dt_range: None, float, or ndarray of length 2 32 | adapt_stepsize: bool 33 | If True, the max stepsize will be adjusted to to achieve the target 34 | acceptance rate. Forced to be True if dt_range is None. 
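Example (an illustrative sketch, assuming numpy is imported as np and `f` returns the pair (log density, gradient) as described in the class docstring):

>>> def std_normal_logp_and_grad(q):
...     return -0.5 * np.sum(q ** 2), -q
>>> sampler = NoUTurnSampler(std_normal_logp_and_grad)
>>> samples, info = sampler.generate_samples(
...     q0=np.zeros(2), n_burnin=100, n_sample=1000, seed=0)

Here `samples` has shape (len(q0), n_burnin + n_sample), with the burn-in draws stored in the leading columns.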
35 | """ 36 | 37 | if seed is not None: 38 | np.random.seed(seed) 39 | 40 | q = q0 41 | logp, grad = self.f(q) 42 | 43 | if np.isscalar(dt_range): 44 | dt_range = np.array(2 * [dt_range]) 45 | 46 | elif dt_range is None: 47 | p = self.dynamics.draw_momentum(len(q)) 48 | logp_joint0 = - self.dynamics.compute_hamiltonian(logp, p) 49 | dt = initialize_stepsize( 50 | lambda dt: self.compute_onestep_accept_prob(dt, q, p, grad, logp_joint0) 51 | ) 52 | dt_range = dt * np.array([.8, 1.0]) 53 | adapt_stepsize = True 54 | 55 | max_stepsize_adapter = HamiltonianBasedStepsizeAdapter( 56 | init_stepsize=1., target_accept_prob=target_accept_prob, 57 | reference_iteration=n_burnin, adaptsize_at_reference=final_adaptsize 58 | ) 59 | 60 | if n_update > 0: 61 | n_per_update = math.ceil((n_burnin + n_sample) / n_update) 62 | else: 63 | n_per_update = float('inf') 64 | samples = np.zeros((len(q), n_sample + n_burnin)) 65 | logp_samples = np.zeros(n_sample + n_burnin) 66 | accept_prob = np.zeros(n_sample + n_burnin) 67 | max_dt = np.zeros(n_burnin) 68 | 69 | tic = time.time() 70 | use_averaged_stepsize = False 71 | for i in range(n_sample + n_burnin): 72 | dt_multiplier \ 73 | = max_stepsize_adapter.get_current_stepsize(use_averaged_stepsize) 74 | dt = np.random.uniform(dt_range[0], dt_range[1]) 75 | dt *= dt_multiplier 76 | q, info = self.generate_next_state(dt, q, logp, grad) 77 | logp, grad = info['logp'], info['grad'] 78 | if i < n_burnin and adapt_stepsize: 79 | max_dt[i] = dt_range[1] * dt_multiplier 80 | max_stepsize_adapter.adapt_stepsize(info['ave_hamiltonian_error']) 81 | elif i == n_burnin - 1: 82 | use_averaged_stepsize = True 83 | samples[:, i] = q 84 | logp_samples[i] = logp 85 | if (i + 1) % n_per_update == 0: 86 | print('{:d} iterations have been completed.'.format(i + 1)) 87 | 88 | toc = time.time() 89 | time_elapsed = toc - tic 90 | 91 | info = { 92 | 'logp_samples': logp_samples, 93 | 'accept_prob_samples': accept_prob, 94 | 'sampling_time': time_elapsed 95 | } 96 | if adapt_stepsize: 97 | info['max_stepsize'] = max_dt 98 | 99 | return samples, info 100 | 101 | 102 | def compute_onestep_accept_prob(self, dt, q0, p0, grad0, logp_joint0): 103 | _, p, logp, _ = self.dynamics.integrate(self.f, dt, q0, p0, grad0) 104 | logp_joint = - self.dynamics.compute_hamiltonian(logp, p) 105 | accept_prob = np.exp(logp_joint - logp_joint0) 106 | return accept_prob 107 | 108 | def generate_next_state(self, dt, q, logp=None, grad=None, p=None, 109 | max_height=10, hamiltonian_error_tol=100): 110 | 111 | n_grad_evals = 0 112 | if logp is None or grad is None: 113 | logp, grad = self.f(q) 114 | n_grad_evals += 1 115 | 116 | if p is None: 117 | p = self.dynamics.draw_momentum(len(q)) 118 | 119 | logp_joint = - self.dynamics.compute_hamiltonian(logp, p) 120 | logp_joint_threshold = logp_joint - np.random.exponential() 121 | # Slicing variable in the log-scale. 122 | 123 | tree = _TrajectoryTree( 124 | self.dynamics, self.f, dt, q, p, logp, grad, logp_joint, logp_joint, 125 | logp_joint_threshold, hamiltonian_error_tol 126 | ) 127 | directions = 2 * (np.random.rand(max_height) < 0.5) - 1 128 | # Pre-allocation of random directions is unnecessary, but makes the code easier to test. 
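# Each pre-drawn direction (+1 or -1) decides whether the trajectory tree is doubled forward or backward in time at the corresponding height; growth stops at a U-turn, on numerical instability, or once the maximum height is reached.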
129 | tree, final_height, last_doubling_rejected, maxed_before_u_turn \ 130 | = self._grow_trajectory_till_u_turn(tree, directions) 131 | q, logp, grad = tree.sample 132 | n_grad_evals += tree.n_integration_step 133 | 134 | if self.warning_requested: 135 | self._issue_warnings( 136 | tree.instability_detected, maxed_before_u_turn, max_height 137 | ) 138 | 139 | info = { 140 | 'logp': logp, 141 | 'grad': grad, 142 | 'ave_accept_prob': tree.ave_accept_prob, 143 | 'ave_hamiltonian_error': tree.ave_hamiltonian_error, 144 | 'n_grad_evals': n_grad_evals, 145 | 'tree_height': final_height, 146 | 'u_turn_detected': tree.u_turn_detected, 147 | 'instability_detected': tree.instability_detected, 148 | 'last_doubling_rejected': last_doubling_rejected 149 | } 150 | 151 | return q, info 152 | 153 | def _issue_warnings( 154 | self, instability_detected, maxed_before_u_turn, max_height): 155 | 156 | if instability_detected: 157 | warn_message_only( 158 | "Numerical integration became unstable while simulating a " 159 | "NUTS trajectory." 160 | ) 161 | if maxed_before_u_turn: 162 | warn_message_only( 163 | 'The trajectory tree reached the max height of {:d} before ' 164 | 'meeting the U-turn condition.'.format(max_height) 165 | ) 166 | return 167 | 168 | @staticmethod 169 | def _grow_trajectory_till_u_turn(tree, directions): 170 | 171 | height = 0 # Referred to as 'depth' in the original paper, but arguably the 172 | # trajectory tree is built 'upward' on top of the existing ones. 173 | max_height = len(directions) 174 | trajectory_terminated = False 175 | while not trajectory_terminated: 176 | 177 | doubling_rejected \ 178 | = tree.double_trajectory(height, directions[height]) 179 | # No transition to the next half of the trajectory takes place if the 180 | # termination criteria are met within the next half tree. 181 | 182 | height += 1 183 | trajectory_terminated \ 184 | = tree.u_turn_detected or tree.instability_detected or (height >= max_height) 185 | maxed_before_u_turn \ 186 | = height >= max_height and (not tree.u_turn_detected) 187 | 188 | return tree, height, doubling_rejected, maxed_before_u_turn 189 | 190 | 191 | class _TrajectoryTree(): 192 | """ 193 | Collection of (a subset of) states along the simulated Hamiltonian dynamics 194 | trajectory endowed with a binary tree structure.
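Each call to double_trajectory simulates 2 ** height additional leapfrog states in the direction passed in by the sampler and merges the resulting subtree, so the tree always represents a contiguous trajectory segment whose two ends are stored in front_state and rear_state.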
195 | """ 196 | 197 | def __init__( 198 | self, dynamics, f, dt, q, p, logp, grad, joint_logp, 199 | init_joint_logp, joint_logp_threshold, hamiltonian_error_tol=100., 200 | u_turn_criterion='momentum'): 201 | 202 | self.dynamics = dynamics 203 | self.f = f 204 | self.dt = dt 205 | self.joint_logp_threshold = joint_logp_threshold 206 | self.front_state = (q, p, grad) 207 | self.rear_state = (q, p, grad) 208 | self.sample = (q, logp, grad) 209 | self.u_turn_detected = False 210 | self.min_hamiltonian = - joint_logp 211 | self.max_hamiltonian = - joint_logp 212 | self.hamiltonian_error_tol = hamiltonian_error_tol 213 | self.n_acceptable_state = int(joint_logp > joint_logp_threshold) 214 | self.n_integration_step = 0 215 | self.init_joint_logp = init_joint_logp 216 | self.height = 0 217 | self.ave_hamiltonian_error = abs(init_joint_logp - joint_logp) 218 | self.ave_accept_prob = min(1, math.exp(joint_logp - init_joint_logp)) 219 | self.velocity_based_u_turn = (u_turn_criterion == 'velocity') 220 | 221 | @property 222 | def n_node(self): 223 | return 2 ** self.height 224 | 225 | @property 226 | def instability_detected(self): 227 | fluctuation_along_trajectory = self.max_hamiltonian - self.min_hamiltonian 228 | return fluctuation_along_trajectory > self.hamiltonian_error_tol 229 | 230 | def double_trajectory(self, height, direction): 231 | next_tree = self._build_next_tree( 232 | *self._get_states(direction), height, direction 233 | ) 234 | no_transition_to_next_tree_attempted \ 235 | = self._merge_next_tree(next_tree, direction, sampling_method='swap') 236 | return no_transition_to_next_tree_attempted 237 | 238 | def _build_next_tree(self, q, p, grad, height, direction): 239 | 240 | if height == 0: 241 | return self._build_next_singleton_tree(q, p, grad, direction) 242 | 243 | subtree = self._build_next_tree(q, p, grad, height - 1, direction) 244 | trajectory_terminated_within_subtree \ 245 | = subtree.u_turn_detected or subtree.instability_detected 246 | if not trajectory_terminated_within_subtree: 247 | next_subtree = self._build_next_tree( 248 | *subtree._get_states(direction), height - 1, direction 249 | ) 250 | subtree._merge_next_tree(next_subtree, direction, sampling_method='uniform') 251 | 252 | return subtree 253 | 254 | def _build_next_singleton_tree(self, q, p, grad, direction): 255 | q, p, logp, grad = \ 256 | self.dynamics.integrate(self.f, direction * self.dt, q, p, grad) 257 | self.n_integration_step += 1 258 | if math.isinf(logp): 259 | joint_logp = - float('inf') 260 | else: 261 | joint_logp = - self.dynamics.compute_hamiltonian(logp, p) 262 | return self._clone_tree(q, p, logp, grad, joint_logp) 263 | 264 | def _clone_tree(self, q, p, logp, grad, joint_logp): 265 | """ Construct a tree with shared dynamics and acceptance criteria. """ 266 | return _TrajectoryTree( 267 | self.dynamics, self.f, self.dt, q, p, logp, grad, joint_logp, self.init_joint_logp, 268 | self.joint_logp_threshold, self.hamiltonian_error_tol 269 | ) 270 | 271 | def _merge_next_tree(self, next_tree, direction, sampling_method): 272 | 273 | # Trajectory termination flags from the next tree must be propagated up 274 | # the call stack, but other states of the tree is updated only if the 275 | # next tree is accessible from the current tree (i.e. the trajectory 276 | # did not get terminated within the next tree). 
277 | 278 | self.u_turn_detected = self.u_turn_detected or next_tree.u_turn_detected 279 | self.min_hamiltonian = min(self.min_hamiltonian, next_tree.min_hamiltonian) 280 | self.max_hamiltonian = max(self.max_hamiltonian, next_tree.max_hamiltonian) 281 | trajectory_terminated_within_next_tree \ 282 | = next_tree.u_turn_detected or next_tree.instability_detected 283 | 284 | if not trajectory_terminated_within_next_tree: 285 | self._update_sample(next_tree, sampling_method) 286 | self.n_acceptable_state += next_tree.n_acceptable_state 287 | self._set_states(*next_tree._get_states(direction), direction) 288 | self.u_turn_detected \ 289 | = self.u_turn_detected or self._check_u_turn_at_front_and_rear_ends() 290 | weight = self.n_node / (self.n_node + next_tree.n_node) 291 | self.ave_hamiltonian_error \ 292 | = weight * self.ave_hamiltonian_error + (1 - weight) * next_tree.ave_hamiltonian_error 293 | self.ave_accept_prob \ 294 | = weight * self.ave_accept_prob + (1 - weight) * next_tree.ave_accept_prob 295 | self.height += 1 296 | 297 | return trajectory_terminated_within_next_tree 298 | 299 | def _update_sample(self, next_tree, method): 300 | """ 301 | Parameters 302 | ---------- 303 | method: {'uniform', 'swap'} 304 | """ 305 | if method == 'uniform': 306 | n_total = self.n_acceptable_state + next_tree.n_acceptable_state 307 | sampling_weight_on_next_tree \ 308 | = next_tree.n_acceptable_state / max(1, n_total) 309 | elif method == 'swap': 310 | sampling_weight_on_next_tree \ 311 | = next_tree.n_acceptable_state / self.n_acceptable_state 312 | if np.random.uniform() < sampling_weight_on_next_tree: 313 | self.sample = next_tree.sample 314 | 315 | def _check_u_turn_at_front_and_rear_ends(self): 316 | q_front, p_front, _ = self._get_states(1) 317 | q_rear, p_rear, _ = self._get_states(-1) 318 | dq = q_front - q_rear 319 | if self.velocity_based_u_turn: 320 | v_front = self.dynamics.convert_to_velocity(p_front) 321 | v_rear = self.dynamics.convert_to_velocity(p_rear) 322 | u_turned = (np.dot(dq, v_front) < 0) or (np.dot(dq, v_rear) < 0) 323 | else: 324 | u_turned = (np.dot(dq, p_front) < 0) or (np.dot(dq, p_rear) < 0) 325 | return u_turned 326 | 327 | def _set_states(self, q, p, grad, direction): 328 | if direction > 0: 329 | self.front_state = (q, p, grad) 330 | else: 331 | self.rear_state = (q, p, grad) 332 | 333 | def _get_states(self, direction): 334 | if direction > 0: 335 | return self.front_state 336 | else: 337 | return self.rear_state 338 | --------------------------------------------------------------------------------