├── .coveragerc ├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── kernel_hmc ├── __init__.py ├── densities │ ├── __init__.py │ ├── banana.py │ ├── gaussian.py │ └── posterior_gp_classification_ard.py ├── examples │ ├── __init__.py │ ├── demo_mcmc_gp_glass.py │ ├── demo_mcmc_kmc_adaptive.py │ ├── demo_mcmc_kmc_static.py │ ├── demo_mcmc_other_metropolis.py │ ├── demo_trajectories.py │ ├── plotting.py │ └── run_all.sh ├── hamiltonian │ ├── __init__.py │ ├── hamiltonian.py │ └── leapfrog.py ├── mini_mcmc │ ├── __init__.py │ └── mini_mcmc.py ├── proposals │ ├── __init__.py │ ├── base.py │ ├── hmc.py │ ├── kmc.py │ └── metropolis.py └── tools │ ├── __init__.py │ ├── assertions.py │ ├── file.py │ ├── log.py │ ├── math.py │ └── mcmc_convergence.py ├── requirements.txt ├── setup.py └── tests ├── __init__.py ├── densities ├── __init__.py ├── test_gaussian.py └── test_posterior_gp_classification_ard.py ├── proposals ├── __init__.py ├── test_adaptive_metropolis.py ├── test_base.py └── test_kmc.py └── tools ├── __init__.py └── test_math.py /.coveragerc: -------------------------------------------------------------------------------- 1 | [report] 2 | omit = 3 | *__init__.py 4 | */python?.?/* 5 | */site-packages/nose/* 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # pydev/eclipse project files 2 | .project 3 | .pydevproject 4 | 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | env/ 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *,cover 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | 57 | # Sphinx documentation 58 | docs/_build/ 59 | 60 | # PyBuilder 61 | target/ 62 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "2.7" 4 | 5 | virtualenv: 6 | system_site_packages: true 7 | 8 | notifications: 9 | email: false 10 | 11 | # new container based dependencies 12 | sudo: false 13 | addons: 14 | apt: 15 | packages: 16 | - libatlas-base-dev 17 | - gfortran 18 | - python-numpy 19 | - python-matplotlib 20 | - cython 21 | - python-tk # for matplotlib in server 22 | 23 | # install up to date scipy, taken from 24 | # https://gist.github.com/debsankha/7f610a9f2a826803381a 25 | before_install: 26 | - mkdir -p $HOME/.cache/pip/wheels 27 | # build wheel only if none present 28 | - travis_wait pip wheel --find-links=$HOME/.cache/pip/wheels --use-wheel --wheel-dir=$HOME/.cache/pip/wheels scipy 29 | # now install from it 30 | - pip install --no-index --find-links=$HOME/.cache/pip/wheels scipy 31 | 32 | # optional requirements 33 | - pip install theano 34 | - pip install git+https://github.com/jcrudy/choldate.git 35 | 36 | install: 37 | - pip install -r requirements.txt --use-mirrors 38 | - pip install coveralls 39 | 40 | script: 41 | - python setup.py install 42 | - nosetests --with-coverage --cover-package=kernel_hmc 43 | - cd kernel_hmc/examples && sh run_all.sh && cd ../.. 44 | 45 | after_success: 46 | - coveralls 47 | 48 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015, Heiko Strathmann 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | * Neither the name of kernel_exp_family nor the names of its 15 | contributors may be used to endorse or promote products derived from 16 | this software without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | 29 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Kernel Hamiltonian Monte Carlo 2 | 3 | [![Build Status](https://travis-ci.org/karlnapf/kernel_hmc.png)](https://travis-ci.org/karlnapf/kernel_hmc) 4 | [![Coverage Status](https://coveralls.io/repos/karlnapf/kernel_hmc/badge.svg?branch=master&service=github)](https://coveralls.io/github/karlnapf/kernel_hmc?branch=master) 5 | 6 | Code for the NIPS 2015 paper [Gradient-free Hamiltonian Monte Carlo with Efficient Kernel Exponential Families](http://arxiv.org/abs/1506.02564). 7 | 8 | This package implements the kernel HMC part of the paper. It heavily depends on the [kernel exponential family package](https://github.com/karlnapf/kernel_exp_family), where all gradient estimation code is located. 9 | 10 | My [blog post](http://herrstrathmann.de/kamiltonian-monte-carlo/) about KMC. 11 | 12 | An [IPython notebook](http://nbviewer.ipython.org/gist/karlnapf/da0089726c43ed52a899) featuring KMC lite's ability to move in previously unexplored regions. 13 | 14 | 15 | 16 | Install dependencies: 17 | 18 | pip install -r https://raw.githubusercontent.com/karlnapf/kernel_hmc/master/requirements.txt 19 | 20 | Optional dependencies are: 21 | * [```cholupdate```](https://github.com/jcrudy/choldate) for efficient low-rank updates of Cholesky factors of covariances. Speeds up Adaptive-Metropolis and KMC Finite from cubic to quadratic costs, see paper. 22 | * [```Shogun-toolbox```](https://github.com/shogun-toolbox/shogun) for the Gaussian Process marginal posterior over hyper-parameters example, used to compute unbiased estimates of the marginal likelihood via approximate inference and importance sampling. 23 | * [```theano```](https://github.com/theano/theano) for the Banana example, to compute gradients via auto-grad. 24 | 25 | Install ```kernel_hmc```: 26 | 27 | pip install git+https://github.com/karlnapf/kernel_hmc.git 28 | 29 | A list of examples can be found [here](kernel_hmc/examples).
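The examples follow a common pattern: fit a gradient surrogate from the [kernel exponential family package](https://github.com/karlnapf/kernel_exp_family), wrap it in a KMC proposal, and feed that to the bundled ```mini_mcmc``` loop. A minimal sketch of that pattern, using a standard Gaussian as a stand-in target and illustrative (not tuned) parameter values:

    import numpy as np
    from kernel_exp_family.estimators.lite.gaussian import KernelExpLiteGaussian
    from kernel_hmc.densities.gaussian import IsotropicZeroMeanGaussian
    from kernel_hmc.mini_mcmc.mini_mcmc import mini_mcmc
    from kernel_hmc.proposals.kmc import KMC

    D = 2
    target = IsotropicZeroMeanGaussian(D=D)    # stand-in for an intractable target
    momentum = IsotropicZeroMeanGaussian(D=D)

    # gradient surrogate, here fit to (oracle) samples from the stand-in target
    surrogate = KernelExpLiteGaussian(sigma=20., lmbda=0.001, D=D, N=500)
    surrogate.fit(np.random.randn(500, D))

    # KMC proposal: leapfrog step count and step size are drawn uniformly from these ranges
    kmc = KMC(surrogate, target, momentum, num_steps_min=10, num_steps_max=50,
              step_size_min=0.1, step_size_max=0.1)

    samples, proposals, accepted, acc_prob, log_pdf, times, step_sizes = \
        mini_mcmc(kmc, start=np.zeros(D), num_iter=500, D=D)
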
For example, run 30 | 31 | python -m kernel_hmc.examples.demo_trajectories 32 | python -m kernel_hmc.examples.demo_mcmc_kmc_static 33 | 34 | -------------------------------------------------------------------------------- /kernel_hmc/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/karlnapf/kernel_hmc/8ab93ae0470cc5916d5349b40bae7f91075bc385/kernel_hmc/__init__.py -------------------------------------------------------------------------------- /kernel_hmc/densities/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/karlnapf/kernel_hmc/8ab93ae0470cc5916d5349b40bae7f91075bc385/kernel_hmc/densities/__init__.py -------------------------------------------------------------------------------- /kernel_hmc/densities/banana.py: -------------------------------------------------------------------------------- 1 | from theano import function 2 | import theano 3 | 4 | import numpy as np 5 | import theano.tensor as T 6 | 7 | def log_banana_pdf_theano_expr(x, bananicity, V): 8 | transformed = x.copy() 9 | transformed = T.set_subtensor(transformed[1], x[1] - bananicity * ((x[0] ** 2) - V)) 10 | transformed = T.set_subtensor(transformed[0], x[0] / T.sqrt(V)) 11 | 12 | log_determinant_part = 0. 13 | quadratic_part = -0.5 * transformed.dot(transformed) 14 | const_part = -0.5 * x.shape[0] * np.log(2 * np.pi) 15 | 16 | banana_log_pdf_expr = const_part + log_determinant_part + quadratic_part 17 | return banana_log_pdf_expr 18 | 19 | # build theano functions for log-pdf and gradient 20 | x = T.dvector('x') 21 | bananicity = T.dscalar('bananicity') 22 | V = T.dscalar('V') 23 | banana_log_pdf_expr = log_banana_pdf_theano_expr(x, bananicity, V) 24 | banana_log_pdf_theano = function([x, bananicity, V], banana_log_pdf_expr) 25 | banana_log_pdf_grad_theano = function([x, bananicity, V], theano.gradient.jacobian(banana_log_pdf_expr, x)) 26 | 27 | def log_banana_pdf(x, bananicity=0.03, V=100, compute_grad=False): 28 | if not compute_grad: 29 | return np.float64(banana_log_pdf_theano(x, bananicity, V)) 30 | else: 31 | return np.float64(banana_log_pdf_grad_theano(x, bananicity, V)) 32 | 33 | def sample_banana(N, D, bananicity=0.03, V=100): 34 | X = np.random.randn(N, 2) 35 | X[:, 0] = np.sqrt(V) * X[:, 0] 36 | X[:, 1] = X[:, 1] + bananicity * (X[:, 0] ** 2 - V) 37 | if D > 2: 38 | X = np.hstack((X, np.random.randn(N, D - 2))) 39 | 40 | return X 41 | 42 | class Banana(object): 43 | def __init__(self, D=2, bananicity=0.03, V=100): 44 | self.D = D 45 | self.bananicity = bananicity 46 | self.V = V 47 | 48 | def log_pdf(self, x): 49 | return log_banana_pdf(x, self.bananicity, self.V, compute_grad=False) 50 | 51 | def grad(self, x): 52 | return log_banana_pdf(x, self.bananicity, self.V, compute_grad=True) 53 | 54 | def set_up(self): 55 | pass 56 | -------------------------------------------------------------------------------- /kernel_hmc/densities/gaussian.py: -------------------------------------------------------------------------------- 1 | from scipy.linalg import solve_triangular 2 | 3 | from kernel_hmc.tools.math import qmult 4 | from kernel_hmc.tools.assertions import assert_positive_int 5 | import numpy as np 6 | 7 | 8 | def log_gaussian_pdf(x, mu=None, Sigma=None, is_cholesky=False, compute_grad=False): 9 | if mu is None: 10 | mu = np.zeros(len(x)) 11 | if Sigma is None: 12 | Sigma = np.eye(len(mu)) 13 | 14 | if is_cholesky is False: 15 | L = 
np.linalg.cholesky(Sigma) 16 | else: 17 | L = Sigma 18 | 19 | assert len(x) == Sigma.shape[0] 20 | assert len(x) == Sigma.shape[1] 21 | assert len(x) == len(mu) 22 | 23 | # solve y=K^(-1)x = L^(-T)L^(-1)x 24 | x = np.array(x - mu) 25 | y = solve_triangular(L, x.T, lower=True) 26 | y = solve_triangular(L.T, y, lower=False) 27 | 28 | if not compute_grad: 29 | log_determinant_part = -np.sum(np.log(np.diag(L))) 30 | quadratic_part = -0.5 * x.dot(y) 31 | const_part = -0.5 * len(L) * np.log(2 * np.pi) 32 | 33 | return const_part + log_determinant_part + quadratic_part 34 | else: 35 | return -y 36 | 37 | def sample_gaussian(N, mu=np.zeros(2), Sigma=np.eye(2), is_cholesky=False): 38 | D = len(mu) 39 | assert len(mu.shape) == 1 40 | assert len(Sigma.shape) == 2 41 | assert D == Sigma.shape[0] 42 | assert D == Sigma.shape[1] 43 | 44 | if is_cholesky is False: 45 | L = np.linalg.cholesky(Sigma) 46 | else: 47 | L = Sigma 48 | 49 | return L.dot(np.random.randn(D, N)).T + mu 50 | 51 | class GaussianBase(object): 52 | def __init__(self, D=1): 53 | assert_positive_int(D) 54 | self.D = D 55 | 56 | def log_pdf(self, x): 57 | raise NotImplementedError() 58 | 59 | def grad(self, x): 60 | raise NotImplementedError() 61 | 62 | def sample(self): 63 | raise NotImplementedError() 64 | 65 | class IsotropicZeroMeanGaussian(GaussianBase): 66 | def __init__(self, sigma=1., D=1): 67 | self.sigma = sigma 68 | GaussianBase.__init__(self, D) 69 | 70 | def log_pdf(self, x): 71 | D = len(x) 72 | const_part = -0.5 * D * np.log(2 * np.pi) 73 | quadratic_part = -np.dot(x, x) / (2 * (self.sigma ** 2)) 74 | log_determinant_part = -D * np.log(self.sigma) 75 | return const_part + log_determinant_part + quadratic_part 76 | 77 | def grad(self, x): 78 | return -x / (self.sigma ** 2) 79 | 80 | def sample(self): 81 | return np.random.randn(self.D) * self.sigma 82 | 83 | class GammaEigenvalueRotatedGaussian(GaussianBase): 84 | def __init__(self, gamma_shape=1., D=1): 85 | GaussianBase.__init__(self, D) 86 | 87 | # place a gamma on the Eigenvalues of a Gaussian covariance 88 | EVs = np.random.gamma(shape=gamma_shape, size=D) 89 | 90 | # random orthogonal matrix to rotate 91 | Q = qmult(np.eye(D)) 92 | Sigma = Q.T.dot(np.diag(EVs)).dot(Q) 93 | 94 | # Cholesky of random covariance 95 | self.L = np.linalg.cholesky(Sigma) 96 | 97 | def log_pdf(self, x): 98 | return log_gaussian_pdf(x, Sigma=self.L, is_cholesky=True, compute_grad=False) 99 | 100 | def grad(self, x): 101 | return log_gaussian_pdf(x, Sigma=self.L, is_cholesky=True, compute_grad=True) 102 | 103 | def sample(self, N): 104 | return sample_gaussian(N=N, mu=np.zeros(self.D), Sigma=self.L, is_cholesky=True) 105 | -------------------------------------------------------------------------------- /kernel_hmc/densities/posterior_gp_classification_ard.py: -------------------------------------------------------------------------------- 1 | import os 2 | import urllib 3 | 4 | from kernel_hmc.densities.gaussian import log_gaussian_pdf 5 | from kernel_hmc.tools.file import sha1sum 6 | from kernel_hmc.tools.log import logger 7 | try: 8 | import modshogun as sg 9 | except ImportError: 10 | import shogun as sg 11 | import numpy as np 12 | import scipy as sp 13 | 14 | 15 | class PseudoMarginalHyperparameters(object): 16 | """ 17 | Class to represent a GP's marginal posterior distribution of hyperparameters 18 | 19 | p(theta|y) \propto p(y|theta) p(theta) 20 | 21 | as an MCMC target. The p(y|theta) function is an unbiased estimate. 
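Plugging an unbiased estimate into the Metropolis-Hastings ratio nevertheless leaves the exact posterior invariant; this is the pseudo-marginal MCMC argument of Andrieu & Roberts (2009).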
22 | Hyperparameters are the length scales of a Gaussian ARD kernel. 23 | 24 | Uses the Shogun machine learning toolbox for GP inference. 25 | """ 26 | def __init__(self, X, y, n_importance, prior_log_pdf, ridge=0., num_shogun_threads=1): 27 | self.n_importance = n_importance 28 | self.prior_log_pdf = prior_log_pdf 29 | self.ridge = ridge 30 | self.X = X 31 | self.y = y 32 | 33 | self.num_shogun_threads = num_shogun_threads 34 | 35 | # tell shogun to use 1 thread only 36 | logger.debug("Using Shogun with %d threads" % self.num_shogun_threads) 37 | sg.ZeroMean().parallel.set_num_threads(self.num_shogun_threads) 38 | 39 | # shogun representation of data 40 | self.sg_labels = sg.BinaryLabels(self.y) 41 | self.sg_feats_train = sg.RealFeatures(self.X.T) 42 | 43 | # ARD: set theta, which is in log-scale, as kernel weights 44 | D = X.shape[1] 45 | theta_start = np.ones(D) 46 | 47 | self.sg_mean = sg.ZeroMean() 48 | self.sg_likelihood = sg.LogitLikelihood() 49 | 50 | def log_pdf(self, theta): 51 | self.sg_kernel = sg.GaussianARDKernel() 52 | exp_theta = np.exp(theta) 53 | if np.any(exp_theta<=0): 54 | exp_theta[exp_theta<=0]=np.finfo('d').eps 55 | self.sg_kernel.set_vector_weights(exp_theta) 56 | inference = sg.EPInferenceMethod( 57 | # inference=sg.SingleLaplacianInferenceMethod( 58 | self.sg_kernel, 59 | self.sg_feats_train, 60 | self.sg_mean, 61 | self.sg_labels, 62 | self.sg_likelihood) 63 | 64 | # fix kernel scaling for now 65 | inference.set_scale(1.) 66 | 67 | log_ml_estimate = inference.get_marginal_likelihood_estimate(self.n_importance, self.ridge) 68 | 69 | # prior is also in log-domain, so no exp of theta 70 | log_prior = self.prior_log_pdf(theta) 71 | result = log_ml_estimate + log_prior 72 | 73 | return result 74 | 75 | def log_prior_log_pdf(x): 76 | D = len(x) 77 | return log_gaussian_pdf(x, mu=0.*np.ones(D), Sigma=np.eye(D) * 5) 78 | 79 | class GlassPosterior(object): 80 | def __init__(self, n_importance=100, ridge=1e-3, prior_log_pdf=log_prior_log_pdf): 81 | self.n_importance = n_importance 82 | self.ridge = ridge 83 | self.prior_log_pdf = prior_log_pdf 84 | 85 | @staticmethod 86 | def _load_glass_data(data_dir=os.sep.join([os.path.expanduser('~'), "data"])): 87 | filename = os.sep.join([data_dir, "glass.data"]) 88 | 89 | try: 90 | data = np.loadtxt(filename, delimiter=",") 91 | except IOError: 92 | # make sure dir exists 93 | try: 94 | os.makedirs(data_dir) 95 | except OSError: 96 | pass 97 | 98 | url = "https://archive.ics.uci.edu/ml/machine-learning-databases/glass/glass.data" 99 | logger.warning("%s not found. Trying to download from %s" % (filename, url)) 100 | urllib.urlretrieve (url, filename) 101 | 102 | # try again 103 | try: 104 | data = np.loadtxt(filename, delimiter=",") 105 | except IOError: 106 | raise RuntimeError("Download failed. Please download manually.") 107 | 108 | # make sure file is as expected 109 | s_reference = "eb292f3709b6fbbeb18a34f95e2293470cbe58ed" 110 | logger.info("Asserting sha1sum(%s)==%s" % (filename, s_reference)) 111 | s = sha1sum(filename) 112 | if s != s_reference: 113 | raise RuntimeError("sha1sum(%s) is %s while reference is %s" % (filename,s, s_reference)) 114 | 115 | 116 | # create a binary "window glass" vs "non-window glass" labelling 117 | lab = data[:, -1] 118 | lab = np.array([1. 
if x <= 4 else -1.0 for x in lab]) 119 | 120 | # cut off ids and labeling 121 | data = data[:, 1:-1] 122 | 123 | return data, lab 124 | 125 | def set_up(self): 126 | # load data using kameleon-mcmc code 127 | logger.info("Loading data") 128 | X, y = GlassPosterior._load_glass_data() 129 | 130 | # normalise and whiten dataset, as done in kameleon-mcmc code 131 | logger.info("Whitening data") 132 | X -= np.mean(X, 0) 133 | L = np.linalg.cholesky(np.cov(X.T)) 134 | X = sp.linalg.solve_triangular(L, X.T, lower=True).T 135 | 136 | # build target, as in kameleon-mcmc code 137 | self.gp_posterior = PseudoMarginalHyperparameters(X, y, 138 | self.n_importance, 139 | self.prior_log_pdf, 140 | self.ridge, 141 | num_shogun_threads=1) 142 | 143 | def log_pdf(self, theta): 144 | if not hasattr(self, "gp_posterior"): 145 | raise RuntimeError("Call set_up method first.") 146 | 147 | return self.gp_posterior.log_pdf(theta) 148 | -------------------------------------------------------------------------------- /kernel_hmc/examples/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/karlnapf/kernel_hmc/8ab93ae0470cc5916d5349b40bae7f91075bc385/kernel_hmc/examples/__init__.py -------------------------------------------------------------------------------- /kernel_hmc/examples/demo_mcmc_gp_glass.py: -------------------------------------------------------------------------------- 1 | from kernel_exp_family.estimators.lite.gaussian import KernelExpLiteGaussianAdaptive 2 | from kernel_hmc.densities.gaussian import IsotropicZeroMeanGaussian 3 | from kernel_hmc.examples.demo_mcmc_kmc_static import visualise_trace 4 | from kernel_hmc.mini_mcmc.mini_mcmc import mini_mcmc 5 | from kernel_hmc.proposals.base import standard_sqrt_schedule 6 | from kernel_hmc.proposals.kmc import KMC 7 | from kernel_hmc.proposals.metropolis import AdaptiveMetropolis,\ 8 | StandardMetropolis, KernelAdaptiveMetropolis 9 | from kernel_hmc.tools.log import Log 10 | import matplotlib.pyplot as plt 11 | import numpy as np 12 | 13 | 14 | logger = Log.get_logger() 15 | 16 | # depends on optional dependency shogun 17 | try: 18 | from kernel_hmc.densities.posterior_gp_classification_ard import GlassPosterior 19 | glass_available = True 20 | except ImportError: 21 | logger.warning("Optional dependency Shogun not available, using Gaussian instead.") 22 | glass_available = False 23 | 24 | def get_am_instance(target): 25 | # adaptive version that tunes itself towards the "optimal" acceptance rate 26 | # set schedule=None for completely non-adaptive version 27 | step_size = 1. 28 | gamma2 = 0.1 29 | schedule = standard_sqrt_schedule 30 | acc_star = 0.234 31 | am = AdaptiveMetropolis(target, D, step_size, gamma2, schedule, acc_star) 32 | 33 | return am 34 | 35 | def get_mh_instance(target): 36 | # adaptive version that tunes itself towards the "optimal" acceptance rate 37 | step_size = 0.7 38 | schedule = standard_sqrt_schedule 39 | acc_star = 0.234 40 | mh = StandardMetropolis(target, D, step_size, schedule, acc_star) 41 | 42 | return mh 43 | 44 | def get_kam_instance(target): 45 | # adaptive version that tunes itself towards the "optimal" acceptance rate 46 | step_size = 1. 47 | gamma2 = 0.1 48 | schedule = standard_sqrt_schedule 49 | acc_star = 0.234 50 | kernel_sigma = 1. 
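# kernel_sigma is the bandwidth of the Gaussian kernel behind the kernel-adaptive proposal;
# N=200 below presumably caps how much chain history is used when fitting the proposal covariance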
51 | 52 | return KernelAdaptiveMetropolis(target, D, N=200, kernel_sigma=kernel_sigma, 53 | gamma2=gamma2, step_size=step_size, 54 | adaptation_schedule=schedule, acc_star=acc_star) 55 | 56 | def get_kmc_instance(target): 57 | step_size_min = 0.01 58 | step_size_max = 0.1 59 | num_steps_min = 1 60 | num_steps_max = 10 61 | momentum = IsotropicZeroMeanGaussian(D=D) 62 | schedule = standard_sqrt_schedule 63 | acc_star = 0.7 64 | 65 | # fully automatic parameter tuning in every fit call 66 | surrogate = KernelExpLiteGaussianAdaptive(sigma=1., lmbda=.001, D=D, N=200, 67 | n_initial=3, n_iter=3, minimum_size_learning=100, 68 | n_initial_relearn=3, n_iter_relearn=3, 69 | param_bounds={'sigma': [-3, 3]} 70 | ) 71 | 72 | return KMC(surrogate, target, 73 | momentum, num_steps_min, num_steps_max, step_size_min, step_size_max, 74 | adaptation_schedule=schedule, acc_star=acc_star) 75 | 76 | if __name__ == '__main__': 77 | """ 78 | This example samples from the marginal posterior over hyper-parameters of a 79 | Gaussian Process classification model. 80 | 81 | All samplers in the paper are used. 82 | 83 | Note this is an illustrative demo and the number of iterations is set very low. 84 | """ 85 | 86 | # Glass posterior has 9 dimensions 87 | D = 9 88 | if glass_available: 89 | target = GlassPosterior() 90 | target.set_up() 91 | else: 92 | target = IsotropicZeroMeanGaussian(D=D) 93 | 94 | # transition kernel, pick any 95 | samplers = [ 96 | get_am_instance(target), 97 | get_mh_instance(target), 98 | get_kam_instance(target), 99 | get_kmc_instance(target) 100 | ] 101 | 102 | for sampler in samplers: 103 | 104 | # MCMC parameters 105 | # small number of iterations here to keep runtime short, feel free to increase 106 | start = np.zeros(D) 107 | num_iter = 50 108 | 109 | # run MCMC 110 | samples, proposals, accepted, acc_prob, log_pdf, times, step_sizes = mini_mcmc(sampler, start, num_iter, D) 111 | 112 | visualise_trace(samples, log_pdf, accepted, step_sizes, idx0=1, idx1=6) 113 | 114 | plt.suptitle("%s on %s, acceptance rate: %.2f" % \ 115 | (sampler.__class__.__name__, target.__class__.__name__, np.mean(accepted))) 116 | 117 | plt.show() 118 | -------------------------------------------------------------------------------- /kernel_hmc/examples/demo_mcmc_kmc_adaptive.py: -------------------------------------------------------------------------------- 1 | from kernel_exp_family.estimators.finite.gaussian import KernelExpFiniteGaussian 2 | from kernel_exp_family.estimators.lite.gaussian import KernelExpLiteGaussianAdaptive 3 | from kernel_hmc.densities.gaussian import IsotropicZeroMeanGaussian 4 | from kernel_hmc.examples.plotting import visualise_trace 5 | from kernel_hmc.mini_mcmc.mini_mcmc import mini_mcmc 6 | from kernel_hmc.proposals.kmc import KMC 7 | import matplotlib.pyplot as plt 8 | import numpy as np 9 | from kernel_exp_family.estimators.parameter_search_bo import BayesOptSearch 10 | 11 | 12 | # banana gradient depends on theano, which is an optional dependency 13 | try: 14 | from kernel_hmc.densities.banana import Banana 15 | banana_available = True 16 | except ImportError: 17 | banana_available = False 18 | 19 | if __name__ == '__main__': 20 | """ 21 | This example samples from the Banana target (if theano is installed). 22 | It uses an adaptive instance of KMC lite to start with, then (optionally) 23 | switches to KMC finite, using the KMC exploration as an initial sketch of the target. 24 | 25 | Note this is an illustrative demo and the number of iterations is set very low.
26 | """ 27 | D = 2 28 | N = 500 29 | 30 | # target is banana density, with fallback option 31 | if banana_available: 32 | target = Banana(D=D) 33 | else: 34 | target = IsotropicZeroMeanGaussian(D=D) 35 | 36 | # KMC lite is geometrically ergodic on this target, use it if nothing about target is known 37 | # KMC finite can be used after burn-in, i.e. if some oracle samples are available 38 | # see below 39 | # this surrogate automatically learns parameters in every fit call 40 | surrogate = KernelExpLiteGaussianAdaptive(sigma=20., lmbda=0.001, D=D, N=N) 41 | 42 | # HMC parameters, step size will be adapted 43 | momentum = IsotropicZeroMeanGaussian(D=D) 44 | num_steps_min = 10 45 | num_steps_max = 50 46 | step_size_min = .1 47 | step_size_max = .1 48 | 49 | 50 | # kmc sampler instance, schedule here also controls updating the surrogate 51 | # this is a very liberate schedule, i.e. constant adaptation 52 | # necessary if KMC is not initialised with oracle samples 53 | schedule = lambda t: 0.001 54 | acc_star = 0.7 55 | kmc = KMC(surrogate, target, 56 | momentum, num_steps_min, num_steps_max, step_size_min, step_size_max, 57 | schedule, acc_star) 58 | 59 | # MCMC parameters 60 | # set to around 5000-10000 iterations to have KMC lite explored all of the support 61 | start = np.zeros(D) 62 | start[1] = -3 63 | num_iter = 500 64 | 65 | # run MCMC 66 | samples, proposals, accepted, acc_prob, log_pdf, times, step_sizes = mini_mcmc(kmc, start, num_iter, D) 67 | 68 | visualise_trace(samples, log_pdf, accepted, log_pdf_density=surrogate, step_sizes=step_sizes) 69 | plt.suptitle("KMC lite %s, acceptance rate: %.2f" % \ 70 | (surrogate.__class__.__name__, np.mean(accepted))) 71 | 72 | # now initialise KMC finite with the samples from the surrogate, and run for more 73 | # learn parameters before starting 74 | thinned = samples[np.random.permutation(len(samples))[:N]] 75 | surrogate2 = KernelExpFiniteGaussian(sigma=2, lmbda=0.001, D=D, m=N) 76 | surrogate2.set_parameters_from_dict(BayesOptSearch(surrogate2, thinned, {'sigma': [-3,3]}).optimize(3)) 77 | surrogate2.fit(thinned) 78 | 79 | # now use conservative schedule, or None at all if confident in oracle samples 80 | schedule2 = lambda t: 0.01 if t < 3000 else 0. 
81 | kmc2 = KMC(surrogate2, target, 82 | momentum, kmc.num_steps_min, kmc.num_steps_max, kmc.step_size[0], kmc.step_size[1], 83 | schedule2, acc_star) 84 | 85 | # run MCMC 86 | samples2, proposals2, accepted2, acc_prob2, log_pdf2, times2, step_sizes = mini_mcmc(kmc2, start, num_iter, D) 87 | visualise_trace(samples2, log_pdf2, accepted2, log_pdf_density=surrogate2, step_sizes=step_sizes) 88 | plt.suptitle("KMC finite, %s, acceptance rate: %.2f" % \ 89 | (surrogate2.__class__.__name__, np.mean(accepted2))) 90 | plt.show() -------------------------------------------------------------------------------- /kernel_hmc/examples/demo_mcmc_kmc_static.py: -------------------------------------------------------------------------------- 1 | from kernel_exp_family.estimators.finite.gaussian import KernelExpFiniteGaussian 2 | from kernel_exp_family.estimators.lite.gaussian import KernelExpLiteGaussian 3 | from kernel_exp_family.estimators.lite.gaussian_low_rank import KernelExpLiteGaussianLowRank 4 | from kernel_hmc.densities.gaussian import IsotropicZeroMeanGaussian, \ 5 | sample_gaussian 6 | from kernel_hmc.examples.plotting import visualise_trace 7 | from kernel_hmc.mini_mcmc.mini_mcmc import mini_mcmc 8 | from kernel_hmc.proposals.base import standard_sqrt_schedule 9 | from kernel_hmc.proposals.kmc import KMCStatic 10 | import matplotlib.pyplot as plt 11 | import numpy as np 12 | 13 | 14 | # banana gradient depends on theano, which is an optional dependency 15 | try: 16 | from kernel_hmc.densities.banana import Banana, sample_banana 17 | banana_available = True 18 | except ImportError: 19 | banana_available = False 20 | 21 | if __name__ == '__main__': 22 | """ 23 | This example samples from the Banana target (if theano is installed). 24 | It uses a fixed instance of KMC that receives a number of oracle samples as 25 | input. 26 | 27 | Note this is an illustrative demo and the number of iterations is set very low. 28 | """ 29 | # possible to change 30 | # for D=2, the fitted log-density is plotted, otherwise trajectory only 31 | D = 2 32 | N = 1000 33 | 34 | # target is banana density, fallback to Gaussian if theano is not present 35 | if banana_available: 36 | target = Banana(D=D) 37 | X = sample_banana(N, D) 38 | else: 39 | target = IsotropicZeroMeanGaussian(D=D) 40 | X = sample_gaussian(N=N) 41 | 42 | # plot trajectories for both KMC lite and finite, parameters are chosen for D=2 43 | for surrogate in [ 44 | KernelExpFiniteGaussian(sigma=2, lmbda=0.001, m=N, D=D), 45 | KernelExpLiteGaussian(sigma=20., lmbda=0.001, D=D, N=N), 46 | KernelExpLiteGaussianLowRank(sigma=20, lmbda=0.1, D=D, N=N, cg_tol=0.01), 47 | ]: 48 | # try commenting out this line to illustrate KMC's ability to mix even 49 | # when no (or incomplete) samples from the target are available 50 | surrogate.fit(X) 51 | 52 | # HMC parameters, fixed here, use oracle mean variance to set momentum 53 | momentum = IsotropicZeroMeanGaussian(D=D, sigma=np.sqrt(np.mean(np.var(X, 0)))) 54 | num_steps_min = 10 55 | num_steps_max = 50 56 | step_size_min = 1. 57 | step_size_max = 1.
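# note: with step_size_min == step_size_max, the uniform step-size draw in HMCBase.proposal collapses to a constant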
58 | 59 | # kmc sampler instance 60 | # note that this version still adapts the step size towards a certain acceptance rate 61 | # set schedule=None to avoid this; other schedules are possible 62 | # the sqrt schedule is very conservative 63 | schedule = standard_sqrt_schedule 64 | acc_star = 0.7 65 | kmc = KMCStatic(surrogate, target, momentum, num_steps_min, num_steps_max, step_size_min, step_size_max, 66 | adaptation_schedule=schedule, acc_star=acc_star) 67 | 68 | # MCMC parameters, feel free to increase number of iterations 69 | start = X[0] 70 | num_iter = 500 71 | 72 | # run MCMC 73 | samples, proposals, accepted, acc_prob, log_pdf, times, step_sizes = mini_mcmc(kmc, start, num_iter, D) 74 | 75 | visualise_trace(samples, log_pdf, accepted, log_pdf_density=surrogate, step_sizes=step_sizes) 76 | plt.suptitle("%s, acceptance rate: %.2f" % \ 77 | (surrogate.__class__.__name__, np.mean(accepted))) 78 | 79 | plt.show() 80 | -------------------------------------------------------------------------------- /kernel_hmc/examples/demo_mcmc_other_metropolis.py: -------------------------------------------------------------------------------- 1 | from kernel_hmc.densities.gaussian import IsotropicZeroMeanGaussian 2 | from kernel_hmc.examples.plotting import visualise_trace 3 | from kernel_hmc.mini_mcmc.mini_mcmc import mini_mcmc 4 | from kernel_hmc.proposals.metropolis import StandardMetropolis,\ 5 | AdaptiveMetropolis, KernelAdaptiveMetropolis 6 | from kernel_hmc.tools.log import Log 7 | import matplotlib.pyplot as plt 8 | import numpy as np 9 | 10 | 11 | # banana gradient depends on theano, which is an optional dependency 12 | try: 13 | from kernel_hmc.densities.banana import Banana 14 | banana_available = True 15 | except ImportError: 16 | banana_available = False 17 | 18 | Log.set_loglevel(20) 19 | 20 | if __name__ == '__main__': 21 | """ 22 | This example shows how to run all Metropolis-Hastings samplers (including 23 | the Kernel Adaptive Metropolis-Hastings) from the paper on a simple target.
24 | """ 25 | # possible to change 26 | D = 2 27 | N = 1000 28 | 29 | # target is banana density, fallback to Gaussian if theano is not present 30 | if banana_available: 31 | target = Banana(D=D) 32 | else: 33 | target = IsotropicZeroMeanGaussian(D=D) 34 | 35 | samplers = [ 36 | StandardMetropolis(target, D), 37 | AdaptiveMetropolis(target, D), 38 | KernelAdaptiveMetropolis(target, D, N=200) 39 | 40 | ] 41 | 42 | for sampler in samplers: 43 | # MCMC parameters, feel free to increase number of iterations 44 | start = np.zeros(D) 45 | num_iter = 1000 46 | 47 | # run MCMC 48 | samples, proposals, accepted, acc_prob, log_pdf, times, step_sizes = mini_mcmc(sampler, start, num_iter, D) 49 | 50 | visualise_trace(samples, log_pdf, accepted, step_sizes) 51 | plt.suptitle("%s, acceptance rate: %.2f" % \ 52 | (sampler.__class__.__name__, np.mean(accepted))) 53 | 54 | plt.show() 55 | -------------------------------------------------------------------------------- /kernel_hmc/examples/demo_trajectories.py: -------------------------------------------------------------------------------- 1 | from kernel_exp_family.estimators.finite.gaussian import KernelExpFiniteGaussian 2 | from kernel_exp_family.estimators.lite.gaussian import KernelExpLiteGaussian 3 | from kernel_exp_family.estimators.lite.gaussian_low_rank import KernelExpLiteGaussianLowRank 4 | from kernel_hmc.densities.gaussian import IsotropicZeroMeanGaussian,\ 5 | sample_gaussian 6 | from kernel_hmc.examples.plotting import visualise_trajectory 7 | from kernel_hmc.proposals.kmc import KMCStatic 8 | import matplotlib.pyplot as plt 9 | 10 | 11 | # banana gradient depends on theano, which is an optional dependency 12 | try: 13 | from kernel_hmc.densities.banana import Banana, sample_banana 14 | banana_available = True 15 | except ImportError: 16 | banana_available = False 17 | 18 | if __name__ == '__main__': 19 | """ 20 | Example that visualises trajectories of KMC lite and finite on a simple target. 21 | C.f. Figures 1 and 2 in the paper. 
22 | """ 23 | 24 | # for D=2, the fitted log-density is plotted, otherwise trajectory only 25 | D = 2 26 | N = 1000 27 | 28 | # target is banana density, fallback to Gaussian if theano is not present 29 | if banana_available: 30 | target = Banana() 31 | X = sample_banana(N, D) 32 | else: 33 | target = IsotropicZeroMeanGaussian(D=D) 34 | X = sample_gaussian(N=N) 35 | 36 | # plot trajectories for both KMC lite and finite, parameters are chosen for D=2 37 | for surrogate in [ 38 | KernelExpFiniteGaussian(sigma=10, lmbda=0.001, m=N, D=D), 39 | KernelExpLiteGaussian(sigma=20, lmbda=0.001, D=D, N=N), 40 | KernelExpLiteGaussianLowRank(sigma=20, lmbda=0.1, D=D, N=N, cg_tol=0.01), 41 | 42 | ]: 43 | surrogate.fit(X) 44 | 45 | 46 | # HMC parameters 47 | momentum = IsotropicZeroMeanGaussian(D=D, sigma=.1) 48 | num_steps = 1000 49 | step_size = .01 50 | 51 | # kmc sampler instance 52 | kmc = KMCStatic(surrogate, target, momentum, num_steps, num_steps, step_size, step_size) 53 | 54 | # simulate trajectory from starting point, note _proposal_trajectory is a "hidden" method 55 | current = X[0] 56 | current_log_pdf = target.log_pdf(current) 57 | Qs, acc_probs, log_pdf_q = kmc._proposal_trajectory(current, current_log_pdf) 58 | 59 | visualise_trajectory(Qs, acc_probs, log_pdf_q, D, surrogate) 60 | plt.suptitle("%s" % surrogate.__class__.__name__) 61 | 62 | plt.show() 63 | -------------------------------------------------------------------------------- /kernel_hmc/examples/plotting.py: -------------------------------------------------------------------------------- 1 | from kernel_exp_family.examples.tools import pdf_grid, visualise_array 2 | from kernel_hmc.tools.mcmc_convergence import autocorr 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | 6 | def visualise_trajectory(Qs, acc_probs, log_pdf_q, D, log_pdf=None): 7 | assert Qs.ndim == 2 8 | 9 | plot_density = log_pdf is not None and D==2 10 | 11 | plt.figure(figsize=(10, 12)) 12 | plt.subplot(411) 13 | 14 | # plot density if given and dimension is 2 15 | if plot_density: 16 | Xs = np.linspace(-30, 30, 75) 17 | Ys = np.linspace(-10, 20, len(Xs)) 18 | D, G = pdf_grid(Xs, Ys, log_pdf) 19 | visualise_array(Xs, Ys, D) 20 | 21 | plt.plot(Qs[:, 0], Qs[:, 1]) 22 | plt.plot(Qs[0, 0], Qs[0, 1], 'r*', markersize=15) 23 | plt.title("Log-pdf surrogate") 24 | 25 | plt.subplot(412) 26 | if plot_density: 27 | visualise_array(Xs, Ys, G) 28 | plt.plot(Qs[:, 0], Qs[:, 1]) 29 | plt.plot(Qs[0, 0], Qs[0, 1], 'r*', markersize=15) 30 | plt.title("Gradient norm surrogate") 31 | 32 | plt.subplot(413) 33 | plt.title("Acceptance probability") 34 | plt.xlabel("Leap frog iteration") 35 | plt.plot(acc_probs) 36 | plt.plot([0, len(acc_probs)], [np.mean(acc_probs) for _ in range(2)], 'r--') 37 | plt.xlim([0, len(acc_probs)]) 38 | 39 | plt.subplot(414) 40 | plt.title("Target log-pdf") 41 | plt.xlabel("Leap frog iteration") 42 | plt.plot(log_pdf_q) 43 | plt.xlim([0, len(log_pdf_q)]) 44 | 45 | def visualise_trace(samples, log_pdf_trajectory, accepted, step_sizes=None, log_pdf_density=None, idx0=0, idx1=1): 46 | assert samples.ndim == 2 47 | 48 | D = samples.shape[1] 49 | 50 | plt.figure(figsize=(15, 12)) 51 | 52 | plt.subplot(421) 53 | plt.plot(samples[:, idx0]) 54 | plt.title("Trace $x_%d$" % (idx0+1)) 55 | plt.xlabel("MCMC iteration") 56 | plt.grid(True) 57 | 58 | plt.subplot(422) 59 | plt.plot(samples[:, idx1]) 60 | plt.title("Trace $x_%d$" % (idx1+1)) 61 | plt.xlabel("MCMC iteration") 62 | plt.grid(True) 63 | 64 | plt.subplot(423) 65 | if not log_pdf_density is None and D 
== 2: 66 | Xs = np.linspace(-28, 28, 50) 67 | Ys = np.linspace(-6, 16, len(Xs)) 68 | D, _ = pdf_grid(Xs, Ys, log_pdf_density) 69 | visualise_array(Xs, Ys, D) 70 | 71 | plt.plot(samples[:, idx0], samples[:, idx1]) 72 | plt.title("Trace $(x_%d, x_%d)$" % (idx0+1, idx1+1)) 73 | plt.grid(True) 74 | plt.xlabel("$x_%d$" % (idx0+1)) 75 | plt.ylabel("$x_%d$" % (idx1+1)) 76 | 77 | plt.subplot(424) 78 | plt.plot(log_pdf_trajectory) 79 | plt.title("log pdf along trajectory") 80 | plt.xlabel("MCMC iteration") 81 | plt.grid(True) 82 | 83 | plt.subplot(425) 84 | plt.plot(autocorr(samples[:, idx0])) 85 | plt.title("Autocorrelation $x_%d$" % (idx0+1)) 86 | plt.xlabel("Lag") 87 | plt.grid(True) 88 | 89 | plt.subplot(426) 90 | plt.plot(autocorr(samples[:, idx1])) 91 | plt.title("Autocorrelation $x_%d$" % (idx1+1)) 92 | plt.xlabel("Lag") 93 | plt.grid(True) 94 | 95 | plt.subplot(427) 96 | plt.plot(np.cumsum(accepted) / np.arange(1, len(accepted)+1)) 97 | plt.title("Average acceptance rate") 98 | plt.xlabel("MCMC iterations") 99 | plt.grid(True) 100 | 101 | if step_sizes is not None: 102 | plt.subplot(428) 103 | if step_sizes.ndim>1: 104 | for i in range(step_sizes.shape[1]): 105 | plt.plot(step_sizes[:,i]) 106 | plt.title("Step sizes") 107 | else: 108 | plt.plot(step_sizes) 109 | plt.title("Step size") 110 | 111 | plt.xlabel("MCMC iterations") 112 | plt.grid(True) 113 | -------------------------------------------------------------------------------- /kernel_hmc/examples/run_all.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Runs all example python script in the current folder, 4 | # prepending a matplotlib backend, such that plotting 5 | # code in the example doesn't block the script, or fail 6 | # the execution on servers without DISPLAY 7 | 8 | # create temp file for all example scripts 9 | LIST=$(mktemp /tmp/kernel_hmc_examples.XXXXXXXXXX) || { echo "Failed to create temp file"; exit 1; } 10 | 11 | # find all example scripts 12 | find . -type f -name 'demo_*.py' ! -name '__init__.py' > $LIST 13 | 14 | # iterate over all scripts 15 | while read name 16 | do 17 | # prepend matplotlib backend that does not block 18 | echo "import matplotlib; matplotlib.use('Agg')\n" | cat - "$name" > "$name"_with_header 19 | 20 | # run 21 | echo Running example "$name" 22 | python "$name"_with_header > /dev/null 23 | 24 | # store return code 25 | retval=$? 26 | 27 | # clean up 28 | rm "$name"_with_header 29 | 30 | if [ $retval -ne 0 ]; then 31 | echo "Example $name failed with error code $retval." 
32 | exit 1 33 | fi 34 | done < $LIST 35 | 36 | # clean up 37 | rm $LIST 38 | -------------------------------------------------------------------------------- /kernel_hmc/hamiltonian/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/karlnapf/kernel_hmc/8ab93ae0470cc5916d5349b40bae7f91075bc385/kernel_hmc/hamiltonian/__init__.py -------------------------------------------------------------------------------- /kernel_hmc/hamiltonian/hamiltonian.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def compute_hamiltonian(Qs, Ps, logq, logp): 4 | assert len(Ps) == len(Qs) 5 | return np.asarray([-logq(Qs[i]) - logp(Ps[i]) for i in range(len(Qs))]) 6 | 7 | def compute_log_accept_pr(q0, p0, Qs, Ps, logq, logp): 8 | H0 = compute_hamiltonian(q0[np.newaxis, :], p0[np.newaxis, :], logq, logp) 9 | H = compute_hamiltonian(Qs, Ps, logq, logp) 10 | 11 | return np.minimum(np.zeros(H.shape), -H + H0) 12 | 13 | def compute_log_accept_pr_single(q0, p0, q, p, logq, logp): 14 | H0 = compute_hamiltonian(q0[np.newaxis, :], p0[np.newaxis, :], logq, logp)[0] 15 | H = compute_hamiltonian(q[np.newaxis, :], p[np.newaxis, :], logq, logp)[0] 16 | return np.minimum(0., -H + H0) 17 | -------------------------------------------------------------------------------- /kernel_hmc/hamiltonian/leapfrog.py: -------------------------------------------------------------------------------- 1 | from kernel_hmc.densities.gaussian import sample_gaussian 2 | import numpy as np 3 | 4 | 5 | def leapfrog(q, dlogq, p, dlogp, step_size=0.3, num_steps=1): 6 | # for storing trajectory 7 | Ps = np.zeros((num_steps + 1, len(p))) 8 | Qs = np.zeros(Ps.shape) 9 | 10 | # create copy of state 11 | p = np.array(p.copy()) 12 | q = np.array(q.copy()) 13 | Ps[0] = p 14 | Qs[0] = q 15 | 16 | # half momentum update 17 | p = p - (step_size / 2) * -dlogq(q) 18 | 19 | # alternate full variable and momentum updates 20 | for i in range(num_steps): 21 | q = q + step_size * -dlogp(p) 22 | Qs[i + 1] = q 23 | 24 | # precompute since used for two half-steps 25 | dlogq_eval = dlogq(q) 26 | 27 | # first half momentum update 28 | p = p - (step_size / 2) * -dlogq_eval 29 | 30 | # store p as now fully updated 31 | Ps[i + 1] = p 32 | 33 | # second half momentum update 34 | if i != num_steps - 1: 35 | p = p - (step_size / 2) * -dlogq_eval 36 | 37 | return Qs, Ps 38 | 39 | def leapfrog_no_storing(q, dlogq, p, dlogp, step_size=0.3, num_steps=1): 40 | # create copy of state 41 | p = np.array(p.copy()) 42 | q = np.array(q.copy()) 43 | 44 | # half momentum update 45 | p = p - (step_size / 2) * -dlogq(q) 46 | 47 | # alternate full variable and momentum updates 48 | for i in range(num_steps): 49 | q = q + step_size * -dlogp(p) 50 | 51 | # precompute since used for two half-steps 52 | dlogq_eval = dlogq(q) 53 | 54 | # first half momentum update 55 | p = p - (step_size / 2) * -dlogq_eval 56 | 57 | # second half momentum update 58 | if i != num_steps - 1: 59 | p = p - (step_size / 2) * -dlogq_eval 60 | 61 | return q, p 62 | 63 | def leapfrog_friction_habc_no_storing(c, V, q, dlogq, p, dlogp, step_size=0.3, num_steps=1): 64 | """ 65 | MATLAB code by Chen et al 66 | 67 | function [ newx ] = sghmc( U, gradU, m, dt, nstep, x, C, V ) 68 | %% SGHMC using gradU, for nstep, starting at position x 69 | 70 | p = randn( size(x) ) * sqrt( m ); 71 | B = 0.5 * V * dt; 72 | D = sqrt( 2 * (C-B) * dt ); 73 | 74 | for i = 1 : nstep 75 | p = p - gradU( x ) * dt - 
p * C * dt + randn(1)*D; 76 | x = x + p./m * dt; 77 | end 78 | newx = x; 79 | end 80 | """ 81 | 82 | # friction term (as in HABC) 83 | D = len(q) 84 | B = 0.5 * V * step_size 85 | C = np.eye(D) * c + V 86 | L_friction = np.linalg.cholesky(2 * step_size * (C - B)) 87 | zeros_D = np.zeros(D) 88 | 89 | # create copy of state 90 | p = np.array(p.copy()) 91 | q = np.array(q.copy()) 92 | 93 | # alternate full momentum and variable updates 94 | for _ in range(num_steps): 95 | friction = sample_gaussian(N=1, mu=zeros_D, Sigma=L_friction, is_cholesky=True)[0] 96 | 97 | # just like normal momentum update but with friction 98 | p = p - step_size * -dlogq(q) - step_size * C.dot(-dlogp(p)) + friction 99 | 100 | # normal position update 101 | q = q + step_size * -dlogp(p) 102 | 103 | 104 | return q, p 105 | -------------------------------------------------------------------------------- /kernel_hmc/mini_mcmc/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/karlnapf/kernel_hmc/8ab93ae0470cc5916d5349b40bae7f91075bc385/kernel_hmc/mini_mcmc/__init__.py -------------------------------------------------------------------------------- /kernel_hmc/mini_mcmc/mini_mcmc.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | from kernel_hmc.tools.log import Log 4 | import numpy as np 5 | 6 | 7 | logger = Log.get_logger() 8 | 9 | def mini_mcmc(transition_kernel, start, num_iter, D, recompute_log_pdf=False, time_budget=None): 10 | # MCMC results 11 | samples = np.zeros((num_iter, D)) + np.nan 12 | proposals = np.zeros((num_iter, D)) + np.nan 13 | accepted = np.zeros(num_iter) + np.nan 14 | acc_prob = np.zeros(num_iter) + np.nan 15 | log_pdf = np.zeros(num_iter) + np.nan 16 | step_sizes = [] 17 | 18 | # timings for output and time limit 19 | times = np.zeros(num_iter) 20 | last_time_printed = time.time() 21 | 22 | # for adaptive transition kernels 23 | avg_accept = 0. 24 | 25 | # init MCMC (first iteration) 26 | current = start 27 | current_log_pdf = None 28 | 29 | logger.info("Starting MCMC using %s in D=%d dimensions" % \ 30 | (transition_kernel.__class__.__name__, D,)) 31 | 32 | for it in range(num_iter): 33 | times[it] = time.time() 34 | 35 | # stop sampling if time budget exceeded 36 | if time_budget is not None: 37 | if times[it] > times[0] + time_budget: 38 | logger.info("Time limit of %ds exceeded. Stopping MCMC at iteration %d." 
% (time_budget, it)) 39 | it -= 1; break # nothing was stored at this index yet 40 | 41 | # print chain progress 42 | if times[it] > last_time_printed + 5: 43 | log_str = "MCMC iteration %d/%d, current log_pdf: %.6f, avg acceptance: %.3f" % (it + 1, num_iter, 44 | np.nan if log_pdf[it - 1] is None else log_pdf[it - 1], 45 | avg_accept) 46 | last_time_printed = times[it] 47 | logger.info(log_str) 48 | 49 | # marginal sampler: make transition kernel re-compute log_pdf of current state 50 | if recompute_log_pdf: 51 | current_log_pdf = None 52 | 53 | # generate proposal and acceptance probability 54 | logger.debug("Performing MCMC step") 55 | proposals[it], acc_prob[it], log_pdf_proposal = transition_kernel.proposal(current, current_log_pdf) 56 | 57 | # accept-reject 58 | r = np.random.rand() 59 | accepted[it] = r < acc_prob[it] 60 | 61 | logger.debug("Proposed %s" % str(proposals[it])) 62 | logger.debug("Acceptance prob %.4f" % acc_prob[it]) 63 | logger.debug("Accepted: %d" % accepted[it]) 64 | 65 | 66 | # update running mean according to Knuth's stable formula 67 | avg_accept += (accepted[it] - avg_accept) / (it + 1) 68 | 69 | # update state 70 | logger.debug("Updating chain") 71 | if accepted[it]: 72 | current = proposals[it] 73 | current_log_pdf = log_pdf_proposal 74 | 75 | # store sample 76 | samples[it] = current 77 | log_pdf[it] = current_log_pdf 78 | 79 | # update transition kernel, might do nothing 80 | # make all samples and acceptance probabilities available 81 | transition_kernel.update(samples[:(it+1)], acc_prob[:(it+1)]) 82 | 83 | # store step size 84 | step_sizes += [transition_kernel.step_size] 85 | 86 | # `it` indexes the last stored sample; this can be fewer than num_iter iterations due to the time budget 87 | return samples[:it+1], proposals[:it+1], accepted[:it+1], acc_prob[:it+1], log_pdf[:it+1], times[:it+1], np.array(step_sizes) 88 | -------------------------------------------------------------------------------- /kernel_hmc/proposals/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/karlnapf/kernel_hmc/8ab93ae0470cc5916d5349b40bae7f91075bc385/kernel_hmc/proposals/__init__.py -------------------------------------------------------------------------------- /kernel_hmc/proposals/base.py: -------------------------------------------------------------------------------- 1 | from kernel_hmc.tools.assertions import assert_implements_log_pdf_and_grad 2 | from kernel_hmc.tools.log import Log 3 | import numpy as np 4 | 5 | 6 | logger = Log.get_logger() 7 | 8 | def standard_sqrt_schedule(t): 9 | return 1.
/ np.sqrt(t + 1) 10 | 11 | class ProposalBase(object): 12 | def __init__(self, target, D, step_size, adaptation_schedule, acc_star): 13 | self.target = target 14 | self.D = D 15 | self.step_size = step_size 16 | self.adaptation_schedule = adaptation_schedule 17 | self.acc_star = acc_star 18 | 19 | self.t = 0 20 | 21 | # some sanity checks 22 | assert acc_star is None or (acc_star > 0 and acc_star < 1) 23 | if adaptation_schedule is not None: 24 | lmbdas = np.array([adaptation_schedule(t) for t in np.arange(100)]) 25 | assert np.all(lmbdas >= 0) 26 | assert np.allclose(np.sort(lmbdas)[::-1], lmbdas) 27 | 28 | assert_implements_log_pdf_and_grad(target, assert_grad=False) 29 | 30 | def initialise(self): 31 | pass 32 | 33 | def proposal(self): 34 | pass 35 | 36 | def update(self, samples, acc_probs): 37 | self.t += 1 38 | 39 | previous_accept_prob = acc_probs[-1] 40 | 41 | if self.adaptation_schedule is not None and self.acc_star is not None: 42 | # always update step size with schedule weight 43 | lmbda = self.adaptation_schedule(self.t) 44 | self._update_scaling(lmbda, previous_accept_prob) 45 | 46 | def _update_scaling(self, lmbda, accept_prob): 47 | # difference between desired and actual acceptance rate 48 | diff = accept_prob - self.acc_star 49 | 50 | new_log_step_size = np.log(self.step_size) + lmbda * diff 51 | new_step_size = np.exp(new_log_step_size) 52 | 53 | logger.debug("Acc. prob. diff. was %.3f-%.3f=%.3f. Updating step-size from %s to %s." % \ 54 | (accept_prob, self.acc_star, diff, self.step_size, new_step_size)) 55 | 56 | self.step_size = new_step_size 57 | -------------------------------------------------------------------------------- /kernel_hmc/proposals/hmc.py: -------------------------------------------------------------------------------- 1 | from abc import abstractmethod 2 | 3 | from kernel_hmc.densities.gaussian import GaussianBase 4 | from kernel_hmc.hamiltonian.leapfrog import leapfrog_no_storing, leapfrog 5 | from kernel_hmc.proposals.base import ProposalBase, standard_sqrt_schedule 6 | from kernel_hmc.tools.assertions import assert_positive_int,\ 7 | assert_implements_log_pdf_and_grad, assert_positive_float,\ 8 | assert_inout_log_pdf_and_grad 9 | from kernel_hmc.tools.log import Log 10 | import numpy as np 11 | 12 | 13 | logger = Log.get_logger() 14 | 15 | class HMCBase(ProposalBase): 16 | def __init__(self, target, momentum, num_steps_min=10, num_steps_max=100, step_size_min=0.05, 17 | step_size_max=0.3, adaptation_schedule=standard_sqrt_schedule, acc_star=0.7): 18 | 19 | if not isinstance(momentum, GaussianBase): 20 | raise TypeError("Momentum (%s) must be subclass of %s" % \ 21 | (str(type(momentum)), str(GaussianBase))) 22 | assert_implements_log_pdf_and_grad(target) 23 | assert_implements_log_pdf_and_grad(momentum) 24 | assert_inout_log_pdf_and_grad(target, momentum.D, assert_grad=False) 25 | 26 | assert_positive_int(num_steps_min) 27 | assert_positive_int(num_steps_max) 28 | if not num_steps_min<=num_steps_max: 29 | raise ValueError("Minimum number of leapfrog steps (%d) must not be larger than maximum number (%d)." % \ 30 | (num_steps_min, num_steps_max)) 31 | 32 | assert_positive_float(step_size_min) 33 | assert_positive_float(step_size_max) 34 | if not step_size_min<=step_size_max: 35 | raise ValueError("Minimum leapfrog step size (%f) must not be larger than maximum step size (%f)."
% \ 36 | (step_size_min, step_size_max)) 37 | 38 | step_size = np.array([step_size_min, step_size_max]) 39 | ProposalBase.__init__(self, target, momentum.D, step_size, adaptation_schedule, acc_star) 40 | 41 | self.momentum = momentum 42 | self.num_steps_min = num_steps_min 43 | self.num_steps_max = num_steps_max 44 | 45 | 46 | def _proposal_trajectory(self, current, current_log_pdf): 47 | # sample momentum and leapfrog parameters 48 | p0 = self.momentum.sample() 49 | p0_log_pdf = self.momentum.log_pdf(p0) 50 | num_steps = np.random.randint(self.num_steps_min, self.num_steps_max + 1) 51 | step_size = np.random.rand() * (self.step_size[1] - self.step_size[0]) + self.step_size[0] 52 | 53 | logger.debug("Simulating Hamiltonian flow trajectory.") 54 | Qs, Ps = leapfrog(current, self.target.grad, p0, self.momentum.grad, step_size, num_steps) 55 | 56 | # compute acceptance probability, extracting log_pdf of q 57 | logger.debug("Computing acceptance probabilies.") 58 | acc_probs = np.zeros(len(Qs)) 59 | log_pdf_q = np.zeros(len(Qs)) 60 | 61 | for i in range(len(Qs)): 62 | p = Ps[i] 63 | q = Qs[i] 64 | p_log_pdf = self.momentum.log_pdf(p) 65 | acc_probs[i], log_pdf_q[i] = self.accept_prob_log_pdf(current, q, p0_log_pdf, p_log_pdf, current_log_pdf) 66 | 67 | return Qs, acc_probs, log_pdf_q 68 | 69 | def proposal(self, current, current_log_pdf): 70 | """ 71 | """ 72 | 73 | # sample momentum and leapfrog parameters 74 | p0 = self.momentum.sample() 75 | p0_log_pdf = self.momentum.log_pdf(p0) 76 | num_steps = np.random.randint(self.num_steps_min, self.num_steps_max + 1) 77 | step_size = np.random.rand() * (self.step_size[1] - self.step_size[0]) + self.step_size[0] 78 | 79 | logger.debug("Simulating Hamiltonian flow.") 80 | q, p = leapfrog_no_storing(current, self.target.grad, p0, self.momentum.grad, step_size, num_steps) 81 | 82 | # compute acceptance probability, extracting log_pdf of q 83 | logger.debug("Computing acceptance probability.") 84 | p_log_pdf = self.momentum.log_pdf(p) 85 | acc_prob, log_pdf_q = self.accept_prob_log_pdf(current, q, p0_log_pdf, p_log_pdf, current_log_pdf) 86 | 87 | return q, acc_prob, log_pdf_q 88 | 89 | @abstractmethod 90 | def accept_prob_log_pdf(self, current, q, p0_log_pdf, p_log_pdf, current_log_pdf=None): 91 | # potentially re-use log_pdf of last accepted state 92 | if current_log_pdf is None: 93 | current_log_pdf = self.target.log_pdf(current) 94 | 95 | log_pdf_q = self.target.log_pdf(q) 96 | H0 = -current_log_pdf - p0_log_pdf 97 | H = -log_pdf_q - p_log_pdf 98 | difference = -H + H0 99 | acc_prob = np.exp(np.minimum(0., difference)) 100 | 101 | return acc_prob, log_pdf_q 102 | -------------------------------------------------------------------------------- /kernel_hmc/proposals/kmc.py: -------------------------------------------------------------------------------- 1 | from kernel_hmc.proposals.base import standard_sqrt_schedule 2 | from kernel_hmc.proposals.hmc import HMCBase 3 | from kernel_hmc.tools.assertions import assert_implements_log_pdf_and_grad 4 | from kernel_hmc.tools.log import Log 5 | import numpy as np 6 | 7 | 8 | logger = Log.get_logger() 9 | 10 | class KMCStatic(HMCBase): 11 | """ 12 | """ 13 | 14 | def __init__(self, surrogate, target, momentum, num_steps_min=10, num_steps_max=100, step_size_min=0.05, 15 | step_size_max=0.3, adaptation_schedule=None, acc_star=0.7): 16 | """ 17 | """ 18 | HMCBase.__init__(self, target, momentum, num_steps_min, num_steps_max, step_size_min, step_size_max, 19 | adaptation_schedule, acc_star) 20 | 21 | 
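# the surrogate provides the gradients that drive the leapfrog integration, while the exact target
# is only evaluated in the accept step (see accept_prob_log_pdf below); it must therefore
# implement both log_pdf and grad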
assert_implements_log_pdf_and_grad(surrogate) 22 | 23 | self.surrogate = surrogate 24 | self.target = surrogate 25 | self.orig_target = target 26 | 27 | def accept_prob_log_pdf(self, current, q, p0_log_pdf, p_log_pdf, current_log_pdf): 28 | # same as super-class, but with original target 29 | kernel_target = self.target 30 | self.target = self.orig_target 31 | 32 | acc_prob, log_pdf_q = HMCBase.accept_prob_log_pdf(self, current, q, p0_log_pdf, p_log_pdf, current_log_pdf) 33 | 34 | # restore target 35 | self.target = kernel_target 36 | 37 | return acc_prob, log_pdf_q 38 | 39 | class KMC(KMCStatic): 40 | def __init__(self, surrogate, target, momentum, num_steps_min=10, num_steps_max=100, step_size_min=0.05, 41 | step_size_max=0.3, adaptation_schedule=standard_sqrt_schedule, acc_star=0.6): 42 | KMCStatic.__init__(self, surrogate, target, 43 | momentum, num_steps_min, num_steps_max, step_size_min, step_size_max, 44 | adaptation_schedule, acc_star) 45 | 46 | # can the surrogate be online updated? 47 | self.surrogate_has_update_fit = hasattr(surrogate, 'update_fit') 48 | 49 | def update(self, samples, acc_probs): 50 | self.t += 1 51 | 52 | z_new = samples[-1][np.newaxis, :] 53 | previous_accept_prob = acc_probs[-1] 54 | 55 | if self.adaptation_schedule is not None: 56 | # generate updating weight 57 | lmbda = self.adaptation_schedule(self.t) 58 | 59 | if np.random.rand() <= lmbda: 60 | if self.surrogate_has_update_fit: 61 | logger.info("Updating surrogate (with probability lmbda=%.3f)" % lmbda) 62 | self.surrogate.update_fit(z_new) 63 | else: 64 | logger.info("Re-fitting surrogate (with probability lmbda=%.3f)" % lmbda) 65 | self.surrogate.fit(samples) 66 | 67 | if self.acc_star is not None: 68 | self._update_scaling(lmbda, previous_accept_prob) 69 | -------------------------------------------------------------------------------- /kernel_hmc/proposals/metropolis.py: -------------------------------------------------------------------------------- 1 | from scipy.spatial.distance import cdist, squareform, pdist 2 | 3 | from kernel_hmc.densities.gaussian import sample_gaussian, log_gaussian_pdf 4 | from kernel_hmc.proposals.base import ProposalBase, standard_sqrt_schedule 5 | from kernel_hmc.tools.log import Log 6 | import numpy as np 7 | 8 | 9 | logger = Log.get_logger() 10 | 11 | # low rank update depends on "cholupdate" optional dependency 12 | try: 13 | from choldate._choldate import cholupdate 14 | cholupdate_available = True 15 | except ImportError: 16 | cholupdate_available = False 17 | logger.warning("Package cholupdate not available. Adaptive Metropolis falls back to (more expensive) re-estimation of covariance.") 18 | 19 | if cholupdate_available: 20 | def rank_one_update_mean_covariance_cholesky_lmbda(u, lmbda=.1, mean=None, cov_L=None, nu2=1., gamma2=None): 21 | """ 22 | Returns updated mean and Cholesky of sum of outer products following a 23 | (1-lmbda)*old + lmbda*step_size*uu^T + lmbda*gamma2*I 24 | rule 25 | 26 | Optional: If gamma2 is given, an isotropic term gamma2 * I is added to the uu^T part 27 | 28 | where old mean and cov_L=Cholesky(old) (lower Cholesky) are given. 29 | 30 | Performs efficient rank-one updates of the Cholesky directly.
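Each update costs O(D^2), as opposed to the O(D^3) of re-computing the factorisation from scratch as in the naive variant below.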
31 | """ 32 | assert lmbda >= 0 and lmbda <= 1 33 | assert u.ndim == 1 34 | D = len(u) 35 | 36 | # check if first term 37 | if mean is None or cov_L is None : 38 | # in that case, zero mean and scaled identity matrix 39 | mean = np.zeros(D) 40 | cov_L = np.eye(D) * nu2 41 | else: 42 | assert len(mean) == D 43 | assert mean.ndim == 1 44 | assert cov_L.ndim == 2 45 | assert cov_L.shape[0] == D 46 | assert cov_L.shape[1] == D 47 | 48 | # update mean 49 | updated_mean = (1 - lmbda) * mean + lmbda * u 50 | 51 | # update Cholesky: first downscale existing Cholesky 52 | update_cov_L = np.sqrt(1 - lmbda) * cov_L.T 53 | 54 | # rank-one update of the centered new vector 55 | update_vec = np.sqrt(lmbda) * np.sqrt(nu2) * (u - mean) 56 | cholupdate(update_cov_L, update_vec) 57 | 58 | # optional: add isotropic term if specified, requires looping rank-one updates over 59 | # all basis vectors e_1, ..., e_D 60 | if gamma2 is not None: 61 | e_d = np.zeros(D) 62 | for d in range(D): 63 | e_d[:] = 0 64 | e_d[d] = np.sqrt(gamma2) 65 | 66 | # could do a Cholesky update, but this routine does a loop over dimensions 67 | # where the vector only has one non-zero component 68 | # That is O(D^2) and therefore not efficient when used in a loop 69 | cholupdate(update_cov_L, np.sqrt(lmbda) * e_d) 70 | 71 | # TODO: 72 | # in contrast, can do a simplified update when knowing that e_d is sparse 73 | # manual Cholesky update (only doing the d-th component of algorithm on 74 | # https://en.wikipedia.org/wiki/Cholesky_decomposition#Rank-one_update 75 | # # wiki (MB) code: 76 | # r = sqrt(L(k,k)^2 + x(k)^2); 77 | # c = r / L(k, k); 78 | # s = x(k) / L(k, k); 79 | # L(k, k) = r; 80 | # L(k+1:n,k) = (L(k+1:n,k) + s*x(k+1:n)) / c; 81 | # x(k+1:n) = c*x(k+1:n) - s*L(k+1:n,k); 82 | 83 | # since cholupdate works on transposed version 84 | update_cov_L = update_cov_L.T 85 | 86 | # done updating Cholesky 87 | 88 | return updated_mean, update_cov_L 89 | 90 | def rank_update_mean_covariance_cholesky_lmbda_naive(u, lmbda=.1, mean=None, cov_L=None, nu2=1., gamma2=None): 91 | """ 92 | Returns updated mean and Cholesky of sum of outer products following a 93 | (1-lmbda)*old + lmbda* step_size*uu^T 94 | rule 95 | 96 | Optional: If gamma2 is given, an isotropic term gamma2 * I is added to the uu^T part 97 | 98 | where old mean and cov_L=Cholesky(old) (lower Cholesky) are given. 
99 | 
100 |     Naive version that re-computes the Cholesky factorisation
101 |     """
102 |     assert 0 <= lmbda <= 1
103 |     assert u.ndim == 1
104 |     D = len(u)
105 | 
106 |     # check if first term
107 |     if mean is None or cov_L is None:
108 |         # in that case, zero mean and scaled identity matrix
109 |         mean = np.zeros(D)
110 |         cov_L = np.eye(D) * nu2
111 |     else:
112 |         assert len(mean) == D
113 |         assert mean.ndim == 1
114 |         assert cov_L.ndim == 2
115 |         assert cov_L.shape[0] == D
116 |         assert cov_L.shape[1] == D
117 | 
118 |     # update mean
119 |     updated_mean = (1 - lmbda) * mean + lmbda * u
120 | 
121 |     # centered new vector
122 |     update_vec = u - mean
123 | 
124 |     # reconstruct covariance, update
125 |     update_cov = np.dot(cov_L, cov_L.T)
126 |     update_cov = (1 - lmbda)*update_cov + lmbda*nu2*np.outer(update_vec, update_vec)
127 | 
128 |     # optional: add isotropic term if specified (scaled by lmbda, matching the rank-one version above)
129 |     if gamma2 is not None:
130 |         update_cov += np.eye(update_cov.shape[0]) * lmbda * gamma2
131 | 
132 |     # re-compute Cholesky
133 |     update_cov_L = np.linalg.cholesky(update_cov)
134 | 
135 |     return updated_mean, update_cov_L
136 | 
137 | 
138 | class AdaptiveMetropolis(ProposalBase):
139 |     """
140 |     Implements adaptive Metropolis-Hastings.
141 | 
142 |     If the "choldate" package is available,
143 |     performs efficient low-rank updates of the Cholesky factor of the covariance,
144 |     costing O(d^2) computation.
145 | 
146 |     Otherwise, the covariance is simply updated every iteration and its Cholesky
147 |     factorisation is re-computed every time, costing O(d^3) computation.
148 |     """
149 | 
150 |     def __init__(self, target, D, step_size=1., gamma2=0.1,
151 |                  adaptation_schedule=standard_sqrt_schedule, acc_star=0.234):
152 |         ProposalBase.__init__(self, target, D, step_size, adaptation_schedule, acc_star)
153 | 
154 |         self.gamma2 = gamma2
155 | 
156 |         # initialise as scaled isotropic, otherwise Cholesky updates fail
157 |         self.mu = np.zeros(self.D)
158 |         self.L_C = np.eye(self.D) * np.sqrt(self.step_size)
159 | 
160 |     def set_batch_covariance(self, Z):
161 |         self.mu = np.mean(Z, axis=0)
162 |         self.L_C = np.linalg.cholesky(self.step_size*np.cov(Z.T)+np.eye(Z.shape[1])*self.gamma2)
163 | 
164 |     def update(self, samples, acc_probs):
165 |         self.t += 1
166 | 
167 |         z_new = samples[-1]
168 |         previous_accept_prob = acc_probs[-1]
169 |         if self.adaptation_schedule is not None:
170 |             # generate updating weight
171 |             lmbda = self.adaptation_schedule(self.t)
172 | 
173 |             logger.debug("Updating covariance using lmbda=%.3f" % lmbda)
174 |             if cholupdate_available:
175 |                 # low-rank update of Cholesky, costs O(d^2) only, adding exploration noise on the fly
176 |                 logger.debug("O(d^2) low-rank update of Cholesky of covariance")
177 |                 self.mu, self.L_C = rank_one_update_mean_covariance_cholesky_lmbda(z_new,
178 |                                                                                    lmbda,
179 |                                                                                    self.mu,
180 |                                                                                    self.L_C,
181 |                                                                                    self.step_size,
182 |                                                                                    self.gamma2)
183 |             else:
184 |                 # naive update that re-computes the Cholesky, costs O(d^3), adding exploration noise on the fly
185 |                 logger.debug("O(d^3) naive re-computation of Cholesky of covariance")
186 |                 self.mu, self.L_C = rank_update_mean_covariance_cholesky_lmbda_naive(z_new,
187 |                                                                                      lmbda,
188 |                                                                                      self.mu,
189 |                                                                                      self.L_C,
190 |                                                                                      self.step_size,
191 |                                                                                      self.gamma2)
192 | 
193 |             # update scaling parameter if wanted
194 |             if self.acc_star is not None:
195 |                 self._update_scaling(lmbda, previous_accept_prob)
196 | 
197 |     def proposal(self, current, current_log_pdf):
198 |         """
199 |         Returns a sample from the proposal centred at current, acceptance probability,
200 |         and its log-pdf under the target.
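  | 
  |         A minimal usage sketch (assuming target implements log_pdf):
  | 
  |             am = AdaptiveMetropolis(target, D=2)
  |             q, acc_prob, log_pdf_q = am.proposal(np.zeros(2), current_log_pdf=None)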
201 | """ 202 | if current_log_pdf is None: 203 | current_log_pdf = self.target.log_pdf(current) 204 | 205 | # generate proposal 206 | proposal = sample_gaussian(N=1, mu=current, Sigma=self.L_C, is_cholesky=True)[0] 207 | proposal_log_pdf = self.target.log_pdf(proposal) 208 | 209 | # compute acceptance prob, proposals probability cancels due to symmetry 210 | acc_log_prob = np.min([0, proposal_log_pdf - current_log_pdf]) 211 | 212 | # probability of proposing current when would be sitting at proposal is symmetric 213 | return proposal, np.exp(acc_log_prob), proposal_log_pdf 214 | 215 | 216 | class StandardMetropolis(AdaptiveMetropolis): 217 | """ 218 | Implements the adaptive MH with a isotropic proposal covariance. 219 | """ 220 | 221 | def __init__(self, target, D, step_size=1., 222 | adaptation_schedule=standard_sqrt_schedule, acc_star=0.234): 223 | AdaptiveMetropolis.__init__(self, target, D, step_size, 0.0, 224 | adaptation_schedule, acc_star) 225 | 226 | def proposal(self, current, current_log_pdf): 227 | """ 228 | Returns a sample from the proposal centred at current, acceptance probability, 229 | and its log-pdf under the target. 230 | """ 231 | if current_log_pdf is None: 232 | current_log_pdf = self.target.log_pdf(current) 233 | 234 | # generate proposal 235 | proposal = sample_gaussian(N=1, mu=current, Sigma=np.eye(self.D) * np.sqrt(self.step_size), is_cholesky=True)[0] 236 | proposal_log_pdf = self.target.log_pdf(proposal) 237 | 238 | # compute acceptance prob, proposals probability cancels due to symmetry 239 | acc_log_prob = np.min([0, proposal_log_pdf - current_log_pdf]) 240 | 241 | # probability of proposing current when would be sitting at proposal is symmetric 242 | return proposal, np.exp(acc_log_prob), proposal_log_pdf 243 | 244 | def gamma_median_heuristic(Z, num_subsample=1000): 245 | """ 246 | Computes the median pairwise distance in a random sub-sample of Z. 247 | Returns a \gamma for k(x,y)=\exp(-\gamma ||x-y||^2), according to the median heuristc, 248 | i.e. it corresponds to \sigma in k(x,y)=\exp(-0.5*||x-y||^2 / \sigma^2) where 249 | \sigma is the median distance. 
        \gamma = 0.5/(\sigma^2)
250 |     """
251 |     inds = np.random.permutation(len(Z))[:np.min([num_subsample, len(Z)])]  # at most num_subsample points
252 |     dists = squareform(pdist(Z[inds], 'sqeuclidean'))
253 |     median_dist = np.median(dists[dists > 0])
254 |     sigma = np.sqrt(0.5 * median_dist)
255 |     gamma = 0.5 / (sigma ** 2)
256 | 
257 |     return gamma
258 | 
259 | class KernelAdaptiveMetropolis(ProposalBase):
260 |     def __init__(self, target, D, N, kernel_sigma=1., minimum_size_sigma_learning=100,
261 |                  step_size=1., gamma2=0.1, adaptation_schedule=standard_sqrt_schedule, acc_star=0.234):
262 |         ProposalBase.__init__(self, target, D, step_size, adaptation_schedule, acc_star)
263 | 
264 |         self.kernel_sigma = kernel_sigma
265 |         self.minimum_size_sigma_learning = minimum_size_sigma_learning
266 |         self.N = N
267 |         self.gamma2 = gamma2
268 |         self.Z = np.zeros((0, D))
269 | 
270 |     def set_batch_covariance(self, Z):
271 |         self.Z = Z
272 | 
273 |     def update(self, samples, acc_probs):
274 |         self.t += 1
275 | 
276 |         previous_accept_prob = acc_probs[-1]
277 |         if self.adaptation_schedule is not None:
278 |             # generate updating probability
279 |             lmbda = self.adaptation_schedule(self.t)
280 | 
281 |             if np.random.rand() < lmbda:
282 |                 # update sub-sample of chain history
283 |                 self.Z = samples[np.random.permutation(len(samples))[:self.N]]
284 |                 logger.info("Updated chain history sub-sample of size %d with probability lmbda=%.3f" % (self.N, lmbda))
285 | 
286 |                 if self.minimum_size_sigma_learning < len(self.Z):
287 |                     # re-compute median heuristic for kernel; the class stores sigma = 1./gamma
288 |                     self.kernel_sigma = 1./gamma_median_heuristic(self.Z, self.N)
289 |                     logger.info("Re-computed kernel bandwidth using median heuristic to sigma=%.3f" % self.kernel_sigma)
290 | 
291 |             # update scaling parameter if wanted
292 |             if self.acc_star is not None:
293 |                 self._update_scaling(lmbda, previous_accept_prob)
294 | 
295 |     def proposal(self, current, current_log_pdf):
296 |         """
297 |         Returns a sample from the proposal centred at current, acceptance probability,
298 |         and its log-pdf under the target.
299 |         """
300 |         if current_log_pdf is None:
301 |             current_log_pdf = self.target.log_pdf(current)
302 | 
303 |         L_R = self.construct_proposal_covariance_(current)
304 |         proposal = sample_gaussian(N=1, mu=current, Sigma=L_R, is_cholesky=True)[0]
305 |         proposal_log_prob = log_gaussian_pdf(proposal, current, L_R, is_cholesky=True)
306 |         proposal_log_pdf = self.target.log_pdf(proposal)
307 | 
308 |         # probability of proposing current when sitting at proposal (the proposal is not symmetric)
309 |         L_R_inv = self.construct_proposal_covariance_(proposal)
310 |         proposal_log_prob_inv = log_gaussian_pdf(current, proposal, L_R_inv, is_cholesky=True)
311 | 
312 |         log_acc_prob = proposal_log_pdf - current_log_pdf + proposal_log_prob_inv - proposal_log_prob
313 | 
314 |         return proposal, np.exp(log_acc_prob), proposal_log_pdf
315 | 
316 |     def construct_proposal_covariance_(self, y):
317 |         """
318 |         Helper method to compute the Cholesky factor of the Gaussian Kameleon proposal centred at y.
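  | 
  |         The proposal covariance has the Kameleon form
  |             R = gamma2 * I + step_size * M^T H M,
  |         where M is the matrix of kernel gradients between y and the stored chain history Z, and H = I - 1/n is the centering matrix.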
319 | """ 320 | R = self.gamma2 * np.eye(self.D) 321 | 322 | if len(self.Z) > 0: 323 | # the code is parametrised in gamma=1./sigma 324 | kernel_gamma = 1./self.kernel_sigma 325 | # k(y,z) = exp(-gamma ||y-z||) 326 | # d/dy k(y,z) = k(y,z) * (-gamma * d/dy||y-z||^2) 327 | # = 2 * k(y,z) * (-gamma * ||y-z||^2) 328 | # = 2 * k(y,z) * (gamma * ||z-y||^2) 329 | 330 | # gaussian kernel gradient, same as in kameleon-mcmc package, but without python overhead 331 | sq_dists = cdist(y[np.newaxis, :], self.Z, 'sqeuclidean') 332 | k = np.exp(-kernel_gamma * sq_dists) 333 | neg_differences = self.Z - y 334 | G = 2 * kernel_gamma * (k.T * neg_differences) 335 | 336 | # Kameleon 337 | G *= 2 # = M 338 | # R = gamma^2 I + \eta^2 * M H M^T 339 | H = np.eye(len(self.Z)) - 1.0 / len(self.Z) 340 | R += self.step_size * G.T.dot(H.dot(G)) 341 | 342 | L_R = np.linalg.cholesky(R) 343 | 344 | return L_R 345 | 346 | -------------------------------------------------------------------------------- /kernel_hmc/tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/karlnapf/kernel_hmc/8ab93ae0470cc5916d5349b40bae7f91075bc385/kernel_hmc/tools/__init__.py -------------------------------------------------------------------------------- /kernel_hmc/tools/assertions.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def assert_array_shape(a, ndim=None, shape=None, dims={}): 4 | if not type(a) is np.ndarray: 5 | raise TypeError("Provided object type (%s) is not nunpy.array." % str(type(a))) 6 | 7 | if ndim is not None: 8 | if not a.ndim == ndim: 9 | raise ValueError("Provided array dimensions (%d) are not as expected (%d)." % (a.ndim, ndim)) 10 | 11 | if shape is not None: 12 | if not np.all(a.shape==shape): 13 | raise ValueError("Provided array size (%s) are not as expected (%s)." % (str(a.shape), shape)) 14 | 15 | for k,v in dims.items(): 16 | if not a.shape[k] == v: 17 | raise ValueError("Provided array's %d-th dimension's size (%d) is not as expected (%d)." % (k, a.shape[k], v)) 18 | 19 | def assert_positive_int(i): 20 | if not issubclass(type(i), np.int): 21 | raise TypeError("Provided argument (%s) must be npumpy.int." % str(type(i))) 22 | 23 | if not i>0: 24 | raise ValueError("Provided integer (%d) must be positive." % i) 25 | 26 | def assert_positive_float(f): 27 | if not issubclass(type(f), np.float): 28 | raise TypeError("Provided argument (%s) must be numpy.float." % str(type(f))) 29 | 30 | if not f>0: 31 | raise ValueError("Provided float (%f) must be positive." 
32 | 
33 | def assert_implements_log_pdf_and_grad(density, assert_log_pdf=True, assert_grad=True):
34 |     if assert_log_pdf:
35 |         if not hasattr(density, 'log_pdf') or not callable(density.log_pdf):
36 |             raise ValueError("Density object does not implement log_pdf method")
37 | 
38 |     if assert_grad:
39 |         if not hasattr(density, 'grad') or not callable(density.grad):
40 |             raise ValueError("Density object does not implement grad method")
41 | 
42 | def assert_inout_log_pdf_and_grad(density, D, assert_log_pdf=True, assert_grad=True):
43 |     x = np.random.randn(D)
44 | 
45 |     if assert_log_pdf:
46 |         result = density.log_pdf(x)
47 | 
48 |         if not issubclass(type(result), np.float):
49 |             raise ValueError("Density object's log_pdf does not return numpy.float64 but %s" % str(type(result)))
50 | 
51 |     if assert_grad:
52 |         grad = density.grad(x)
53 |         assert_array_shape(grad, ndim=1, shape=(D,))
--------------------------------------------------------------------------------
/kernel_hmc/tools/file.py:
--------------------------------------------------------------------------------
1 | import hashlib
2 | from kernel_hmc.tools.log import logger
3 | 
4 | def sha1sum(fname, blocksize=65536):
5 |     """
6 |     Computes sha1sum of the given file. Same as the unix command line hash.
7 | 
8 |     Returns: string with the hex-formatted sha1sum hash
9 |     """
10 |     hasher = hashlib.sha1()
11 |     with open(fname, 'rb') as afile:
12 |         logger.debug("Hashing %s" % fname)
13 |         buf = afile.read(blocksize)
14 |         while len(buf) > 0:
15 |             hasher.update(buf)
16 |             buf = afile.read(blocksize)
17 |     return hasher.hexdigest()
--------------------------------------------------------------------------------
/kernel_hmc/tools/log.py:
--------------------------------------------------------------------------------
1 | 
2 | import logging
3 | 
4 | 
5 | class Log(object):
6 |     level_set = False
7 | 
8 |     @staticmethod
9 |     def set_loglevel(loglevel):
10 |         global logger
11 |         Log.get_logger().setLevel(loglevel)
12 |         Log.get_logger().info("Set loglevel to %d" % loglevel)
13 |         logger = Log.get_logger()
14 |         Log.level_set=True
15 | 
16 |     @staticmethod
17 |     def get_logger():
18 |         return logging.getLogger("kernel_hmc")
19 | 
20 | if not Log.level_set:
21 |     level = logging.INFO
22 |     logging.basicConfig(format='KERNEL_HMC: %(levelname)s: %(asctime)s: %(module)s.%(funcName)s(): %(message)s',
23 |                         level=level)
24 |     Log.get_logger().info("Global logger initialised with loglevel %d" % level)
25 |     Log.level_set = True
26 | 
27 | logger = Log.get_logger()
--------------------------------------------------------------------------------
/kernel_hmc/tools/math.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import numpy.linalg as nl
3 | import numpy.random as nrnd
4 | 
5 | def log_sum_exp(X):
6 |     """
7 |     Computes log sum_i exp(X_i).
8 |     Useful if you want to solve log \int f(x)p(x) dx
9 |     where you have samples from p(x) and can compute log f(x)
10 |     """
11 |     # factor out the minimum; note exp(X_i - X0) can still overflow for widely spread X, see avg_prob_of_log_probs
12 |     X0 = X.min()
13 |     X_without_X0 = np.delete(X, X.argmin())
14 | 
15 |     return X0 + np.log(1 + np.sum(np.exp(X_without_X0 - X0)))
16 | 
17 | def log_mean_exp(X):
18 |     """
19 |     Computes log 1/n sum_i exp(X_i).
20 |     Useful if you want to solve log \int f(x)p(x) dx
21 |     where you have samples from p(x) and can compute log f(x)
22 |     """
23 | 
24 |     return log_sum_exp(X) - np.log(len(X))
25 | 
26 | def avg_prob_of_log_probs(X):
27 |     """
28 |     Given a set of log-probabilities, this computes their average probability, i.e. the exponential of their log-mean-exp.
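  |     In probability space this is (1/n) * \sum_i \exp(X_i), evaluated through log_mean_exp for numerical stability.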
29 |     Careful checking is done to guard against numerical overflow.
30 |     Similar to calling (but overflow-safe): np.exp(log_mean_exp(X))
31 |     """
32 | 
33 |     # extract inf inds (no need to delete X0 from X here)
34 |     X0 = X.min()
35 |     inf_inds = np.isinf(np.exp(X - X0))
36 | 
37 |     # remove these numbers
38 |     X_without_inf = X[~inf_inds]
39 | 
40 |     # return exp-log-mean-exp on shortened array
41 |     avg_prob_without_inf = np.exp(log_mean_exp(X_without_inf))
42 | 
43 |     # re-normalise by the full length, which treats each removed entry as a zero-probability observation
44 |     renormaliser = float(len(X_without_inf)) / len(X)
45 |     avg_prob_without_inf = avg_prob_without_inf * renormaliser
46 | 
47 |     return avg_prob_without_inf
48 | 
49 | 
50 | def qmult(b):
51 |     """
52 |     QMULT Pre-multiply by random orthogonal matrix.
53 |     QMULT(A) is Q*A where Q is a random real orthogonal matrix from
54 |     the Haar distribution, of dimension the number of rows in A.
55 |     Special case: if A is a scalar then QMULT(A) is the same as
56 |     QMULT(EYE(A)).
57 | 
58 |     Called by RANDSVD.
59 | 
60 |     Reference:
61 |     G.W. Stewart, The efficient generation of random
62 |     orthogonal matrices with an application to condition estimators,
63 |     SIAM J. Numer. Anal., 17 (1980), 403-409.
64 |     """
65 |     try:
66 |         n = b.shape[0]
67 |         a = b.copy()
68 |     except AttributeError:
69 |         n = b
70 |         a = np.eye(n)
71 | 
72 |     d = np.zeros(n)
73 | 
74 |     for k in range(n - 2, -1, -1):
75 |         # Generate random Householder transformation.
76 |         x = nrnd.randn(n - k)
77 |         s = nl.norm(x)
78 |         # Modification to make sign(0) == 1
79 |         sgn = np.sign(x[0]) + float(x[0] == 0)
80 |         s = sgn * s
81 |         d[k] = -sgn
82 |         x[0] = x[0] + s
83 |         beta = s * x[0]
84 | 
85 |         # Apply the transformation to a
86 |         y = np.dot(x, a[k:n, :])
87 |         a[k:n, :] = a[k:n, :] - np.outer(x, (y / beta))
88 | 
89 |     # Tidy up signs.
90 |     for i in range(n - 1):
91 |         a[i, :] = d[i] * a[i, :]
92 | 
93 |     # Now randomly change the sign (Gaussian dist)
94 |     a[n - 1, :] = a[n - 1, :] * np.sign(nrnd.randn())
95 | 
96 |     return a
--------------------------------------------------------------------------------
/kernel_hmc/tools/mcmc_convergence.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | 
4 | def autocorr(x):
5 |     """
6 |     Computes the (normalised) auto-correlation function of a
7 |     one-dimensional sequence of numbers.
8 | 
9 |     Utilises the numpy correlate function that is based on an efficient
10 |     convolution implementation.
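  | 
  |     Up to edge effects of the 'same'-mode convolution, the lag-k value is
  |         r_k = \sum_t (x_t - \bar{x}) (x_{t+k} - \bar{x}) / \sum_t (x_t - \bar{x})^2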
11 | 
12 |     Inputs:
13 |     x - one dimensional numpy array
14 | 
15 |     Outputs:
16 |     Vector of autocorrelation values for a lag from zero to max possible
17 |     """
18 | 
19 |     # centre the sequence, compute normalisation constant
20 |     xunbiased = x - np.mean(x)
21 |     xnorm = np.sum(xunbiased ** 2)
22 | 
23 |     # convolve with itself
24 |     acor = np.correlate(xunbiased, xunbiased, mode='same')
25 | 
26 |     # use only second half, normalise (integer division, also correct under Python 3)
27 |     acor = acor[len(acor) // 2:] / xnorm
28 | 
29 |     return acor
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | scipy>=0.15
2 | numpy>=1.6
3 | 
4 | # bayesian optimisation via pybo and dependencies
5 | git+https://github.com/mwhoffman/ezplot.git#egg=ezplot
6 | git+https://github.com/mwhoffman/reggie.git#egg=reggie
7 | git+https://github.com/mwhoffman/pybo.git#egg=pybo
8 | 
9 | # for cholesky updating
10 | git+https://github.com/jcrudy/choldate.git#egg=choldate
11 | 
12 | # kernel_exp_family
13 | git+https://github.com/karlnapf/kernel_exp_family.git#egg=kernel_exp_family
14 | 
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, find_packages
2 | 
3 | 
4 | setup(name='kernel_hmc',
5 |       version='0.1',
6 |       description='Code for NIPS 2015 Gradient-Free Hamiltonian Monte Carlo with Efficient Kernel Exponential Families',
7 |       url='https://github.com/karlnapf/kernel_hmc',
8 |       author='Heiko Strathmann',
9 |       author_email='heiko.strathmann@gmail.com',
10 |       license='BSD3',
11 |       packages=find_packages('.', exclude=["*tests*", "*.develop"]),
12 |       zip_safe=False)
13 | 
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/karlnapf/kernel_hmc/8ab93ae0470cc5916d5349b40bae7f91075bc385/tests/__init__.py
--------------------------------------------------------------------------------
/tests/densities/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/karlnapf/kernel_hmc/8ab93ae0470cc5916d5349b40bae7f91075bc385/tests/densities/__init__.py
--------------------------------------------------------------------------------
/tests/densities/test_gaussian.py:
--------------------------------------------------------------------------------
1 | from numpy.ma.testutils import assert_close
2 | 
3 | from kernel_hmc.densities.gaussian import IsotropicZeroMeanGaussian, \
4 |     log_gaussian_pdf
5 | import numpy as np
6 | 
7 | 
8 | def test_isotropic_zero_mean_equals_log_gaussian_pdf():
9 |     D = 2
10 |     x = np.random.randn(D)
11 |     g = IsotropicZeroMeanGaussian(sigma=np.sqrt(2))
12 |     log_pdf = log_gaussian_pdf(x, mu=np.zeros(D), Sigma=np.eye(D) * 2, is_cholesky=False, compute_grad=False)
13 |     assert_close(log_pdf, g.log_pdf(x))
14 | 
15 | def test_isotropic_zero_mean_equals_log_gaussian_pdf_grad():
16 |     D = 2
17 |     x = np.random.randn(D)
18 |     g = IsotropicZeroMeanGaussian(sigma=np.sqrt(2))
19 |     grad = log_gaussian_pdf(x, mu=np.zeros(D), Sigma=np.eye(D) * 2, is_cholesky=False, compute_grad=True)
20 |     assert_close(grad, g.grad(x))
21 | 
22 | 
--------------------------------------------------------------------------------
/tests/densities/test_posterior_gp_classification_ard.py:
--------------------------------------------------------------------------------
1 | # depends on shogun, which might not be available
2 | from nose import SkipTest
3 | 
4 | import numpy as np
5 | 
6 | 
7 | try:
8 |     from kernel_hmc.densities.posterior_gp_classification_ard import GlassPosterior
9 |     glass_available = True
10 | except ImportError:
11 |     glass_available = False
12 | 
13 | 
14 | 
15 | def test_glass_posterior_setup_execute():
16 |     if not glass_available:
17 |         raise SkipTest("Shogun not available")
18 | 
19 |     GlassPosterior().set_up()
20 | 
21 | def test_glass_posterior_log_pdf_execute():
22 |     if not glass_available:
23 |         raise SkipTest("Shogun not available")
24 | 
25 |     D = 9
26 |     theta = np.random.randn(D)
27 | 
28 |     target = GlassPosterior()
29 |     target.set_up()
30 | 
31 |     target.log_pdf(theta)
32 | 
--------------------------------------------------------------------------------
/tests/proposals/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/karlnapf/kernel_hmc/8ab93ae0470cc5916d5349b40bae7f91075bc385/tests/proposals/__init__.py
--------------------------------------------------------------------------------
/tests/proposals/test_adaptive_metropolis.py:
--------------------------------------------------------------------------------
1 | from nose import SkipTest
2 | from numpy.testing.utils import assert_allclose
3 | 
4 | from kernel_hmc.proposals.metropolis import rank_update_mean_covariance_cholesky_lmbda_naive
5 | import numpy as np
6 | 
7 | 
8 | # low rank update depends on "cholupdate" optional dependency
9 | try:
10 |     from kernel_hmc.proposals.metropolis import rank_one_update_mean_covariance_cholesky_lmbda
11 |     low_rank_update_available = True
12 | except ImportError:
13 |     low_rank_update_available = False
14 | 
15 | 
16 | def test_rank_update_mean_covariance_cholesky_lmbda_cholupdate_equals_naive():
17 |     if not low_rank_update_available:
18 |         raise SkipTest("cholupdate not available")
19 | 
20 |     D = 3
21 |     N = 100
22 |     X = np.random.randn(N, D)
23 | 
24 |     mean = np.mean(X, 0)
25 |     Sigma = np.cov(X.T)
26 |     L = np.linalg.cholesky(Sigma)
27 |     assert_allclose(np.dot(L, L.T), Sigma)
28 | 
29 |     # update with one more vector
30 |     u = np.random.randn(D)
31 |     lmbda = 0.1
32 |     nu2 = 0.2
33 |     gamma2 = 0.3
34 | 
35 |     m_test_naive, L_test_naive = rank_update_mean_covariance_cholesky_lmbda_naive(u, lmbda, mean, L, nu2=nu2, gamma2=gamma2)
36 |     m_test, L_test = rank_one_update_mean_covariance_cholesky_lmbda(u, lmbda, mean, L, nu2=nu2, gamma2=gamma2)
37 | 
38 |     assert_allclose(m_test_naive, m_test)
39 |     assert_allclose(L_test_naive, L_test)
40 | 
41 | def test_rank_update_mean_covariance_cholesky_lmbda_naive():
42 |     D = 3
43 |     N = 100
44 |     X = np.random.randn(N, D)
45 | 
46 |     mean = np.mean(X, 0)
47 |     Sigma = np.cov(X.T)
48 |     L = np.linalg.cholesky(Sigma)
49 |     assert_allclose(np.dot(L, L.T), Sigma)
50 | 
51 |     # update with one more vector
52 |     u = np.random.randn(D)
53 |     lmbda = 0.1
54 |     nu2 = 0.2
55 |     gamma2 = 0.3
56 | 
57 |     updated_mean = (1 - lmbda) * mean + lmbda * u
58 |     updated_Sigma = (1 - lmbda) * Sigma + lmbda * nu2 * np.outer(u - mean, u - mean) + lmbda * gamma2 * np.eye(D)
59 |     updated_L = np.linalg.cholesky(updated_Sigma)
60 | 
61 |     m_test, L_test = rank_update_mean_covariance_cholesky_lmbda_naive(u, lmbda, mean, L, nu2=nu2, gamma2=gamma2)
62 |     assert_allclose(updated_mean, m_test)
63 |     assert_allclose(updated_L, L_test)
64 | 
65 | def test_rank_one_update_mean_covariance_cholesky_lmbda():
66 |     if not low_rank_update_available:
67 |         raise SkipTest("cholupdate not available")
68 | 
69 |     D = 3
70 |     N = 100
71 |     X = np.random.randn(N, D)
72 | 
73 |     mean = np.mean(X, 0)
74 |     Sigma = np.cov(X.T)
75 |     L = np.linalg.cholesky(Sigma)
76 |     assert_allclose(np.dot(L, L.T), Sigma)
77 | 
78 |     # update with one more vector
79 |     u = np.random.randn(D)
80 |     lmbda = 0.1
81 | 
82 |     updated_mean = (1 - lmbda) * mean + lmbda * u
83 |     updated_Sigma = (1 - lmbda) * Sigma + lmbda * np.outer(u - mean, u - mean)
84 |     updated_L = np.linalg.cholesky(updated_Sigma)
85 | 
86 |     m_test, L_test = rank_one_update_mean_covariance_cholesky_lmbda(u, lmbda, mean, L)
87 |     assert_allclose(updated_mean, m_test)
88 |     assert_allclose(updated_L, L_test)
89 | 
90 | def test_rank_one_update_mean_covariance_cholesky_lmbda_gamma2():
91 |     if not low_rank_update_available:
92 |         raise SkipTest("cholupdate not available")
93 | 
94 |     D = 3
95 |     N = 100
96 |     X = np.random.randn(N, D)
97 |     gamma2 = 2.
98 | 
99 |     mean = np.mean(X, 0)
100 |     Sigma = np.cov(X.T)
101 |     L = np.linalg.cholesky(Sigma)
102 |     assert_allclose(np.dot(L, L.T), Sigma)
103 | 
104 |     # update with one more vector
105 |     u = np.random.randn(D)
106 |     lmbda = 0.1
107 | 
108 |     updated_mean = (1 - lmbda) * mean + lmbda * u
109 |     updated_Sigma = (1 - lmbda) * Sigma + lmbda * np.outer(u - mean, u - mean) + lmbda * gamma2 * np.eye(D)
110 |     updated_L = np.linalg.cholesky(updated_Sigma)
111 | 
112 |     m_test, L_test = rank_one_update_mean_covariance_cholesky_lmbda(u, lmbda, mean, L, gamma2=gamma2)
113 |     assert_allclose(updated_mean, m_test)
114 |     assert_allclose(updated_L, L_test)
115 | 
116 | def test_rank_one_update_mean_covariance_cholesky_lmbda_nu2():
117 |     if not low_rank_update_available:
118 |         raise SkipTest("cholupdate not available")
119 | 
120 |     D = 3
121 |     N = 100
122 |     X = np.random.randn(N, D)
123 |     nu2 = 2.
124 | 
125 |     mean = np.mean(X, 0)
126 |     Sigma = np.cov(X.T)
127 |     L = np.linalg.cholesky(Sigma)
128 |     assert_allclose(np.dot(L, L.T), Sigma)
129 | 
130 |     # update with one more vector
131 |     u = np.random.randn(D)
132 |     lmbda = 0.1
133 | 
134 |     updated_mean = (1 - lmbda) * mean + lmbda * u
135 |     updated_Sigma = (1 - lmbda) * Sigma + lmbda * nu2 * np.outer(u - mean, u - mean)
136 |     updated_L = np.linalg.cholesky(updated_Sigma)
137 | 
138 |     m_test, L_test = rank_one_update_mean_covariance_cholesky_lmbda(u, lmbda, mean, L, nu2=nu2)
139 |     assert_allclose(updated_mean, m_test)
140 |     assert_allclose(updated_L, L_test)
141 | 
--------------------------------------------------------------------------------
/tests/proposals/test_base.py:
--------------------------------------------------------------------------------
1 | 
2 | from nose.tools import assert_raises
3 | 
4 | from kernel_hmc.densities.gaussian import IsotropicZeroMeanGaussian
5 | from kernel_hmc.proposals.hmc import HMCBase
6 | from kernel_hmc.proposals.kmc import KMCStatic
7 | from kernel_hmc.tools.assertions import assert_array_shape
8 | import numpy as np
9 | from tests.proposals.test_kmc import get_static_surrogate
10 | 
11 | 
12 | def get_hmc_kernel():
13 |     num_steps_min, num_steps_max, step_size_min, step_size_max = get_hmc_parameters()
14 |     target, momentum = get_target_momentum()
15 |     hmc = HMCBase(target, momentum, num_steps_min, num_steps_max, step_size_min, step_size_max)
16 |     return hmc
17 | 
18 | def get_kmc_static_kernel():
19 |     num_steps_min, num_steps_max, step_size_min, step_size_max = get_hmc_parameters()
20 |     target, momentum = get_target_momentum()
21 | 
22 |     surrogate = get_static_surrogate(momentum.D)
23 |     kmc = KMCStatic(surrogate, target, momentum, num_steps_min, num_steps_max, step_size_min, step_size_max)
24 | 
25 |     return kmc
26 | 
27 | def get_all_kernels():
28 |     proposals = [
29 |                  get_hmc_kernel(),
30 |                  get_kmc_static_kernel(),
31 |                  ]
32 | 
33 |     return proposals
34 | 
35 | def get_target_momentum():
36 |     D = 2
37 |     target = IsotropicZeroMeanGaussian(D=D)
38 |     momentum = IsotropicZeroMeanGaussian(D=D)
39 | 
40 |     return target, momentum
41 | 
42 | def get_hmc_parameters():
43 |     num_steps_min = 1
44 |     num_steps_max = 10
45 |     step_size_min = 0.01
46 |     step_size_max = 0.1
47 | 
48 |     return num_steps_min, num_steps_max, step_size_min, step_size_max
49 | 
50 | def test_hmc_base_init_execute():
51 |     num_steps_min, num_steps_max, step_size_min, step_size_max = get_hmc_parameters()
52 |     target, momentum = get_target_momentum()
53 |     HMCBase(target, momentum, num_steps_min, num_steps_max, step_size_min, step_size_max)
54 | 
55 | def test_hmc_base_init_wrong_target_type():
56 |     num_steps_min, num_steps_max, step_size_min, step_size_max = get_hmc_parameters()
57 |     _, momentum = get_target_momentum()
58 |     target = None
59 |     assert_raises(ValueError, HMCBase, target, momentum, num_steps_min, num_steps_max, step_size_min, step_size_max)
60 | 
61 | def test_hmc_base_init_wrong_target_logpdf():
62 |     num_steps_min, num_steps_max, step_size_min, step_size_max = get_hmc_parameters()
63 |     target, momentum = get_target_momentum()
64 |     target.log_pdf = None
65 |     assert_raises(ValueError, HMCBase, target, momentum, num_steps_min, num_steps_max, step_size_min, step_size_max)
66 | 
67 | def test_hmc_base_init_wrong_target_grad():
68 |     num_steps_min, num_steps_max, step_size_min, step_size_max = get_hmc_parameters()
69 |     target, momentum = get_target_momentum()
70 |     target.grad = None
71 |     assert_raises(ValueError, HMCBase, target, momentum, num_steps_min, num_steps_max, step_size_min, step_size_max)
72 | 
73 | def test_hmc_base_init_wrong_momentum_type():
74 |     num_steps_min, num_steps_max, step_size_min, step_size_max = get_hmc_parameters()
75 |     target, _ = get_target_momentum()
76 |     momentum = None
77 |     assert_raises(TypeError, HMCBase, target, momentum, num_steps_min, num_steps_max, step_size_min, step_size_max)
78 | 
79 | def test_hmc_base_init_wrong_momentum_log_pdf():
80 |     num_steps_min, num_steps_max, step_size_min, step_size_max = get_hmc_parameters()
81 |     target, momentum = get_target_momentum()
82 |     momentum.log_pdf = None
83 |     assert_raises(ValueError, HMCBase, target, momentum, num_steps_min, num_steps_max, step_size_min, step_size_max)
84 | 
85 | def test_hmc_base_init_wrong_momentum_grad():
86 |     num_steps_min, num_steps_max, step_size_min, step_size_max = get_hmc_parameters()
87 |     target, momentum = get_target_momentum()
88 |     momentum.grad = None
89 |     assert_raises(ValueError, HMCBase, target, momentum, num_steps_min, num_steps_max, step_size_min, step_size_max)
90 | 
91 | 
92 | def test_all_proposal_execute_result():
93 |     for kernel in get_all_kernels():
94 |         current = np.zeros(kernel.momentum.D)
95 |         current_log_pdf = 0.
96 |         q, acc_prob, log_pdf_q = kernel.proposal(current, current_log_pdf)
97 | 
98 |         assert_array_shape(q, ndim=1, shape=(kernel.momentum.D,))
99 | 
100 |         assert type(log_pdf_q) is np.float64
101 |         assert type(acc_prob) is np.float64
102 | 
103 |         assert 0. <= acc_prob <= 1.
104 | 
105 | def test_all_accept_prob_log_pdf_execute_result():
106 |     for kernel in get_all_kernels():
107 | 
108 |         current = np.zeros(kernel.momentum.D)
109 |         q = current.copy()
110 |         current_log_pdf = 0.
111 |         p0_log_pdf = 0.
112 |         p_log_pdf = 0.
113 | 
114 |         acc_prob, log_pdf_q = kernel.accept_prob_log_pdf(current, q, p0_log_pdf, p_log_pdf, current_log_pdf)
115 | 
116 |         assert type(log_pdf_q) is np.float64
117 |         assert type(acc_prob) is np.float64
118 | 
119 |         assert 0. <= acc_prob <= 1.
120 | 
--------------------------------------------------------------------------------
/tests/proposals/test_kmc.py:
--------------------------------------------------------------------------------
1 | from kernel_exp_family.estimators.lite.gaussian import KernelExpLiteGaussian
2 | from kernel_hmc.densities.gaussian import IsotropicZeroMeanGaussian
3 | from kernel_hmc.proposals.kmc import KMCStatic
4 | import numpy as np
5 | 
6 | 
7 | def get_target_momentum():
8 |     D = 2
9 |     target = IsotropicZeroMeanGaussian(D=D)
10 |     momentum = IsotropicZeroMeanGaussian(D=D)
11 | 
12 |     return target, momentum
13 | 
14 | def get_hmc_parameters():
15 |     num_steps_min = 1
16 |     num_steps_max = 10
17 |     step_size_min = 0.01
18 |     step_size_max = 0.1
19 | 
20 |     return num_steps_min, num_steps_max, step_size_min, step_size_max
21 | 
22 | def get_static_surrogate(D):
23 |     N = 200
24 |     X = np.random.randn(N, D)
25 |     est = KernelExpLiteGaussian(sigma=1, lmbda=.1, D=D, N=N)
26 |     est.fit(X)
27 | 
28 |     return est
29 | 
30 | def test_kmc_base_init_execute():
31 |     num_steps_min, num_steps_max, step_size_min, step_size_max = get_hmc_parameters()
32 |     target, momentum = get_target_momentum()
33 |     surrogate = get_static_surrogate(momentum.D)
34 |     KMCStatic(surrogate, target, momentum, num_steps_min, num_steps_max, step_size_min, step_size_max)
35 | 
36 | 
--------------------------------------------------------------------------------
/tests/tools/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/karlnapf/kernel_hmc/8ab93ae0470cc5916d5349b40bae7f91075bc385/tests/tools/__init__.py
--------------------------------------------------------------------------------
/tests/tools/test_math.py:
--------------------------------------------------------------------------------
1 | from nose.tools import assert_equal
2 | from numpy.testing.utils import assert_allclose
3 | 
4 | from kernel_hmc.tools.math import log_sum_exp, log_mean_exp, \
5 |     avg_prob_of_log_probs
6 | import numpy as np
7 | 
8 | 
9 | def test_log_sum_exp():
10 |     X = np.abs(np.random.randn(100))
11 |     direct = np.log(np.sum(np.exp(X)))
12 |     indirect = log_sum_exp(X)
13 |     assert_allclose(direct, indirect)
14 | 
15 | def test_log_mean_exp():
16 |     X = np.abs(np.random.randn(100))
17 |     direct = np.log(np.mean(np.exp(X)))
18 |     indirect = log_mean_exp(X)
19 |     assert_allclose(direct, indirect)
20 | 
21 | def test_log_mean_exp_equals_avg_prob_of_log_probs():
22 |     X = np.abs(np.random.randn(100))
23 |     direct_exp_log_mean_exp = np.exp(log_mean_exp(X))
24 |     safe = avg_prob_of_log_probs(X)
25 |     assert_allclose(direct_exp_log_mean_exp, safe)
26 | 
27 | def test_log_mean_exp_fail():
28 |     X = np.abs(np.random.randn(100))
29 |     X[0] = -3000
30 |     direct_exp_log_mean_exp = np.exp(log_mean_exp(X))
31 |     assert_equal(direct_exp_log_mean_exp, np.inf)
32 | 
33 | def test_log_mean_exp_fail_avg_prob_of_log_probs_succ():
34 |     X = np.abs(np.random.randn(100))
35 |     X[0] = 3000
36 |     safe = avg_prob_of_log_probs(X)
37 | 
38 |     temp = X.copy()
39 |     temp[0] = 0  # avoid overflow in np.exp below
40 |     temp = np.exp(temp)
41 |     temp[0] = 0  # treat the huge entry as a removed, zero-probability observation
42 |     manual_safe = np.mean(temp)
43 | 
44 |     assert_allclose(safe, manual_safe)
45 | 
--------------------------------------------------------------------------------