├── .gitignore ├── LICENSE ├── README.md ├── demo.py ├── pmslice.py ├── setup.py └── simple_slice.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # IPython Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # dotenv 79 | .env 80 | 81 | # virtualenv 82 | venv/ 83 | ENV/ 84 | 85 | # Spyder project settings 86 | .spyderproject 87 | 88 | # Rope project settings 89 | .ropeproject 90 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Iain Murray 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this 
software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Pseudo-Marginal Slice Sampling 2 | 3 | `pmslice.py` is a python module for running 4 | [pseudo-marginal slice sampling](http://homepages.inf.ed.ac.uk/imurray2/pub/16pmss/) 5 | It is documented in the module itself. 6 | 7 | Pseudo-Marginal slice sampling takes a Markov chain Monte Carlo (MCMC) 8 | method that evaluates the log of an unnormalized probability function, and 9 | turns it into a method that only needs the log of an unbiased estimator of 10 | the function. 11 | 12 | This small Python module makes it easy to drop pseudo-marginal slice 13 | sampling into whatever (Python) MCMC code you already have. In our 14 | demonstration `demo.py` a pseudo-marginal slice-sampling scheme only 15 | needs three extra lines of code compared to its conventional MCMC version. 
16 | None of these lines required deriving anything problem-specific. 17 | 18 | `simple_slice.py` is a helper used in the demonstration. The only file you 19 | need to drop into your MCMC codebase (and all that setup.py installs) is 20 | the module `pmslice.py`. 21 | 22 | Please refer to [the paper](http://homepages.inf.ed.ac.uk/imurray2/pub/16pmss/) 23 | for more details, and links to other implementations. 24 | -------------------------------------------------------------------------------- /demo.py: -------------------------------------------------------------------------------- 1 | ### First a demo of conventional MCMC for comparison: 2 | ################################################################################ 3 | 4 | from __future__ import print_function 5 | 6 | import numpy as np 7 | from simple_slice import slice_sweep 8 | 9 | def log_f(theta): 10 | """log of unnormalized target probability density function""" 11 | log_var = theta[0] 12 | var = np.exp(log_var) 13 | xx = theta[1:] 14 | return -0.5*log_var**2 - (0.5/var)*np.dot(xx, xx) - 0.5*xx.size*log_var 15 | 16 | print('Running conventional MCMC...') 17 | D = 5 # dimensionality of demo 18 | theta = np.random.randn(D) # initial condition 19 | Lp = log_f(theta) 20 | S = int(1e4) 21 | samples = np.zeros((S, D)) 22 | for ss in range(S): 23 | if not ((ss+1) % 10): 24 | print('Iteration %d / %d' % (ss+1, S), end='\r') 25 | theta, Lp = slice_sweep(theta, log_f, Lp=Lp) # theta updated here 26 | samples[ss,:] = theta 27 | print('Iteration %d / %d' % (ss+1, S)) 28 | 29 | 30 | ### Now a pseudo-marginal version: 31 | ################################################################################ 32 | 33 | import pmslice 34 | 35 | # In the pseudo-marginal setup we only have an unbiased random estimate of the 36 | # probability. As a demo, here is some function where exp(log_fhat(theta)) is an 37 | # unbiased estimate of exp(log_f(theta)) above. 
The pmslice module needs the 38 | # estimator to expose its sources of randomness as keyword arguments. 39 | def log_fhat(theta, rand=np.random.rand, randn=np.random.randn): 40 | K = np.ceil(10*rand()) 41 | return log_f(theta) + np.sum(randn(K)) - 0.5*K 42 | 43 | # Then most of the code is the same as before. Three new lines are marked below. 44 | # There are no tuning parameters, and the only problem-specific detail is 45 | # knowing which random number generators (rand and/or randn) need replacing. 46 | 47 | print('Running PM-Slice MCMC...') 48 | D = 5 # dimensionality of demo 49 | theta = np.random.randn(D) # initial condition 50 | # NEW: the next two lines set up pseudo-marginal slice sampling: 51 | pm_rand = {'rand':pmslice.RandClass(), 'randn':pmslice.RandnClass()} 52 | log_clamped_fn = pmslice.clamp_rand(log_fhat, pm_rand) # used instead of log_f 53 | Lp = log_clamped_fn(theta) 54 | S = int(1e4) 55 | pm_samples = np.zeros((S, D)) 56 | for ss in range(S): 57 | if not ((ss+1) % 10): 58 | print('Iteration %d / %d' % (ss+1, S), end='\r') 59 | theta, Lp = slice_sweep(theta, log_clamped_fn, Lp=Lp) # theta updated here 60 | Lp = pmslice.update_rand(log_fhat, pm_rand, Lp, theta) # NEW: update pm_rand 61 | pm_samples[ss,:] = theta 62 | print('Iteration %d / %d' % (ss+1, S)) 63 | 64 | # To sanity check: 65 | # Both samples[:,0] and pm_samples[:,0] should marginally come from N(0,1). 66 | 67 | # You probably noticed that the pseudo-marginal demo is a lot slower than the 68 | # conventional MCMC on! That's mainly because in this toy demo log_fhat is more 69 | # expensive than the true function log_f. In real applications, computing the 70 | # true function is expensive and the whole point of using an estimator is that 71 | # it's cheaper. If update_rand were the bottle-neck it could be run every 10 72 | # iterations instead of after every update. 
73 | 74 | -------------------------------------------------------------------------------- /pmslice.py: -------------------------------------------------------------------------------- 1 | """ 2 | A demo implementation of pseudo-marginal slice-sampling 3 | 4 | Pseudo-marginal Markov chain Monte Carlo (MCMC) methods sample from a 5 | distribution given only an unbiased estimator of the target probability 6 | density function. This module allows you to perform pseudo-marginal slice 7 | sampling, with very little modification to the code you would write if you 8 | could evaluate the target probability function exactly. The main work for a 9 | user is to expose the random number generators used by their estimator as 10 | keyword arguments, so they can be replaced. 11 | 12 | In the pseudo-marginal MCMC setup, let "fhat" be the unbiased estimator of 13 | a distribution over variables theta. It is assumed that the log of this 14 | estimator can be evaluated with a user-provided function of the form: 15 | 16 | log_fhat(theta, rand=np.random.rand, randn=np.random.randn) 17 | 18 | That is, the user writes code using standard random number generators, 19 | rand() and/or randn(), such that exp(log_fhat(...)) is an unbiased estimate 20 | of the probability of theta. All random number generators used are exposed as 21 | keyword arguments so they can be replaced. 22 | 23 | Pseudo-marginal slice sampling replaces the random number generators with 24 | objects that form part of the Markov chain. These objects are put in a 25 | dictionary, using the names from the estimator's keyword arguments: 26 | 27 | pm_rand = {'rand':pmslice.RandClass(), 'randn': pmslice.RandnClass()} 28 | 29 | The objects can be updated in the Markov chain by calling: 30 | 31 | pmslice.update_rand(log_fhat, pm_rand, Lp, theta) 32 | 33 | In between these updates, you use your conventional MCMC code to update your 34 | variables of interest. 
That code should be given a function 35 | 36 | log_fhat_clamped = pmslice.clamp_rand(log_fhat, pm_rand) 37 | 38 | in place of a function that could provide the true log of the unnormalized 39 | target distribution. Call pmslice.update_rand again after each update, or 40 | each several updates, of your Markov chain. 41 | 42 | See the separate demo.py for a full example. 43 | 44 | For a more detailed explanation of the method, see the paper: 45 | 46 | Pseudo-Marginal Slice Sampling, 47 | Iain Murray and Matthew M. Graham, 48 | JMLR: W&CP, 51:911-919, 2016. 49 | http://homepages.inf.ed.ac.uk/imurray2/pub/16pmss/ 50 | 51 | Being generic code in pure python, this module carries some time overhead. 52 | In some applications, more computations could be cached. When function 53 | evaluations are cheap, python-level book-keeping can dominate and rewriting 54 | in another language would be appropriate. This code was written after the 55 | paper was published. The code that was used to produce the original results 56 | is available separately. 57 | 58 | 59 | More advanced usage: 60 | 61 | The signature of the estimator can be arbitrary: 62 | log_fhat(*args, randX=..., randY=..., randZ=..., **kwargs) 63 | As long as all random number generators used in the code are exposed as 64 | keyword arguments. If you're using generators other than Uniform[0,1] and 65 | N(0,1), you'll need to either rewrite your code to use these primitives, or 66 | extend this module appropriately. 67 | 68 | If you wish to update certain blocks of random number draws separately in 69 | the Markov chain, that is easy to do. Some of the draws could call randX() 70 | and others randY(). Then make the signature of the log-estimator: 71 | 72 | log_fhat(theta, randX=np.random.rand, randY=np.random.rand, ...) 
73 | 74 | Then pmslice.update_rand can update a dictionary with multiple objects: 75 | 76 | pm_rand = {'randX':pmslice.RandClass(), 'randY':pmslice.RandClass(), ...} 77 | 78 | You can use an arbitrary number of generators, and they can be of multiple 79 | types (RandClass/RandnClass or any other pmslice-compatible type you create). 80 | 81 | What if you wanted to run Hamiltonian Monte Carlo (HMC), or some other 82 | method that uses gradients, on the main variables theta? That's fine. When 83 | calling pmslice.update_rand, pass a log_fhat function that only returns a 84 | single scalar, a log-unnormalized-probability estimate. Then when calling 85 | pmslice.clamp_rand, pass a different log_fhat function that also returns 86 | gradients, and use the resulting clamped function in HMC. 87 | 88 | What if you want to do HMC, or some other MCMC method, jointly on the 89 | random number draws and theta? That's an interesting idea, but outside the 90 | scope of this module. 91 | """ 92 | 93 | # Iain Murray, June 2016. 94 | # http://iainmurray.net/ 95 | 96 | 97 | import numpy as _np 98 | 99 | def _restart_rands(rand_dict): 100 | for kk in rand_dict: 101 | rand_dict[kk].pos = 0 102 | 103 | def update_rand(log_fhat, rand_dict, Lp=None, *args, **kwargs): 104 | """Update the auxiliary objects rand_dict in a pseudo-marginal Markov chain. 105 | 106 | Lp should be the log-unnormalized probability of the joint auxiliary state, 107 | Lp = clamp_rand(log_fhat, rand_dict)(*args, **kwargs) 108 | Often just: 109 | Lp = clamp_rand(log_fhat, rand_dict)(theta) 110 | Lp has usually been computed in the previous Markov chain update. 111 | However, you can leave Lp=None, and it will be recomputed for you. 112 | 113 | The log-unnormalized-probability of the final auxiliary state is returned, 114 | so it can be provided to the function for the next MCMC update. 
115 | """ 116 | if Lp is None: 117 | _restart_rands(rand_dict) 118 | Lp = log_fhat(*args, **dict(kwargs, **rand_dict)) 119 | for kk in rand_dict: 120 | rr = rand_dict[kk] 121 | Lp_threshold = Lp + _np.log(_np.random.rand()) 122 | while True: 123 | rr.slice_propose() 124 | _restart_rands(rand_dict) 125 | Lp_prop = log_fhat(*args, **dict(kwargs, **rand_dict)) 126 | if Lp_prop >= Lp_threshold: 127 | break 128 | rr.slice_shrink() 129 | rr.accept() 130 | Lp = Lp_prop 131 | return Lp 132 | 133 | def clamp_rand(log_fhat, rand_dict): 134 | """Return a deterministic function from a random estimator for MCMC updates 135 | 136 | This function helps run a pseudo-marginal Markov chain on variables theta 137 | with a log-unbiased-estimator function with signature: 138 | 139 | log_fhat(theta, rand=np.random.rand, randn=np.random.randn) 140 | 141 | or more generally: 142 | 143 | log_fhat(*args, randX=..., randY=..., randZ=..., **kwargs) 144 | 145 | where all of the random number generators used by the estimator have been 146 | exposed as keyword arguments. 147 | 148 | The pseudo-marginal Markov chain will use a dictionary of special objects 149 | that replace the random number generators. For the first example: 150 | 151 | pm_rand = {'rand':pmslice.RandClass(), 'randn':pmslice.RandnClass()} 152 | 153 | This function then creates a version of the estimator that will use the same 154 | random number draws between updates to pmslice.update_rand. 155 | 156 | log_fhat_clamped = pmslice.clamp_rand(log_fhat, pm_rand) 157 | 158 | This clamped estimator can be used in any conventional Markov chain code 159 | (expecting a log-unnormalized-probability) to update the variables of 160 | interest theta. The clamped function takes the same arguments as log_fhat, 161 | except the random number generators should not be specified. 
162 | """ 163 | def clamped_log_fhat(*args, **kwargs): 164 | _restart_rands(rand_dict) 165 | return log_fhat(*args, **dict(kwargs, **rand_dict)) 166 | return clamped_log_fhat 167 | 168 | class RandClass(object): 169 | """ 170 | Objects of this class are used in pseudo-marginal slice sampling to replace 171 | the np.random.rand() function in the code for an unbiased estimator. See the 172 | documentation for the rest of the pmslice module. 173 | """ 174 | # u_prop is an array of values to be emitted, which is maintained to be the 175 | # same length as uu, previous values, and nu_prop, Gaussian values used in 176 | # proposal mechanism. 177 | _rand = _np.random.rand 178 | def __init__(self): 179 | self.u_prop = _np.zeros(0) 180 | self.pos = 0; # the number emitted so far / position next u_prop 181 | self.accept() 182 | def accept(self): 183 | # Copy current proposal to "old values" uu, and set step to zero, so 184 | # same values will still be emitted. 185 | self.uu = self.u_prop[:self.pos] 186 | self.u_prop = self.uu.copy() 187 | self.step = 0 188 | # Set up new search direction and bracket 189 | self.nu = _np.random.randn(self.pos) 190 | self.mx = _np.random.rand() 191 | self.mn = self.mx - 1.0 192 | self.pos = 0 193 | def _combine(self, uu, nu): 194 | """Returns reflected_around_inside_hypercube(uu + nu*self.step)""" 195 | target = _np.abs(uu + nu*self.step) 196 | ipart = _np.floor(target) 197 | fpart = target - ipart 198 | is_odd = (ipart % 2) > 0.9 199 | fpart[is_odd] = 1 - fpart[is_odd] 200 | return fpart 201 | def slice_shrink(self): 202 | if self.step > 0: 203 | self.mx = self.step 204 | else: 205 | self.mn = self.step 206 | # In intended uses, algorithms only collapse to point if there's a bug: 207 | assert(self.mx != self.mn) 208 | def slice_propose(self): 209 | self.step = self.mn + (self.mx - self.mn)*_np.random.rand() 210 | self.u_prop = self._combine(self.uu, self.nu) 211 | self.pos = 0 212 | def __call__(self, *args): 213 | args = _np.array(args, 
dtype='int64') 214 | num_needed = _np.prod(args) 215 | uu_left = self.uu.size - self.pos 216 | if num_needed > uu_left: 217 | # Double resevoir, or at least enough to cater current request: 218 | num_extend = max(num_needed, self.uu.size) 219 | new_uu = self._rand(num_extend) 220 | new_nu = _np.random.randn(num_extend) 221 | new_u_prop = self._combine(new_uu, new_nu) 222 | self.uu = _np.hstack((self.uu, new_uu)) 223 | self.nu = _np.hstack((self.nu, new_nu)) 224 | self.u_prop = _np.hstack((self.u_prop, new_u_prop)) 225 | # The copy is to prevent user changing random numbers for future calls 226 | ans = self.u_prop[self.pos:self.pos+num_needed].reshape(args).copy() 227 | self.pos += num_needed 228 | return ans 229 | 230 | class RandnClass(RandClass): 231 | """ 232 | Objects of this class are used in pseudo-marginal slice sampling to replace 233 | the np.random.randn() function in the code for an unbiased estimator. See 234 | the documentation for the rest of the pmslice module. 235 | """ 236 | _rand = _np.random.randn 237 | def _combine(self, uu, nu): 238 | # Initial interval in RandClass is of width 1, so here get initial 239 | # interval of width 2\pi, the whole ellipse of combinations. 
240 | beta = 2*_np.pi*self.step 241 | return uu*_np.cos(beta) + nu*_np.sin(beta) 242 | 243 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from distutils.core import setup 4 | 5 | if __name__ == '__main__': 6 | setup(name='pmslice', 7 | description='Pseudo-Marginal Slice Sampling', 8 | author='Iain Murray', 9 | url='https://github.com/imurray/pmslice-python/', 10 | py_modules=['pmslice']) 11 | 12 | -------------------------------------------------------------------------------- /simple_slice.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import numpy as np 4 | 5 | def slice_sweep(xx, logdist, widths=1.0, step_out=True, Lp=None): 6 | """simple axis-aligned implementation of slice sampling for vectors 7 | 8 | xx_next = slice_sample(xx, logdist) 9 | samples = slice_sample(xx, logdist, N=200, burn=20) 10 | 11 | Inputs: 12 | xx D, initial state (or array with D elements) 13 | logdist fn function: log of unnormalized probability of xx 14 | widths D, or 1x1, step sizes for slice sampling (default 1.0) 15 | step_out bool set to True (default) if widths sometimes far too small 16 | Lp 1, Optional: logdist(xx) if have already evaluated it 17 | 18 | Outputs: 19 | xx D, final state (same shape as at start) 20 | If Lp was provided as an input, then return tuple with second element: 21 | Lp 1, final log-prob, logdist(xx) 22 | """ 23 | # Iain Murray 2004, 2009, 2010, 2013, 2016 24 | # Algorithm orginally by Radford Neal, e.g., Annals of Statistic (2003) 25 | # See also pseudo-code in David MacKay's text book p375 26 | 27 | # startup stuff 28 | D = xx.size 29 | widths = np.array(widths) 30 | if widths.size == 1: 31 | widths = np.tile(widths, D) 32 | output_Lp = Lp is not None 33 | if Lp is None: 34 | log_Px = logdist(xx) 35 | else: 36 | 
log_Px = Lp 37 | perm = np.array(range(D)) 38 | 39 | # Force xx into vector for ease of use: 40 | xx_shape = xx.shape 41 | logdist_vec = lambda x: logdist(np.reshape(x, xx_shape)) 42 | xx = xx.ravel().copy() 43 | x_l = xx.copy() 44 | x_r = xx.copy() 45 | xprime = xx.copy() 46 | 47 | # Random scan through axes 48 | np.random.shuffle(perm) 49 | for dd in perm: 50 | log_uprime = log_Px + np.log(np.random.rand()) 51 | # Create a horizontal interval (x_l, x_r) enclosing xx 52 | rr = np.random.rand() 53 | x_l[dd] = xx[dd] - rr*widths[dd] 54 | x_r[dd] = xx[dd] + (1-rr)*widths[dd] 55 | if step_out: 56 | # Typo in early book editions: said compare to u, should be u' 57 | while logdist_vec(x_l) > log_uprime: 58 | x_l[dd] = x_l[dd] - widths[dd] 59 | while logdist_vec(x_r) > log_uprime: 60 | x_r[dd] = x_r[dd] + widths[dd] 61 | 62 | # Inner loop: 63 | # Propose xprimes and shrink interval until good one found 64 | while True: 65 | xprime[dd] = np.random.rand()*(x_r[dd] - x_l[dd]) + x_l[dd] 66 | log_Px = logdist_vec(xprime) 67 | if log_Px > log_uprime: 68 | break # this is the only way to leave the while loop 69 | else: 70 | # Shrink in 71 | if xprime[dd] > xx[dd]: 72 | x_r[dd] = xprime[dd] 73 | elif xprime[dd] < xx[dd]: 74 | x_l[dd] = xprime[dd] 75 | else: 76 | raise Exception('BUG DETECTED: Shrunk to current ' 77 | + 'position and still not acceptable.') 78 | xx[dd] = xprime[dd] 79 | x_l[dd] = xprime[dd] 80 | x_r[dd] = xprime[dd] 81 | 82 | if output_Lp: 83 | return xx, log_Px 84 | else: 85 | return xx 86 | 87 | --------------------------------------------------------------------------------