├── .gitignore ├── LICENSE ├── README.md ├── demo.py ├── pmslice.py ├── setup.py └── simple_slice.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # IPython Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # dotenv 79 | .env 80 | 81 | # virtualenv 82 | venv/ 83 | ENV/ 84 | 85 | # Spyder project settings 86 | .spyderproject 87 | 88 | # Rope project settings 89 | .ropeproject 90 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Iain Murray 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this 
software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Pseudo-Marginal Slice Sampling 2 | 3 | `pmslice.py` is a python module for running 4 | [pseudo-marginal slice sampling](http://homepages.inf.ed.ac.uk/imurray2/pub/16pmss/) 5 | It is documented in the module itself. 6 | 7 | Pseudo-Marginal slice sampling takes a Markov chain Monte Carlo (MCMC) 8 | method that evaluates the log of an unnormalized probability function, and 9 | turns it into a method that only needs the log of an unbiased estimator of 10 | the function. 11 | 12 | This small Python module makes it easy to drop pseudo-marginal slice 13 | sampling into whatever (Python) MCMC code you already have. In our 14 | demonstration `demo.py` a pseudo-marginal slice-sampling scheme only 15 | needs three extra lines of code compared to its conventional MCMC version. 
16 | None of these lines required deriving anything problem-specific. 17 | 18 | `simple_slice.py` is a helper used in the demonstration. The only file you 19 | need to drop into your MCMC codebase (and all that setup.py installs) is 20 | the module `pmslice.py`. 21 | 22 | Please refer to [the paper](http://homepages.inf.ed.ac.uk/imurray2/pub/16pmss/) 23 | for more details, and links to other implementations. 24 | -------------------------------------------------------------------------------- /demo.py: -------------------------------------------------------------------------------- 1 | ### First a demo of conventional MCMC for comparison: 2 | ################################################################################ 3 | 4 | from __future__ import print_function 5 | 6 | import numpy as np 7 | from simple_slice import slice_sweep 8 | 9 | def log_f(theta): 10 | """log of unnormalized target probability density function""" 11 | log_var = theta[0] 12 | var = np.exp(log_var) 13 | xx = theta[1:] 14 | return -0.5*log_var**2 - (0.5/var)*np.dot(xx, xx) - 0.5*xx.size*log_var 15 | 16 | print('Running conventional MCMC...') 17 | D = 5 # dimensionality of demo 18 | theta = np.random.randn(D) # initial condition 19 | Lp = log_f(theta) 20 | S = int(1e4) 21 | samples = np.zeros((S, D)) 22 | for ss in range(S): 23 | if not ((ss+1) % 10): 24 | print('Iteration %d / %d' % (ss+1, S), end='\r') 25 | theta, Lp = slice_sweep(theta, log_f, Lp=Lp) # theta updated here 26 | samples[ss,:] = theta 27 | print('Iteration %d / %d' % (ss+1, S)) 28 | 29 | 30 | ### Now a pseudo-marginal version: 31 | ################################################################################ 32 | 33 | import pmslice 34 | 35 | # In the pseudo-marginal setup we only have an unbiased random estimate of the 36 | # probability. As a demo, here is some function where exp(log_fhat(theta)) is an 37 | # unbiased estimate of exp(log_f(theta)) above. 
The pmslice module needs the 38 | # estimator to expose its sources of randomness as keyword arguments. 39 | def log_fhat(theta, rand=np.random.rand, randn=np.random.randn): 40 | K = np.ceil(10*rand()) 41 | return log_f(theta) + np.sum(randn(K)) - 0.5*K 42 | 43 | # Then most of the code is the same as before. Three new lines are marked below. 44 | # There are no tuning parameters, and the only problem-specific detail is 45 | # knowing which random number generators (rand and/or randn) need replacing. 46 | 47 | print('Running PM-Slice MCMC...') 48 | D = 5 # dimensionality of demo 49 | theta = np.random.randn(D) # initial condition 50 | # NEW: the next two lines set up pseudo-marginal slice sampling: 51 | pm_rand = {'rand':pmslice.RandClass(), 'randn':pmslice.RandnClass()} 52 | log_clamped_fn = pmslice.clamp_rand(log_fhat, pm_rand) # used instead of log_f 53 | Lp = log_clamped_fn(theta) 54 | S = int(1e4) 55 | pm_samples = np.zeros((S, D)) 56 | for ss in range(S): 57 | if not ((ss+1) % 10): 58 | print('Iteration %d / %d' % (ss+1, S), end='\r') 59 | theta, Lp = slice_sweep(theta, log_clamped_fn, Lp=Lp) # theta updated here 60 | Lp = pmslice.update_rand(log_fhat, pm_rand, Lp, theta) # NEW: update pm_rand 61 | pm_samples[ss,:] = theta 62 | print('Iteration %d / %d' % (ss+1, S)) 63 | 64 | # To sanity check: 65 | # Both samples[:,0] and pm_samples[:,0] should marginally come from N(0,1). 66 | 67 | # You probably noticed that the pseudo-marginal demo is a lot slower than the 68 | # conventional MCMC on! That's mainly because in this toy demo log_fhat is more 69 | # expensive than the true function log_f. In real applications, computing the 70 | # true function is expensive and the whole point of using an estimator is that 71 | # it's cheaper. If update_rand were the bottle-neck it could be run every 10 72 | # iterations instead of after every update. 
73 | 74 | -------------------------------------------------------------------------------- /pmslice.py: -------------------------------------------------------------------------------- 1 | """ 2 | A demo implementation of pseudo-marginal slice-sampling 3 | 4 | Pseudo-marginal Markov chain Monte Carlo (MCMC) methods sample from a 5 | distribution given only an unbiased estimator of the target probability 6 | density function. This module allows you to perform pseudo-marginal slice 7 | sampling, with very little modification to the code you would write if you 8 | could evaluate the target probability function exactly. The main work for a 9 | user is to expose the random number generators used by their estimator as 10 | keyword arguments, so they can be replaced. 11 | 12 | In the pseudo-marginal MCMC setup, let "fhat" be the unbiased estimator of 13 | a distribution over variables theta. It is assumed that the log of this 14 | estimator can be evaluated with a user-provided function of the form: 15 | 16 | log_fhat(theta, rand=np.random.rand, randn=np.random.randn) 17 | 18 | That is, the user writes code using standard random number generators, 19 | rand() and/or randn(), such that exp(log_fhat(...)) is an unbiased estimate 20 | of the probability of theta. All random number generators used are exposed as 21 | keyword arguments so they can be replaced. 22 | 23 | Pseudo-marginal slice sampling replaces the random number generators with 24 | objects that form part of the Markov chain. These objects are put in a 25 | dictionary, using the names from the estimator's keyword arguments: 26 | 27 | pm_rand = {'rand':pmslice.RandClass(), 'randn': pmslice.RandnClass()} 28 | 29 | The objects can be updated in the Markov chain by calling: 30 | 31 | pmslice.update_rand(log_fhat, pm_rand, Lp, theta) 32 | 33 | In between these updates, you use your conventional MCMC code to update your 34 | variables of interest. 
That code should be given a function 35 | 36 | log_fhat_clamped = pmslice.clamp_rand(log_fhat, pm_rand) 37 | 38 | in place of a function that could provide the true log of the unnormalized 39 | target distribution. Call pmslice.update_rand again after each update, or 40 | each several updates, of your Markov chain. 41 | 42 | See the separate demo.py for a full example. 43 | 44 | For a more detailed explanation of the method, see the paper: 45 | 46 | Pseudo-Marginal Slice Sampling, 47 | Iain Murray and Matthew M. Graham, 48 | JMLR: W&CP, 51:911-919, 2016. 49 | http://homepages.inf.ed.ac.uk/imurray2/pub/16pmss/ 50 | 51 | Being generic code in pure python, this module carries some time overhead. 52 | In some applications, more computations could be cached. When function 53 | evaluations are cheap, python-level book-keeping can dominate and rewriting 54 | in another language would be appropriate. This code was written after the 55 | paper was published. The code that was used to produce the original results 56 | is available separately. 57 | 58 | 59 | More advanced usage: 60 | 61 | The signature of the estimator can be arbitrary: 62 | log_fhat(*args, randX=..., randY=..., randZ=..., **kwargs) 63 | As long as all random number generators used in the code are exposed as 64 | keyword arguments. If you're using generators other than Uniform[0,1] and 65 | N(0,1), you'll need to either rewrite your code to use these primitives, or 66 | extend this module appropriately. 67 | 68 | If you wish to update certain blocks of random number draws separately in 69 | the Markov chain, that is easy to do. Some of the draws could call randX() 70 | and others randY(). Then make the signature of the log-estimator: 71 | 72 | log_fhat(theta, randX=np.random.rand, randY=np.random.rand, ...) 
73 | 74 | Then pmslice.update_rand can update a dictionary with multiple objects: 75 | 76 | pm_rand = {'randX':pmslice.RandClass(), 'randY':pmslice.RandClass(), ...} 77 | 78 | You can use an arbitrary number of generators, and they can be of multiple 79 | types (RandClass/RandnClass or any other pmslice-compatible type you create). 80 | 81 | What if you wanted to run Hamiltonian Monte Carlo (HMC), or some other 82 | method that uses gradients, on the main variables theta? That's fine. When 83 | calling pmslice.update_rand, pass a log_fhat function that only returns a 84 | single scalar, a log-unnormalized-probability estimate. Then when calling 85 | pmslice.clamp_rand, pass a different log_fhat function that also returns 86 | gradients, and use the resulting clamped function in HMC. 87 | 88 | What if you want to do HMC, or some other MCMC method, jointly on the 89 | random number draws and theta? That's an interesting idea, but outside the 90 | scope of this module. 91 | """ 92 | 93 | # Iain Murray, June 2016. 94 | # http://iainmurray.net/ 95 | 96 | 97 | import numpy as _np 98 | 99 | def _restart_rands(rand_dict): 100 | for kk in rand_dict: 101 | rand_dict[kk].pos = 0 102 | 103 | def update_rand(log_fhat, rand_dict, Lp=None, *args, **kwargs): 104 | """Update the auxiliary objects rand_dict in a pseudo-marginal Markov chain. 105 | 106 | Lp should be the log-unnormalized probability of the joint auxiliary state, 107 | Lp = clamp_rand(log_fhat, rand_dict)(*args, **kwargs) 108 | Often just: 109 | Lp = clamp_rand(log_fhat, rand_dict)(theta) 110 | Lp has usually been computed in the previous Markov chain update. 111 | However, you can leave Lp=None, and it will be recomputed for you. 112 | 113 | The log-unnormalized-probability of the final auxiliary state is returned, 114 | so it can be provided to the function for the next MCMC update. 
115 | """ 116 | if Lp is None: 117 | _restart_rands(rand_dict) 118 | Lp = log_fhat(*args, **dict(kwargs, **rand_dict)) 119 | for kk in rand_dict: 120 | rr = rand_dict[kk] 121 | Lp_threshold = Lp + _np.log(_np.random.rand()) 122 | while True: 123 | rr.slice_propose() 124 | _restart_rands(rand_dict) 125 | Lp_prop = log_fhat(*args, **dict(kwargs, **rand_dict)) 126 | if Lp_prop >= Lp_threshold: 127 | break 128 | rr.slice_shrink() 129 | rr.accept() 130 | Lp = Lp_prop 131 | return Lp 132 | 133 | def clamp_rand(log_fhat, rand_dict): 134 | """Return a deterministic function from a random estimator for MCMC updates 135 | 136 | This function helps run a pseudo-marginal Markov chain on variables theta 137 | with a log-unbiased-estimator function with signature: 138 | 139 | log_fhat(theta, rand=np.random.rand, randn=np.random.randn) 140 | 141 | or more generally: 142 | 143 | log_fhat(*args, randX=..., randY=..., randZ=..., **kwargs) 144 | 145 | where all of the random number generators used by the estimator have been 146 | exposed as keyword arguments. 147 | 148 | The pseudo-marginal Markov chain will use a dictionary of special objects 149 | that replace the random number generators. For the first example: 150 | 151 | pm_rand = {'rand':pmslice.RandClass(), 'randn':pmslice.RandnClass()} 152 | 153 | This function then creates a version of the estimator that will use the same 154 | random number draws between updates to pmslice.update_rand. 155 | 156 | log_fhat_clamped = pmslice.clamp_rand(log_fhat, pm_rand) 157 | 158 | This clamped estimator can be used in any conventional Markov chain code 159 | (expecting a log-unnormalized-probability) to update the variables of 160 | interest theta. The clamped function takes the same arguments as log_fhat, 161 | except the random number generators should not be specified. 
162 | """ 163 | def clamped_log_fhat(*args, **kwargs): 164 | _restart_rands(rand_dict) 165 | return log_fhat(*args, **dict(kwargs, **rand_dict)) 166 | return clamped_log_fhat 167 | 168 | class RandClass(object): 169 | """ 170 | Objects of this class are used in pseudo-marginal slice sampling to replace 171 | the np.random.rand() function in the code for an unbiased estimator. See the 172 | documentation for the rest of the pmslice module. 173 | """ 174 | # u_prop is an array of values to be emitted, which is maintained to be the 175 | # same length as uu, previous values, and nu_prop, Gaussian values used in 176 | # proposal mechanism. 177 | _rand = _np.random.rand 178 | def __init__(self): 179 | self.u_prop = _np.zeros(0) 180 | self.pos = 0; # the number emitted so far / position next u_prop 181 | self.accept() 182 | def accept(self): 183 | # Copy current proposal to "old values" uu, and set step to zero, so 184 | # same values will still be emitted. 185 | self.uu = self.u_prop[:self.pos] 186 | self.u_prop = self.uu.copy() 187 | self.step = 0 188 | # Set up new search direction and bracket 189 | self.nu = _np.random.randn(self.pos) 190 | self.mx = _np.random.rand() 191 | self.mn = self.mx - 1.0 192 | self.pos = 0 193 | def _combine(self, uu, nu): 194 | """Returns reflected_around_inside_hypercube(uu + nu*self.step)""" 195 | target = _np.abs(uu + nu*self.step) 196 | ipart = _np.floor(target) 197 | fpart = target - ipart 198 | is_odd = (ipart % 2) > 0.9 199 | fpart[is_odd] = 1 - fpart[is_odd] 200 | return fpart 201 | def slice_shrink(self): 202 | if self.step > 0: 203 | self.mx = self.step 204 | else: 205 | self.mn = self.step 206 | # In intended uses, algorithms only collapse to point if there's a bug: 207 | assert(self.mx != self.mn) 208 | def slice_propose(self): 209 | self.step = self.mn + (self.mx - self.mn)*_np.random.rand() 210 | self.u_prop = self._combine(self.uu, self.nu) 211 | self.pos = 0 212 | def __call__(self, *args): 213 | args = _np.array(args, 
dtype='int64') 214 | num_needed = _np.prod(args) 215 | uu_left = self.uu.size - self.pos 216 | if num_needed > uu_left: 217 | # Double resevoir, or at least enough to cater current request: 218 | num_extend = max(num_needed, self.uu.size) 219 | new_uu = self._rand(num_extend) 220 | new_nu = _np.random.randn(num_extend) 221 | new_u_prop = self._combine(new_uu, new_nu) 222 | self.uu = _np.hstack((self.uu, new_uu)) 223 | self.nu = _np.hstack((self.nu, new_nu)) 224 | self.u_prop = _np.hstack((self.u_prop, new_u_prop)) 225 | # The copy is to prevent user changing random numbers for future calls 226 | ans = self.u_prop[self.pos:self.pos+num_needed].reshape(args).copy() 227 | self.pos += num_needed 228 | return ans 229 | 230 | class RandnClass(RandClass): 231 | """ 232 | Objects of this class are used in pseudo-marginal slice sampling to replace 233 | the np.random.randn() function in the code for an unbiased estimator. See 234 | the documentation for the rest of the pmslice module. 235 | """ 236 | _rand = _np.random.randn 237 | def _combine(self, uu, nu): 238 | # Initial interval in RandClass is of width 1, so here get initial 239 | # interval of width 2\pi, the whole ellipse of combinations. 
240 | beta = 2*_np.pi*self.step 241 | return uu*_np.cos(beta) + nu*_np.sin(beta) 242 | 243 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from distutils.core import setup 4 | 5 | if __name__ == '__main__': 6 | setup(name='pmslice', 7 | description='Pseudo-Marginal Slice Sampling', 8 | author='Iain Murray', 9 | url='https://github.com/imurray/pmslice-python/', 10 | py_modules=['pmslice']) 11 | 12 | -------------------------------------------------------------------------------- /simple_slice.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import numpy as np 4 | 5 | def slice_sweep(xx, logdist, widths=1.0, step_out=True, Lp=None): 6 | """simple axis-aligned implementation of slice sampling for vectors 7 | 8 | xx_next = slice_sample(xx, logdist) 9 | samples = slice_sample(xx, logdist, N=200, burn=20) 10 | 11 | Inputs: 12 | xx D, initial state (or array with D elements) 13 | logdist fn function: log of unnormalized probability of xx 14 | widths D, or 1x1, step sizes for slice sampling (default 1.0) 15 | step_out bool set to True (default) if widths sometimes far too small 16 | Lp 1, Optional: logdist(xx) if have already evaluated it 17 | 18 | Outputs: 19 | xx D, final state (same shape as at start) 20 | If Lp was provided as an input, then return tuple with second element: 21 | Lp 1, final log-prob, logdist(xx) 22 | """ 23 | # Iain Murray 2004, 2009, 2010, 2013, 2016 24 | # Algorithm orginally by Radford Neal, e.g., Annals of Statistic (2003) 25 | # See also pseudo-code in David MacKay's text book p375 26 | 27 | # startup stuff 28 | D = xx.size 29 | widths = np.array(widths) 30 | if widths.size == 1: 31 | widths = np.tile(widths, D) 32 | output_Lp = Lp is not None 33 | if Lp is None: 34 | log_Px = logdist(xx) 35 | else: 36 | 
log_Px = Lp 37 | perm = np.array(range(D)) 38 | 39 | # Force xx into vector for ease of use: 40 | xx_shape = xx.shape 41 | logdist_vec = lambda x: logdist(np.reshape(x, xx_shape)) 42 | xx = xx.ravel().copy() 43 | x_l = xx.copy() 44 | x_r = xx.copy() 45 | xprime = xx.copy() 46 | 47 | # Random scan through axes 48 | np.random.shuffle(perm) 49 | for dd in perm: 50 | log_uprime = log_Px + np.log(np.random.rand()) 51 | # Create a horizontal interval (x_l, x_r) enclosing xx 52 | rr = np.random.rand() 53 | x_l[dd] = xx[dd] - rr*widths[dd] 54 | x_r[dd] = xx[dd] + (1-rr)*widths[dd] 55 | if step_out: 56 | # Typo in early book editions: said compare to u, should be u' 57 | while logdist_vec(x_l) > log_uprime: 58 | x_l[dd] = x_l[dd] - widths[dd] 59 | while logdist_vec(x_r) > log_uprime: 60 | x_r[dd] = x_r[dd] + widths[dd] 61 | 62 | # Inner loop: 63 | # Propose xprimes and shrink interval until good one found 64 | while True: 65 | xprime[dd] = np.random.rand()*(x_r[dd] - x_l[dd]) + x_l[dd] 66 | log_Px = logdist_vec(xprime) 67 | if log_Px > log_uprime: 68 | break # this is the only way to leave the while loop 69 | else: 70 | # Shrink in 71 | if xprime[dd] > xx[dd]: 72 | x_r[dd] = xprime[dd] 73 | elif xprime[dd] < xx[dd]: 74 | x_l[dd] = xprime[dd] 75 | else: 76 | raise Exception('BUG DETECTED: Shrunk to current ' 77 | + 'position and still not acceptable.') 78 | xx[dd] = xprime[dd] 79 | x_l[dd] = xprime[dd] 80 | x_r[dd] = xprime[dd] 81 | 82 | if output_Lp: 83 | return xx, log_Px 84 | else: 85 | return xx 86 | 87 | --------------------------------------------------------------------------------