├── .gitignore
├── README.md
├── probabilistic
    ├── __init__.py
    ├── control.py
    ├── erp.py
    ├── inference.py
    ├── memoize.py
    ├── test.py
    └── trace.py
└── sandbox.py


/.gitignore:
--------------------------------------------------------------------------------
 1 | *.py[cod]
 2 | 
 3 | # C extensions
 4 | *.so
 5 | 
 6 | # Packages
 7 | *.egg
 8 | *.egg-info
 9 | dist
10 | build
11 | eggs
12 | parts
13 | bin
14 | var
15 | sdist
16 | develop-eggs
17 | .installed.cfg
18 | lib
19 | lib64
20 | 
21 | # Installer logs
22 | pip-log.txt
23 | 
24 | # Unit test / coverage reports
25 | .coverage
26 | .tox
27 | nosetests.xml
28 | 
29 | # Translations
30 | *.mo
31 | 
32 | # Mr Developer
33 | .mr.developer.cfg
34 | .project
35 | .pydevproject
36 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | probabilistic-python
2 | ====================
3 | 
4 | Turning Python into a probabilistic programming language


--------------------------------------------------------------------------------
/probabilistic/__init__.py:
--------------------------------------------------------------------------------
 1 | 
 2 | """
 3 | Random variable generators
 4 | """
 5 | from erp import flip, gaussian, gamma, beta, binomial, poisson, dirichlet, multinomial, uniform, multinomialDraw, uniformDraw
 6 | 
 7 | 
 8 | """
 9 | Hard and soft constraints
10 | """
11 | from trace import condition, factor
def softEq(a, b, tolerance):
	"""
	Soft equality score for a and b.
	Returns the log-density of (a - b) under a zero-mean Gaussian whose
	standard deviation is 'tolerance'.
	"""
	difference = a - b
	return erp.gaussian_logprob(difference, 0, tolerance)
14 | 
15 | 
16 | """
17 | Inference procedures
18 | """
19 | from inference import mean, distrib, expectation, MAP, rejectionSample, traceMH, LARJMH
20 | 
21 | 
22 | """
23 | Control structures
24 | """
25 | from control import ntimes, foreach, until, repeat
26 | 
27 | 
28 | """
29 | Stochastic memoization
30 | """
31 | from memoize import mem


--------------------------------------------------------------------------------
/probabilistic/control.py:
--------------------------------------------------------------------------------
 1 | 
 2 | def ntimes(times, block):
 3 | 	"""
 4 | 	Repeat a computation n times
 5 | 	"""
 6 | 	for i in xrange(times):
 7 | 		block(i)
 8 | 
 9 | def foreach(iterable, block):
10 | 	"""
11 | 	'for' loop control structure suitable for use inside probabilistic programs.
12 | 	Invokes block for every element in iterable.
13 | 	"""
14 | 	for elem in iterable:
15 | 		block(elem)
16 | 
def until(condition, block):
	"""
	'while' loop control structure usable inside probabilistic programs.
	condition() is checked before every iteration; block() runs for as
	long as it returns a false value.
	"""
	while not condition():
		block()
26 | 
def repeat(times, proc):
	"""
	Call proc() 'times' times and collect the results, in call order,
	into a list.
	"""
	return [proc() for _ in range(times)]


--------------------------------------------------------------------------------
/probabilistic/erp.py:
--------------------------------------------------------------------------------
  1 | import random
  2 | import trace
  3 | import math
  4 | import copy
  5 | 
  6 | """
  7 | A bunch of sampling/pdf code adapted from jschurch:
  8 | https://github.com/stuhlmueller/jschurch
  9 | """
 10 | 
 11 | class RandomPrimitive:
 12 | 	"""
 13 | 	Abstract base class for all ERPs
 14 | 	"""
 15 | 
 16 | 	def _sample_impl(self, params):
 17 | 		pass
 18 | 
 19 | 	def _sample(self, params, isStructural, conditionedValue=None):
 20 | 		# Assumes _sample is called from __call__ in
 21 | 		# conrete subclasses
 22 | 		return trace.lookupVariableValue(self, params, isStructural, 2, conditionedValue)
 23 | 
 24 | 	def _logprob(self, val, params):
 25 | 		pass
 26 | 
 27 | 	def _proposal(self, currval, params):
 28 | 		"""
 29 | 		Subclasses can override to do more efficient proposals
 30 | 		"""
 31 | 		return self._sample_impl(params)
 32 | 
 33 | 	def _logProposalProb(self, currval, propval, params):
 34 | 		"""
 35 | 		Subclasses can override to do more efficient proposals
 36 | 		"""
 37 | 		return self._logprob(propval, params)
 38 | 
 39 | 
 40 | class FlipRandomPrimitive(RandomPrimitive):
 41 | 	"""
 42 | 	ERP with Bernoulli distribution
 43 | 	"""
 44 | 
 45 | 	def __init__(self):
 46 | 		pass
 47 | 
 48 | 	def __call__(self, p=0.5, isStructural=False, conditionedValue=None):
 49 | 		return self._sample([p], isStructural, conditionedValue)
 50 | 
 51 | 	def _sample_impl(self, params):
 52 | 		p = params[0]
 53 | 		randval = random.random()
 54 | 		return randval < p
 55 | 
 56 | 	def _logprob(self, val, params):
 57 | 		p = params[0]
 58 | 		val = bool(val)
 59 | 		prob = (p if val else 1.0-p)
 60 | 		return math.log(prob)
 61 | 
 62 | 	def _proposal(self, currval, params):
 63 | 		return not(currval)
 64 | 
 65 | 	def _logProposalProb(self, currval, propval, params):
 66 | 		return 0.0 		# There's only one way to flip a binary variable
 67 | 
 68 | 
 69 | def gaussian_logprob(x, mu, sigma):
 70 | 	return -.5*(1.8378770664093453 + 2*math.log(sigma) + (x - mu)*(x - mu)/(sigma*sigma))
 71 | 
 72 | def gaussian_logprob_sigmaSq(x, mu, sigmaSq):
 73 | 	return -.5*(1.8378770664093453 + math.log(sigmaSq) + (x - mu)*(x - mu)/sigmaSq)
 74 | 
 75 | class GaussianRandomPrimitive(RandomPrimitive):
 76 | 	"""
 77 | 	ERP with Gaussian distribution
 78 | 	"""
 79 | 
 80 | 	def __init__(self):
 81 | 		pass
 82 | 
 83 | 	def __call__(self, mu, sigma, isStructural=False, conditionedValue=None):
 84 | 		return self._sample([mu,sigma], isStructural, conditionedValue)
 85 | 
 86 | 	def _sample_impl(self, params):
 87 | 		return random.gauss(params[0], params[1])
 88 | 
 89 | 	def _logprob(self, val, params):
 90 | 		return gaussian_logprob(val, params[0], params[1])
 91 | 
 92 | 	# Drift kernel
 93 | 	def _proposal(self, currval, params):
 94 | 		return random.gauss(currval, params[1])
 95 | 
 96 | 	# Drift kernel
 97 | 	def _logProposalProb(self, currval, propval, params):
 98 | 		return gaussian_logprob(propval, currval, params[1])
 99 | 
100 | 
gamma_cof = [76.18009172947146, -86.50532032941677, 24.01409824083091, -1.231739572450155, 0.1208650973866179e-2, -0.5395239384953e-5]
def log_gamma(xx):
	"""
	Natural log of the gamma function at xx, using the six-term Lanczos
	series whose coefficients are stored in gamma_cof.

	Every coefficient in gamma_cof is used; stopping after five terms
	drops the last correction and costs roughly 5e-8 of accuracy at xx = 2.
	"""
	x = xx - 1.0
	tmp = x + 5.5
	tmp -= (x + 0.5)*math.log(tmp)
	ser = 1.000000000190015
	for cof in gamma_cof:
		x += 1
		ser += cof / x
	# 2.5066282746310005 is sqrt(2*pi)
	return -tmp + math.log(2.5066282746310005*ser)
112 | 
def gamma_logprob(x, a, b):
	"""
	Log-density at x of a Gamma distribution with shape a and scale b.
	"""
	shape_term = (a - 1)*math.log(x)
	scale_term = float(x)/b
	return shape_term - scale_term - log_gamma(a) - a*math.log(b)
115 | 
class GammaRandomPrimitive(RandomPrimitive):
	"""
	ERP with Gamma distribution.
	Parameters are stored as [a, b] and passed in that order to
	random.gammavariate and gamma_logprob.
	"""

	def __init__(self):
		pass

	def __call__(self, a, b, isStructural=False, conditionedValue=None):
		params = [a, b]
		return self._sample(params, isStructural, conditionedValue)

	def _sample_impl(self, params):
		a, b = params[0], params[1]
		return random.gammavariate(a, b)

	def _logprob(self, val, params):
		a, b = params[0], params[1]
		return gamma_logprob(val, a, b)

	# TODO: Custom proposal kernel?
134 | 	
def log_beta(a, b):
	"""
	Log of the beta function B(a, b), computed from log_gamma.
	"""
	numerator = log_gamma(a) + log_gamma(b)
	return numerator - log_gamma(a+b)
137 | 
def beta_logprob(x, a, b):
	"""
	Log-density at x of a Beta(a, b) distribution.
	Returns -inf unless x lies strictly between 0 and 1.
	"""
	if not (0 < x < 1):
		return -float('inf')
	return (a-1)*math.log(x) + (b-1)*math.log(1-x) - log_beta(a,b)
143 | 
class BetaRandomPrimitive(RandomPrimitive):
	"""
	ERP with Beta distribution.
	Parameters are stored as [a, b].
	"""

	def __init__(self):
		pass

	def __call__(self, a, b, isStructural=False, conditionedValue=None):
		params = [a, b]
		return self._sample(params, isStructural, conditionedValue)

	def _sample_impl(self, params):
		a, b = params[0], params[1]
		return random.betavariate(a, b)

	def _logprob(self, val, params):
		a, b = params[0], params[1]
		return beta_logprob(val, a, b)

	# TODO: Custom proposal kernel?
162 | 
def binomial_sample(p, n):
	"""
	Draw the number of successes in n trials with success probability p.

	While n is above 10 the problem is repeatedly reduced using a
	Beta-distributed split point; the remaining (at most 10) trials are
	simulated directly. n is expected to be a non-negative integer.
	"""
	k = 0
	N = 10
	while n > N:
		# Floor division is spelled out so the split sizes stay integers
		# regardless of the interpreter's division semantics.
		a = 1 + n // 2
		b = 1 + n - a
		x = random.betavariate(a, b)
		if x >= p:
			n = a - 1
			p /= x
		else:
			k += a
			n = b - 1
			p = (p - x) / (1.0 - x)
	for _ in range(n):
		if random.random() < p:
			k += 1
	return int(k)
185 | 
def g(x):
	"""
	Helper used by binomial_logprob:
	(1 - x^2 + 2*x*log(x)) / (1 - x)^2, with the special cases
	g(0) = 1 and g(1) = 0 handled explicitly.
	"""
	if x == 0:
		return 1
	if x == 1:
		return 0
	numerator = 1 - (x * x) + (2 * x * math.log(x))
	complement = 1 - x
	return numerator / (complement * complement)
193 | 
def binomial_logprob(s, p, n):
	"""
	Approximate log-probability of s successes in n trials with success
	probability p, via a Gaussian-based approximation.
	Returns -inf whenever s >= n.
	"""
	if s >= n:
		return -float('inf')
	half = 1.0/2
	third = 1.0/3
	sixth = 1.0/6
	q = 1-p
	succ_mid = s + half
	fail_mid = n - s - half
	base_dev = s + sixth - (n + third) * p
	correction = q/succ_mid - p/(fail_mid+half) + (q-half)/(n+1)
	deviation = base_dev + 0.02*correction
	numerator = 1 + q * g(succ_mid/(n*p)) + p * g(fail_mid/(n*q))
	denominator = (n + sixth) * p * q
	invsd = math.sqrt(numerator / denominator)
	z = deviation * invsd
	return gaussian_logprob(z, 0, 1) + math.log(invsd)
212 | 
class BinomialRandomPrimitive(RandomPrimitive):
	"""
	ERP with binomial distribution.
	Parameters are stored as [p, n]: success probability, then trial count.
	"""

	def __init__(self):
		pass

	def __call__(self, p, n, isStructural=False, conditionedValue=None):
		params = [p, n]
		return self._sample(params, isStructural, conditionedValue)

	def _sample_impl(self, params):
		p, n = params[0], params[1]
		return binomial_sample(p, n)

	def _logprob(self, val, params):
		p, n = params[0], params[1]
		return binomial_logprob(val, p, n)

	# TODO: Custom proposal kernel?
231 | 
def poisson_sample(mu):
	"""
	Draw a Poisson-distributed integer with mean mu.

	For mu > 10 the mean is reduced in chunks: m is an integer number of
	arrivals, x the Gamma-distributed time of the m-th arrival. If that
	time overshoots mu, the remaining count is binomial; otherwise m
	arrivals are banked and sampling continues with the leftover mean.
	Small means use the multiply-uniforms method.
	"""
	k = 0
	while mu > 10:
		# m must be a whole number of arrivals for the Gamma/binomial
		# decomposition to be exact.
		m = int(7.0/8*mu)
		x = random.gammavariate(m, 1)
		if x > mu:
			return int(k + binomial_sample(mu/x, m - 1))
		mu -= x
		k += m
	emu = math.exp(-mu)
	p = 1.0
	# Always draw at least once, so that mu == 0 yields 0 rather than -1.
	while True:
		p *= random.random()
		k += 1
		if p <= emu:
			break
	return int(k - 1)
248 | 
def fact(x):
	"""
	Factorial of x by repeated multiplication.
	Any x <= 1 yields 1.
	"""
	product = 1
	remaining = x
	while remaining > 1:
		product *= remaining
		remaining -= 1
	return product
255 | 
def lnfact(x):
	"""
	Natural log of x!.

	Values below 1 are treated as 1. For x < 12 the factorial is computed
	exactly (after rounding x); otherwise Stirling's series is used:
	(x + 1/2) ln x - x + ln(2 pi)/2 + 1/(12 x) - 1/(360 x^3)
	+ 1/(1260 x^5) - 1/(1680 x^7).
	"""
	if x < 1:
		x = 1
	if x < 12:
		return math.log(fact(round(x)))
	invx = 1.0 / x
	invx2 = invx*invx
	invx3 = invx2*invx
	invx5 = invx3*invx2
	invx7 = invx5*invx2
	ssum = ((x + 0.5) * math.log(x)) - x
	ssum += math.log(2*math.pi) / 2.0
	# The second correction term is 1/(360 x^3), not 1/(360 x).
	ssum += (invx / 12) - (invx3 / 360)
	ssum += (invx5 / 1260) - (invx7 / 1680)
	return ssum
271 | 
def poisson_logprob(k, mu):
	"""
	Log-probability of observing k under a Poisson distribution with mean mu.
	"""
	rate_term = k * math.log(mu)
	return rate_term - mu - lnfact(k)
274 | 
class PoissonRandomPrimitive(RandomPrimitive):
	"""
	ERP with poisson distribution.
	The single parameter is stored as [mu].
	"""

	def __init__(self):
		pass

	def __call__(self, mu, isStructural=False, conditionedValue=None):
		params = [mu]
		return self._sample(params, isStructural, conditionedValue)

	def _sample_impl(self, params):
		mu = params[0]
		return poisson_sample(mu)

	def _logprob(self, val, params):
		mu = params[0]
		return poisson_logprob(val, mu)

	# TODO: Custom proposal kernel?
293 | 
def dirichlet_sample(alpha):
	"""
	Draw a list of weights from a Dirichlet distribution with
	concentration parameters alpha: one Gamma(a, 1) draw per entry,
	each divided by the total of all draws.
	"""
	draws = []
	total = 0
	for concentration in alpha:
		draw = random.gammavariate(concentration, 1)
		draws.append(draw)
		total += draw
	return [draw / total for draw in draws]
304 | 
def dirichlet_logprob(theta, alpha):
	"""
	Log-density of the weight vector theta under a Dirichlet distribution
	with concentration parameters alpha.
	Returns -inf if any component of theta is not strictly positive.
	"""
	logp = log_gamma(sum(alpha))
	for i, a in enumerate(alpha):
		if theta[i] <= 0:
			# Outside the support; also avoids math.log raising on 0
			return -float('inf')
		logp += (a - 1)*math.log(theta[i])
		logp -= log_gamma(a)
	return logp
311 | 
class DirichletRandomPrimitive(RandomPrimitive):
	"""
	ERP with dirichlet distribution.
	The params list is the concentration vector alpha itself.
	"""

	def __init__(self):
		pass

	def __call__(self, alpha, isStructural=False, conditionedValue=None):
		return self._sample(alpha, isStructural, conditionedValue)

	def _sample_impl(self, params):
		alpha = params
		return dirichlet_sample(alpha)

	def _logprob(self, val, params):
		alpha = params
		return dirichlet_logprob(val, alpha)

	# TODO: Custom proposal kernel?
330 | 
331 | 
def multinomial_sample(theta):
	"""
	Draw an index into theta with probability proportional to theta[index].

	theta need not be normalised. Entries with zero weight are never
	selected. An empty theta yields -1.
	"""
	x = random.random() * sum(theta)
	probAccum = 0.0
	# Fallback for floating-point round-off at the top of the range:
	# the last index that carries any weight.
	fallback = len(theta) - 1
	for index, weight in enumerate(theta):
		if weight > 0:
			fallback = index
		probAccum += weight
		if x < probAccum:
			return index
	return fallback
341 | 
def multinomial_logprob(n, theta):
	"""
	Log-probability of drawing index n from the (unnormalised) weights theta.

	n is rounded to the nearest integer before the range check. Returns
	-inf for an out-of-range index or an index with no weight.
	"""
	n = int(round(n))
	if n < 0 or n >= len(theta):
		return -float('inf')
	weight = theta[n]
	if weight <= 0:
		# Zero-probability outcome; math.log(0) would raise
		return -float('inf')
	# float() keeps integer weights from truncating under integer division
	return math.log(weight/float(sum(theta)))
347 | 
class MultinomialRandomPrimitive(RandomPrimitive):
	"""
	ERP with multinomial distribution.
	The params list is the weight vector theta itself; values are indices
	into it.
	"""

	def __init__(self):
		pass

	def __call__(self, theta, isStructural=False, conditionedValue=None):
		return self._sample(theta, isStructural, conditionedValue)

	def _sample_impl(self, params):
		theta = params
		return multinomial_sample(theta)

	def _logprob(self, val, params):
		theta = params
		return multinomial_logprob(val, theta)

	def _proposal(self, currval, params):
		# Multinomial with currval projected out: zero the current index's
		# weight in a copy, then sample from what remains
		remaining = copy.copy(params)
		remaining[currval] = 0.0
		return multinomial_sample(remaining)

	def _logProposalProb(self, currval, propval, params):
		# Multinomial with currval projected out: score propval under the
		# same reduced weights that _proposal samples from
		remaining = copy.copy(params)
		remaining[currval] = 0.0
		return multinomial_logprob(propval, remaining)
376 | 
377 | 
class UniformRandomPrimitive(RandomPrimitive):
	"""
	ERP with uniform distribution.
	Parameters are stored as [lo, hi].
	"""

	def __init__(self):
		pass

	def __call__(self, lo, hi, isStructural=False, conditionedValue=None):
		params = [lo, hi]
		return self._sample(params, isStructural, conditionedValue)

	def _sample_impl(self, params):
		lo, hi = params[0], params[1]
		return random.uniform(lo, hi)

	def _logprob(self, val, params):
		lo, hi = params[0], params[1]
		outside = val < lo or val > hi
		if not outside:
			# Constant density 1/(hi - lo) on [lo, hi]
			return -math.log(hi - lo)
		return -float('inf')

	# TODO: Custom proposal kernel?
399 | 
400 | 
401 | 
402 | """
403 | Singleton instances of all the ERP generators
404 | """
405 | 
# One shared, callable instance per ERP class. Calling an instance
# (e.g. flip(0.7) or gaussian(mu, sigma)) goes through that class's __call__.
flip = FlipRandomPrimitive()
gaussian = GaussianRandomPrimitive()
gamma = GammaRandomPrimitive()
beta = BetaRandomPrimitive()
binomial = BinomialRandomPrimitive()
poisson = PoissonRandomPrimitive()
dirichlet = DirichletRandomPrimitive()
multinomial = MultinomialRandomPrimitive()
uniform = UniformRandomPrimitive()
415 | 
416 | 
417 | """
418 | Random utilities built on top of ERPs
419 | """
420 | 
def multinomialDraw(items, probs, isStructural=False):
	"""
	Pick one element of items, using probs as the multinomial weights
	over positions in items.
	"""
	index = multinomial(probs, isStructural=isStructural)
	return items[index]
423 | 
def uniformDraw(items, isStructural=False):
	"""
	Pick one element of items, giving every position the same weight 1/n.
	"""
	n = len(items)
	weights = [1.0/n for _ in range(n)]
	index = multinomial(weights, isStructural=isStructural)
	return items[index]


--------------------------------------------------------------------------------
/probabilistic/inference.py:
--------------------------------------------------------------------------------
  1 | import trace
  2 | import copy
  3 | import random
  4 | import math
  5 | from collections import Counter
  6 | 
  7 | 
  8 | def distrib(computation, samplingFn, *samplerArgs):
  9 | 	"""
 10 | 	Compute the discrete distribution over the given computation
 11 | 	Only appropriate for computations that return a discrete value
 12 | 	"""
 13 | 	hist = Counter()
 14 | 	samps = samplingFn(computation, *samplerArgs)
 15 | 	for s in samps:
 16 | 		hist[s[0]] += 1
 17 | 	flnumsamps = float(len(samps))
 18 | 	for s in hist:
 19 | 		hist[s] /= flnumsamps
 20 | 	return hist
 21 | 
 22 | 
 23 | def expectation(computation, samplingFn, *samplerArgs):
 24 | 	"""
 25 | 	Compute the expected value of a computation.
 26 | 	Only appropriate for computations whose return value overloads the += and / operators
 27 | 	"""
 28 | 	samps = samplingFn(computation, *samplerArgs)
 29 | 	return mean(map(lambda s: s[0], samps))
 30 | 
 31 | 
 32 | def mean(values):
 33 | 	"""
 34 | 	Compute the mean of a set of values
 35 | 	"""
 36 | 	mean = values[0]
 37 | 	for v in values[1:]:
 38 | 		mean += v
 39 | 	return mean / float(len(values))
 40 | 
 41 | 
 42 | def MAP(computation, samplingFn, *samplerArgs):
 43 | 	"""
 44 | 	Maximum a posteriori inference (returns the highest probability sample)
 45 | 	"""
 46 | 	samps = samplingFn(computation, *samplerArgs)
 47 | 	maxelem = max(samps, key=lambda s: s[1])
 48 | 	return maxelem[0]
 49 | 
 50 | 
 51 | def rejectionSample(computation):
 52 | 	"""
 53 | 	Rejection sample a result from computation that satsifies
 54 | 	all conditioning expressions.
 55 | 	"""
 56 | 	tr = trace.newTrace(computation)
 57 | 	return tr.returnValue
 58 | 
 59 | 
 60 | def _randomChoice(items):
 61 | 	"""
 62 | 	Like random.choice, but returns None if items is empty
 63 | 	"""
 64 | 	if len(items) == 0:
 65 | 		return None
 66 | 	else:
 67 | 		return random.choice(items)
 68 | 
 69 | 
 70 | class RandomWalkKernel:
 71 | 	"""
 72 | 	MCMC transition kernel that takes random walks
 73 | 	by tweaking a single variable at a time
 74 | 	"""
 75 | 
 76 | 	def __init__(self, structural=True, nonstructural=True):
 77 | 		self.structural = structural
 78 | 		self.nonstructural = nonstructural
 79 | 		self.proposalsMade = 0
 80 | 		self.proposalsAccepted = 0
 81 | 
 82 | 	def next(self, currTrace):
 83 | 
 84 | 		self.proposalsMade += 1
 85 | 		name = _randomChoice(currTrace.freeVarNames(self.structural, self.nonstructural))
 86 | 
 87 | 		# If we have no free random variables, then just run the computation
 88 | 		# and generate another sample (this may not actually be deterministic,
 89 | 		# in the case of nested query)
 90 | 		if name == None:
 91 | 			currTrace.traceUpdate(not structural)
 92 | 			return currTrace
 93 | 		# Otherwise, make a proposal for a randomly-chosen variable, probabilistically
 94 | 		# accept it
 95 | 		else:
 96 | 			nextTrace, fwdPropLP, rvsPropLP = currTrace.proposeChange(name)
 97 | 			fwdPropLP -= math.log(len(currTrace.freeVarNames(self.structural, self.nonstructural)))
 98 | 			rvsPropLP -= math.log(len(nextTrace.freeVarNames(self.structural, self.nonstructural)))
 99 | 			acceptThresh = nextTrace.logprob - currTrace.logprob + rvsPropLP - fwdPropLP
100 | 			if nextTrace.conditionsSatisfied and math.log(random.random()) < acceptThresh:
101 | 				self.proposalsAccepted += 1
102 | 				return nextTrace
103 | 			else:
104 | 				return currTrace
105 | 
106 | 	def stats(self):
107 | 		print "Acceptance ratio: {0} ({1}/{2})".format(float(self.proposalsAccepted)/self.proposalsMade, \
108 | 													   self.proposalsAccepted, self.proposalsMade)
109 | 
110 | 
class LARJInterpolationTrace(object):
	"""
	Abstraction for the linear interpolation of two execution traces.

	alpha = 0 weights trace1 fully, alpha = 1 weights trace2 fully.
	"""

	def __init__(self, trace1, trace2, alpha=0.0):
		self.trace1 = trace1
		self.trace2 = trace2
		self.alpha = alpha

	@property
	def logprob(self):
		# Linear blend of the two traces' log-probabilities
		return (1-self.alpha)*self.trace1.logprob + self.alpha*self.trace2.logprob

	@property
	def conditionsSatisfied(self):
		return self.trace1.conditionsSatisfied and self.trace2.conditionsSatisfied

	@property
	def returnValue(self):
		# The interpolation reports the return value of the second trace
		return self.trace2.returnValue

	def freeVarNames(self, structural=True, nonstructural=True):
		"""
		Union (without duplicates) of the free variable names of both traces.
		"""
		return list(set(self.trace1.freeVarNames(structural, nonstructural) + \
						self.trace2.freeVarNames(structural, nonstructural)))

	def proposeChange(self, varname):
		"""
		Propose a new value for the nonstructural variable varname.
		Returns (nextTrace, fwdPropLP, rvsPropLP), where nextTrace is a new
		interpolation trace; only the underlying traces that actually contain
		the variable are deep-copied and updated.
		"""
		var1 = self.trace1.getRecord(varname)
		var2 = self.trace2.getRecord(varname)
		nextTrace = LARJInterpolationTrace(copy.deepcopy(self.trace1) if var1 else self.trace1, \
										   copy.deepcopy(self.trace2) if var2 else self.trace2, \
										   self.alpha)
		# Re-fetch the records so that edits land on the copies
		var1 = nextTrace.trace1.getRecord(varname)
		var2 = nextTrace.trace2.getRecord(varname)
		var = (var1 if var1 else var2)
		assert(not var.structural)		# We're only supposed to be making changes to non-structurals here
		propval = var.erp._proposal(var.val, var.params)
		fwdPropLP = var.erp._logProposalProb(var.val, propval, var.params)
		rvsPropLP = var.erp._logProposalProb(propval, var.val, var.params)
		if var1:
			var1.val = propval
			var1.logprob = var1.erp._logprob(var1.val, var1.params)
			nextTrace.trace1.traceUpdate(not var1.structural)
		if var2:
			var2.val = propval
			var2.logprob = var2.erp._logprob(var2.val, var2.params)
			nextTrace.trace2.traceUpdate(not var2.structural)
		return nextTrace, fwdPropLP, rvsPropLP
159 | 
160 | 
class LARJKernel:
	"""
	MCMC transition kernel that does reversible jumps
	using the LARJ algorithm.

	diffusionKernel -- kernel used for nonstructural moves and for the
	                   annealing steps; must provide next(trace) and a
	                   proposalsAccepted counter
	annealSteps     -- number of annealing steps per jump (0 disables annealing)
	jumpFreq        -- probability of attempting a jump on each step; None
	                   means "proportion of free variables that are structural"
	"""

	def __init__(self, diffusionKernel, annealSteps, jumpFreq=None):
		self.diffusionKernel = diffusionKernel
		self.annealSteps = annealSteps
		self.jumpFreq = jumpFreq
		self.jumpProposalsMade = 0
		self.jumpProposalsAccepted = 0
		self.diffusionProposalsMade = 0
		self.diffusionProposalsAccepted = 0
		self.annealingProposalsMade = 0
		self.annealingProposalsAccepted = 0

	def next(self, currTrace):
		"""
		Perform one transition from currTrace (either a structural jump or a
		nonstructural diffusion move) and return the resulting trace.
		"""
		numStruct = len(currTrace.freeVarNames(nonstructural=False))
		numNonStruct = len(currTrace.freeVarNames(structural=False))

		# If we have no free random variables, then just run the computation
		# and generate another sample (this may not actually be deterministic,
		# in the case of nested query)
		if numStruct + numNonStruct == 0:
			currTrace.traceUpdate()
			return currTrace

		# Decide whether to jump or diffuse. A move type with no variables to
		# act on is never chosen, whatever jumpFreq says.
		if numStruct == 0:
			structChoiceProb = 0.0
		elif numNonStruct == 0:
			structChoiceProb = 1.0
		elif self.jumpFreq is not None:
			structChoiceProb = self.jumpFreq
		else:
			structChoiceProb = float(numStruct)/(numStruct + numNonStruct)

		if random.random() < structChoiceProb:
			# Make a structural proposal
			return self.jumpStep(currTrace)

		# Make a nonstructural proposal
		prevAccepted = self.diffusionKernel.proposalsAccepted
		nextTrace = self.diffusionKernel.next(currTrace)
		self.diffusionProposalsMade += 1
		self.diffusionProposalsAccepted += (self.diffusionKernel.proposalsAccepted - prevAccepted)
		return nextTrace

	def jumpStep(self, currTrace):
		"""
		Propose a new value for one randomly chosen structural variable,
		optionally anneal between the old and new structures using the
		diffusion kernel, then accept or reject the jump.
		Returns the new trace on acceptance and currTrace otherwise.
		"""
		self.jumpProposalsMade += 1
		oldStructTrace = copy.deepcopy(currTrace)
		newStructTrace = copy.deepcopy(currTrace)

		# Randomly choose a structural variable to change
		structVars = newStructTrace.freeVarNames(nonstructural=False)
		name = _randomChoice(structVars)
		var = newStructTrace.getRecord(name)
		origval = var.val
		propval = var.erp._proposal(var.val, var.params)
		fwdPropLP = var.erp._logProposalProb(var.val, propval, var.params)
		var.val = propval
		var.logprob = var.erp._logprob(var.val, var.params)
		newStructTrace.traceUpdate()
		oldNumVars = len(oldStructTrace.freeVarNames(nonstructural=False))
		newNumVars = len(newStructTrace.freeVarNames(nonstructural=False))
		fwdPropLP += newStructTrace.newlogprob - math.log(oldNumVars)

		# We only actually do annealing if we have any non-structural variables and we're doing more than
		# zero annealing steps
		annealingLpRatio = 0.0
		numNonStruct = len(oldStructTrace.freeVarNames(structural=False)) + len(newStructTrace.freeVarNames(structural=False))
		if numNonStruct != 0 and self.annealSteps > 0:
			lerpTrace = LARJInterpolationTrace(oldStructTrace, newStructTrace)
			prevAccepted = self.diffusionKernel.proposalsAccepted
			# alpha sweeps from 0 to 1 across the steps; with a single step
			# there is no interval to divide, so alpha stays at 0.
			alphaDenom = max(self.annealSteps - 1, 1)
			aStep = 0
			while aStep < self.annealSteps:
				lerpTrace.alpha = float(aStep)/alphaDenom
				annealingLpRatio += lerpTrace.logprob
				lerpTrace = self.diffusionKernel.next(lerpTrace)
				annealingLpRatio -= lerpTrace.logprob
				aStep += 1
			self.annealingProposalsMade += self.annealSteps
			self.annealingProposalsAccepted += (self.diffusionKernel.proposalsAccepted - prevAccepted)
			oldStructTrace = lerpTrace.trace1
			newStructTrace = lerpTrace.trace2

		# Finalize accept/reject decision
		var = newStructTrace.getRecord(name)
		rvsPropLP = var.erp._logProposalProb(propval, origval, var.params) + oldStructTrace.lpDiff(newStructTrace) - math.log(newNumVars)
		acceptanceProb = newStructTrace.logprob - currTrace.logprob + rvsPropLP - fwdPropLP + annealingLpRatio
		if newStructTrace.conditionsSatisfied and math.log(random.random()) < acceptanceProb:
			self.jumpProposalsAccepted += 1
			return newStructTrace
		return currTrace

	def stats(self):
		"""
		Print acceptance ratios for each kind of proposal that has been made
		at least once, followed by the overall (jump + diffusion) ratio.
		"""
		overallProposalsMade = self.jumpProposalsMade + self.diffusionProposalsMade
		overallProposalsAccepted = self.jumpProposalsAccepted + self.diffusionProposalsAccepted
		if self.diffusionProposalsMade > 0:
			print("Diffusion acceptance ratio: {0} ({1}/{2})".format(float(self.diffusionProposalsAccepted)/self.diffusionProposalsMade, \
																	 self.diffusionProposalsAccepted, self.diffusionProposalsMade))
		if self.jumpProposalsMade > 0:
			print("Jump acceptance ratio: {0} ({1}/{2})".format(float(self.jumpProposalsAccepted)/self.jumpProposalsMade, \
																self.jumpProposalsAccepted, self.jumpProposalsMade))
		if self.annealingProposalsMade > 0:
			print("Annealing acceptance ratio: {0} ({1}/{2})".format(float(self.annealingProposalsAccepted)/self.annealingProposalsMade, \
																	 self.annealingProposalsAccepted, self.annealingProposalsMade))
		if overallProposalsMade > 0:
			print("Overall acceptance ratio: {0} ({1}/{2})".format(float(overallProposalsAccepted)/overallProposalsMade, \
																   overallProposalsAccepted, overallProposalsMade))
265 | 
266 | 
267 | def mcmc(computation, kernel, numsamps, lag=1, verbose=False):
268 | 	"""
269 | 	Do MCMC for 'numsamps' iterations using a given transition kernel
270 | 	"""
271 | 	currentTrace = trace.newTrace(computation)
272 | 	samps = []
273 | 	i = 0
274 | 	iters = numsamps * lag
275 | 	while i < iters:
276 | 		currentTrace = kernel.next(currentTrace)
277 | 		if i % lag == 0:
278 | 			if verbose:
279 | 				print "iteration {0}\r".format(i),
280 | 			samps.append((currentTrace.returnValue, currentTrace.logprob))
281 | 		i += 1
282 | 	if verbose:
283 | 		print ""
284 | 		kernel.stats()
285 | 	return samps
286 | 
287 | 
def traceMH(computation, numsamps, lag=1, verbose=False):
	"""
	Sample from a probabilistic computation with single-variable-proposal
	Metropolis-Hastings: runs mcmc with a default RandomWalkKernel.
	"""
	kernel = RandomWalkKernel()
	return mcmc(computation, kernel, numsamps, lag, verbose)
295 | 
296 | 
def LARJMH(computation, numsamps, annealSteps, jumpFreq=None, lag=1, verbose=False):
	"""
	Sample from a probabilistic computation using locally annealed
	reversible jump mcmc: runs mcmc with a LARJKernel whose diffusion
	kernel is a RandomWalkKernel restricted to nonstructural variables.
	"""
	diffusion = RandomWalkKernel(structural=False)
	kernel = LARJKernel(diffusion, annealSteps, jumpFreq)
	return mcmc(computation, kernel, numsamps, lag, verbose)
305 | 
306 | 


--------------------------------------------------------------------------------
/probabilistic/memoize.py:
--------------------------------------------------------------------------------
 1 | import cPickle
 2 | 
 3 | 
 4 | class _MemoizedFunction:
 5 | 	"""
 6 | 	Wrapper around a function to memoize its results
 7 | 	Source: http://stackoverflow.com/questions/4669391/python-anyone-have-a-memoizing-decorator-that-can-handle-unhashable-arguments
 8 | 	This implementation allows us to memoize functions whose arguments can be arbitrary Python structures.
 9 | 	However, it is slower for simple argument types such as numbers or strings.
10 | 	"""
11 | 
12 | 	def __init__(self, func):
13 | 		self.func = func
14 | 		self.cache = {}
15 | 
16 | 	def __call__(self, *args, **kwds):
17 | 		str = cPickle.dumps(args, 1)+cPickle.dumps(kwds, 1)
18 | 		if not self.cache.has_key(str):
19 | 			val =  self.func(*args, **kwds)
20 | 			self.cache[str] = val
21 | 			return val
22 | 		else:
23 | 			return self.cache[str]
24 | 
def mem(func):
	"""
	Return a memoizing wrapper around func (a _MemoizedFunction instance).
	"""
	memoized = _MemoizedFunction(func)
	return memoized
27 | 


--------------------------------------------------------------------------------
/probabilistic/test.py:
--------------------------------------------------------------------------------
  1 | 
  2 | from inference import *
  3 | from control import *
  4 | from trace import *
  5 | from erp import *
  6 | from memoize import *
  7 | 
  8 | from datetime import datetime
  9 | 
# Shared settings for the checks in this file
samples = 150			# draws per estimate (sample count passed to repeat / LARJMH)
lag = 20				# lag value passed to LARJMH by mhtest and larjtest
runs = 5				# independent estimates produced per check
errorTolerance = 0.07	# default tolerance for test, mhtest, larjtest and eqtest
 14 | 
 15 | def test(name, estimates, trueExpectation, tolerance=errorTolerance):
 16 | 
 17 | 	print "test: {0} ...".format(name),
 18 | 
 19 | 	errors = map(lambda estimate: abs(estimate - trueExpectation), estimates)
 20 | 	meanAbsError = mean(errors)
 21 | 	if meanAbsError > tolerance:
 22 | 		print "failed! True mean: {0} | Test mean: {1}".format(trueExpectation, mean(estimates))
 23 | 	else:
 24 | 		print "passed."
 25 | 
 26 | def mhtest(name, computation, trueExpectation, tolerance=errorTolerance):
 27 | 	#test(name, repeat(runs, lambda: expectation(computation, traceMH, samples, lag)), trueExpectation, tolerance)
 28 | 	test(name, repeat(runs, lambda: expectation(computation, LARJMH, samples, 0, None, lag)), trueExpectation, tolerance)
 29 | 
 30 | def larjtest(name, computation, trueExpectation, tolerance=errorTolerance):
 31 | 	test(name, repeat(runs, lambda: expectation(computation, LARJMH, samples, 10, None, lag)), trueExpectation, tolerance)
 32 | 
 33 | def eqtest(name, estvalues, truevalues, tolerance=errorTolerance):
 34 | 	print "test: {0} ...".format(name),
 35 | 	assert(len(estvalues) == len(truevalues))
 36 | 	for i in xrange(len(estvalues)):
 37 | 		estvalue = estvalues[i]
 38 | 		truevalue = truevalues[i]
 39 | 		if abs(estvalue - truevalue) > tolerance:
 40 | 			print "failed! True value: {0} | Test value: {1}".format(truevalue, estvalue)
 41 | 			return
 42 | 	print "passed."
 43 | 
 44 | if __name__ == "__main__":
 45 | 
 46 | 	d1 = datetime.now()
 47 | 
 48 | 	print "starting tests..."
 49 | 
 50 | 
 51 | 	"""
 52 | 	ERP tests
 53 | 	"""
 54 | 
 55 | 	test("flip sample", \
 56 | 		  repeat(runs, lambda: mean(repeat(samples, lambda: flip(0.7)))), \
 57 | 		  0.7)
 58 | 
 59 | 	mhtest("flip query", \
 60 | 			lambda: flip(0.7), \
 61 | 			0.7)
 62 | 
 63 | 	test("uniform sample", \
 64 | 		  repeat(runs, lambda: mean(repeat(samples, lambda: uniform(0.1, 0.4)))), \
 65 | 		  0.5*(.1+.4))
 66 | 
 67 | 	mhtest("uniform query", \
 68 | 			lambda: uniform(.1, .4), \
 69 | 			0.5*(.1+.4))
 70 | 
 71 | 	test("multinomial sample", \
 72 | 		  repeat(runs, lambda: mean(repeat(samples, lambda: multinomialDraw([.2,.3,.4], [0.2, 0.6, 0.2])))), \
 73 | 		  0.2*.2 + 0.6*.3 + 0.2*.4)
 74 | 
 75 | 	mhtest("multinomial query", \
 76 | 			lambda: multinomialDraw([.2,.3,.4], [0.2, 0.6, 0.2]), \
 77 | 			0.2*.2 + 0.6*.3 + 0.2*.4)
 78 | 
 79 | 	eqtest("multinomial lp", \
 80 | 		[multinomial_logprob(0, [0.2, 0.6, 0.2]), \
 81 | 		 multinomial_logprob(1, [0.2, 0.6, 0.2]), \
 82 | 		 multinomial_logprob(2, [0.2, 0.6, 0.2])], \
 83 | 		[math.log(0.2), math.log(0.6), math.log(0.2)])
 84 | 
 85 | 	test("gaussian sample", \
 86 | 		  repeat(runs, lambda: mean(repeat(samples, lambda: gaussian(0.1, 0.5)))), \
 87 | 		  0.1)
 88 | 
 89 | 	mhtest("gaussian query", \
 90 | 			lambda: gaussian(0.1, 0.5), \
 91 | 			0.1)
 92 | 
 93 | 	eqtest("gaussian lp", \
 94 | 		[gaussian_logprob(0, 0.1, 0.5), \
 95 | 		 gaussian_logprob(0.25, 0.1, 0.5), \
 96 | 		 gaussian_logprob(0.6, 0.1, 0.5)], \
 97 | 		[-0.2457913526447274, -0.27079135264472737, -0.7257913526447274])
 98 | 
 99 | 	test("gamma sample", \
100 | 		  repeat(runs, lambda: mean(repeat(samples, lambda: gamma(2, 2)/10))), \
101 | 		  0.4)
102 | 
103 | 	mhtest("gamma query", \
104 | 			lambda: gamma(2, 2)/10, \
105 | 			0.4)
106 | 
107 | 	eqtest("gamma lp", \
108 | 		[gamma_logprob(1, 2, 2), \
109 | 		 gamma_logprob(4, 2, 2), \
110 | 		 gamma_logprob(8, 2, 2)], \
111 | 		[-1.8862944092546166, -2.000000048134726, -3.306852867574781])
112 | 
113 | 	test("beta sample", \
114 | 		  repeat(runs, lambda: mean(repeat(samples, lambda: beta(2, 5)))), \
115 | 		  2.0/(2+5))
116 | 
117 | 	mhtest("beta query", \
118 | 			lambda: beta(2, 5), \
119 | 			2.0/(2+5))
120 | 
121 | 	eqtest("beta lp", \
122 | 		[beta_logprob(.1, 2, 5), \
123 | 		 beta_logprob(.2, 2, 5), \
124 | 		 beta_logprob(.6, 2, 5)], \
125 | 		[0.677170196389683, 0.899185234324094, -0.7747911992475776])
126 | 
127 | 	test("binomial sample", \
128 | 		  repeat(runs, lambda: mean(repeat(samples, lambda: binomial(.5, 40)/40.0))), \
129 | 		  0.5)
130 | 
131 | 	mhtest("binomial query", \
132 | 			lambda: binomial(.5, 40)/40.0, \
133 | 			0.5)
134 | 
135 | 	eqtest("binomial lp", \
136 | 		[binomial_logprob(15, .5, 40), \
137 | 		 binomial_logprob(20, .5, 40), \
138 | 		 binomial_logprob(30, .5, 40)], \
139 | 		[-3.3234338674089985, -2.0722579911387817, -7.2840211276953575])
140 | 
141 | 	test("poisson sample", \
142 | 		  repeat(runs, lambda: mean(repeat(samples, lambda: poisson(4)/10.0))), \
143 | 		  0.4)
144 | 
145 | 	mhtest("poisson query", \
146 | 			lambda: poisson(4)/10.0, \
147 | 			0.4)
148 | 
149 | 	eqtest("poisson lp", \
150 | 		[poisson_logprob(2, 4), \
151 | 		 poisson_logprob(5, 4), \
152 | 		 poisson_logprob(7, 4)], \
153 | 		[-1.9205584583201643, -1.8560199371825927, -2.821100833226181])
154 | 
155 | 
156 | 	"""
157 | 	Tests adapted from Church
158 | 	"""
159 | 
160 | 	def flipSetTest():
161 | 		a = 1.0 / 1000
162 | 		condition(flip(a))
163 | 		return a
164 | 	mhtest("setting a flip", \
165 | 			flipSetTest, \
166 | 			1.0/1000, \
167 | 			tolerance=1e-15)
168 | 
169 | 
170 | 	def andConditionedOnOrTest():
171 | 		a = flip()
172 | 		b = flip()
173 | 		condition(a or b)
174 | 		return a and b
175 | 	mhtest("and conditioned on or", \
176 | 			andConditionedOnOrTest, \
177 | 			1.0/3)
178 | 
179 | 	def biasedFlipTest():
180 | 		a = flip(0.3)
181 | 		b = flip(0.3)
182 | 		condition(a or b)
183 | 		return a and b
184 | 	mhtest("and conditioned on or, biased flip", \
185 | 			biasedFlipTest, \
186 | 			(0.3*0.3) / (0.3*0.3 + 0.7*0.3 + 0.3*0.7))
187 | 
188 | 
189 | 	def conditionedFlipTest():
190 | 		bitFlip = lambda fidelity, x: flip(fidelity if x else 1 - fidelity)
191 | 		hyp = flip(0.7)
192 | 		condition(bitFlip(0.8, hyp))
193 | 		return hyp
194 | 	mhtest("conditioned flip", \
195 | 			conditionedFlipTest, \
196 | 			(0.7*0.8) / (0.7*0.8 + 0.3*0.2))
197 | 
198 | 
199 | 	def randomIfBranchTest():
200 | 		if flip(0.7):
201 | 			return flip(0.2)
202 | 		else:
203 | 			return flip(0.8)
204 | 	mhtest("random 'if' with random branches, unconditioned", \
205 | 			randomIfBranchTest, \
206 | 			0.7*0.2 + 0.3*0.8)
207 | 
208 | 
209 | 	mhtest("flip with random weight, unconditioned", \
210 | 			lambda: flip(0.2 if flip(0.7) else 0.8), \
211 | 			0.7*0.2 + 0.3*0.8)
212 | 
213 | 
214 | 	def randomProcAppTest():
215 | 		proc = (lambda x: flip(0.2)) if flip(0.7) else (lambda x: flip(0.8))
216 | 		return proc(1)
217 | 	mhtest("random procedure application, unconditioned", \
218 | 			randomProcAppTest, \
219 | 			0.7*0.2 + 0.3*0.8)
220 | 
221 | 
222 | 	def conditionedMultinomialTest():
223 | 		hyp = multinomialDraw(['b', 'c', 'd'], [0.1, 0.6, 0.3])
224 | 		def observe(x):
225 | 			if flip(0.8):
226 | 				return x
227 | 			else:
228 | 				return 'b'
229 | 		condition(observe(hyp) == 'b')
230 | 		return hyp == 'b'
231 | 	mhtest("conditioned multinomial", \
232 | 			conditionedMultinomialTest, \
233 | 			0.357)
234 | 
235 | 
236 | 	def recursiveStochasticTailTest():
237 | 		def powerLaw(prob, x):
238 | 			if flip(prob, isStructural=True):
239 | 				return x
240 | 			else:
241 | 				return powerLaw(prob, x+1)
242 | 		a = powerLaw(0.3, 1)
243 | 		return a < 5
244 | 	mhtest("recursive stochastic fn, unconditioned (tail recursive)", \
245 | 			recursiveStochasticTailTest, \
246 | 			0.7599)
247 | 
248 | 	def recursiveStochasticTest():
249 | 		def powerLaw(prob, x):
250 | 			if flip(prob, isStructural=True):
251 | 				return x
252 | 			else:
253 | 				return 0 + powerLaw(prob, x+1)
254 | 		a = powerLaw(0.3, 1)
255 | 		return a < 5
256 | 	mhtest("recursive stochastic fn, unconditioned", \
257 | 			recursiveStochasticTest, \
258 | 			0.7599)
259 | 
260 | 
261 | 	def memoizedFlipTest():
262 | 		proc = mem(lambda x: flip(0.8))
263 | 		return all([proc(1), proc(2), proc(1), proc(2)])
264 | 	mhtest("memoized flip, unconditioned", \
265 | 			memoizedFlipTest, \
266 | 			0.64)
267 | 
268 | 
269 | 	def memoizedFlipConditionedTest():
270 | 		proc = mem(lambda x: flip(0.2))
271 | 		condition(any([proc(1), proc(2), proc(2), proc(2)]))
272 | 		return proc(1)
273 | 	mhtest("memoized flip, conditioned", \
274 | 			memoizedFlipConditionedTest, \
275 | 			0.5555555555555555)
276 | 
277 | 
278 | 	def boundSymbolInMemoizerTest():
279 | 		a = flip(0.8)
280 | 		proc = mem(lambda x: a)
281 | 		return all([proc(1), proc(1)])
282 | 	mhtest("bound symbol used inside memoizer, unconditioned", \
283 | 			boundSymbolInMemoizerTest, \
284 | 			0.8)
285 | 
286 | 
287 | 	def memRandomArgTest():
288 | 		proc = mem(lambda x: flip(0.8))
289 | 		return all([proc(uniformDraw([1,2,3], isStructural=True)), proc(uniformDraw([1,2,3], isStructural=True))])
290 | 	mhtest("memoized flip with random argument, unconditioned", \
291 | 			memRandomArgTest, \
292 | 			0.6933333333333334)
293 | 
294 | 
295 | 	def memRandomProc():
296 | 		proc = (lambda x: flip(0.2)) if flip(0.7) else (lambda x: flip(0.8))
297 | 		memproc = mem(proc)
298 | 		return all([memproc(1), memproc(2)])
299 | 	mhtest("memoized random procedure, unconditioned", \
300 | 			memRandomProc, \
301 | 			0.22)
302 | 
303 | 
304 | 	def mhOverRejectionTest():
305 | 		def bitFlip(fidelity, x):
306 | 			return flip(fidelity if x else (1-fidelity))
307 | 		def innerQuery():
308 | 			a = flip(0.7)
309 | 			condition(bitFlip(0.8, a))
310 | 			return a
311 | 		return rejectionSample(innerQuery)
312 | 	mhtest("mh-query over rejection query for conditioned flip", \
313 | 			mhOverRejectionTest, \
314 | 			0.903225806451613)
315 | 
316 | 
317 | 	def transDimensionalTest():
318 | 		a = beta(1, 5) if flip(0.9, isStructural=True) else 0.7
319 | 		b = flip(a)
320 | 		condition(b)
321 | 		return a
322 | 	mhtest("trans-dimensional", \
323 | 			transDimensionalTest, \
324 | 			0.417)
325 | 
326 | 
	def transDimensionalLARJTest():
			"""
			Same model as transDimensionalTest, but checked through larjtest
				(LARJMH-based expectation) instead of mhtest.
			'a' is either a beta(1, 5) draw or the constant 0.7, chosen by a
				structural flip; the trace is conditioned on flip(a) being true.
			"""
			a = beta(1, 5) if flip(0.9, isStructural=True) else 0.7
			b = flip(a)
			condition(b)
			return a
	larjtest("trans-dimensional (LARJ)", \
			  transDimensionalLARJTest, \
			  0.417)
335 | 
336 | 
337 | 	def memFlipInIfTest():
338 | 		a = mem(flip) if flip() else mem(flip)
339 | 		b = a()
340 | 		return b
341 | 	mhtest("memoized flip in if branch (create/destroy memprocs), unconditioned", \
342 | 			memFlipInIfTest, \
343 | 			0.5)
344 | 
345 | 
346 | 	"""
347 | 	Tests for things specific to new implementation
348 | 	"""
349 | 
350 | 
	def nativeLoopTest():
		"""
		Sum four flip() draws made inside a plain Python 'for' loop and
			return the sum divided by 4.0.
		"""
		accum  = 0
		for i in xrange(4):
			# Every iteration calls flip() from the same call site
			accum += flip()
		return accum / 4.0
	mhtest("native for loop", \
			nativeLoopTest, \
			0.5)
359 | 
360 | 
	def directConditionTest():
		"""
		Sum ten flip(0.5) draws, the first five of which are created with
			conditionedValue=True, and return the sum divided by 10.0.
		The expected value 0.75 corresponds to five draws fixed at True plus
			five free draws averaging 0.5 (assuming a conditioned flip returns
			its conditioned value).
		"""
		# One-element list so the nested function can update the total
		accum = [0]
		def block(i):
			if i < 5:
				accum[0] += flip(0.5, conditionedValue=True)
			else:
				accum[0] += flip(0.5)
		# presumably ntimes calls block(i) for i = 0..9 -- see control.py
		ntimes(10, block)
		return accum[0] / 10.0
	mhtest("directly conditioning variable values", \
			directConditionTest, \
			0.75)
373 | 
374 | 
375 | 	print "tests done!"
376 | 
377 | 	d2 = datetime.now()
378 | 	print "time: {0}".format((d2 - d1).total_seconds())
379 | 
380 | 


--------------------------------------------------------------------------------
/probabilistic/trace.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | import copy
  3 | from collections import Counter
  4 | 
  5 | class RandomVariableRecord:
  6 | 	"""
  7 | 	Variables generated by ERPs.
  8 | 	These form the 'choice points' in a probabilistic program trace.
  9 | 	"""
 10 | 
 11 | 	def __init__(self, name, erp, params, val, logprob, structural, conditioned=False):
 12 | 		self.name = name
 13 | 		self.erp = erp
 14 | 		self.params = params
 15 | 		self.val = val
 16 | 		self.logprob = logprob
 17 | 		self.active = True
 18 | 		self.conditioned = conditioned
 19 | 		self.structural = structural
 20 | 
 21 | class RandomExecutionTrace:
 22 | 	"""
 23 | 	Execution trace generated by a probabilistic program.
 24 | 	Tracks the random choices made and accumulates probabilities
 25 | 	"""
 26 | 
 27 | 	def __init__(self, computation, doRejectionInit=True):
 28 | 		self.computation = computation
 29 | 		self._vars = {}
 30 | 		self.varlist = []
 31 | 		self.currVarIndex = 0
 32 | 		self.logprob = 0
 33 | 		self.newlogprob = 0		# From newly-added variables
 34 | 		self.oldlogprob = 0		# From unreachable variables
 35 | 		self.rootframe = None
 36 | 		self.loopcounters = Counter()
 37 | 		self.conditionsSatisfied = False
 38 | 		self.returnValue = None
 39 | 		if doRejectionInit:
 40 | 			while not self.conditionsSatisfied:
 41 | 				self._vars.clear()
 42 | 				self.traceUpdate()
 43 | 
 44 | 	def __deepcopy__(self, memo):
 45 | 		newdb = RandomExecutionTrace(self.computation, doRejectionInit=False)
 46 | 		newdb.logprob = self.logprob
 47 | 		newdb.oldlogprob = self.oldlogprob
 48 | 		newdb.newlogprob = self.newlogprob
 49 | 		newdb.varlist = [copy.copy(record) for record in self.varlist]
 50 | 		newdb._vars = {record.name:record for record in newdb.varlist}
 51 | 		newdb.conditionsSatisfied = self.conditionsSatisfied
 52 | 		newdb.returnValue = self.returnValue
 53 | 		return newdb
 54 | 
 55 | 	def freeVarNames(self, structural=True, nonstructural=True):
 56 | 		return map(lambda tup: tup[0], \
 57 | 				   filter(lambda tup: not tup[1].conditioned and \
 58 | 				   					  ((structural and tup[1].structural) or (nonstructural and not tup[1].structural)), \
 59 | 						  self._vars.iteritems()))
 60 | 
 61 | 	def varDiff(self, other):
 62 | 		"""
 63 | 		The names of the variables that this trace has that the other trace does not have
 64 | 		"""
 65 | 		return list(set(self._vars.keys()) - set(other._vars.keys()))
 66 | 
 67 | 	def lpDiff(self, other):
 68 | 		"""
 69 | 		The difference in log probability between this trace and the other resulting
 70 | 		from the variables that this has that the other does not
 71 | 		"""
 72 | 		return sum(map(lambda name: self._vars[name].logprob, self.varDiff(other)))
 73 | 
 74 | 	def traceUpdate(self, structureIsFixed=False):
 75 | 		"""
 76 | 		Run computation and update this trace accordingly
 77 | 		"""
 78 | 
 79 | 		global _trace
 80 | 		originalTrace = _trace
 81 | 		_trace = self
 82 | 
 83 | 		self.logprob = 0.0
 84 | 		self.newlogprob = 0.0
 85 | 		self.loopcounters.clear()
 86 | 		self.conditionsSatisfied = True
 87 | 		self.currVarIndex = 0
 88 | 
 89 | 		# If updating this trace can change the variable structure, then we
 90 | 		# clear out the flat list of variables beforehand
 91 | 		if not structureIsFixed:
 92 | 			self.varlist = []
 93 | 
 94 | 		# First, mark all random values as 'inactive'; only
 95 | 		# those reeached by the computation will become 'active'
 96 | 		for record in self._vars.values():
 97 | 			record.active = False
 98 | 
 99 | 		# Mark that this is the 'root' of the current execution trace
100 | 		self.rootframe = sys._getframe()
101 | 
102 | 		# Run the computation, which will create/lookup random variables
103 | 		self.returnValue = self.computation()
104 | 
105 | 		# Clear out the root frame, etc.
106 | 		self.rootframe = None
107 | 		self.loopcounters.clear()
108 | 
109 | 		# Clean up any random values that are no longer reachable
110 | 		self.oldlogprob = 0.0
111 | 		for record in self._vars.values():
112 | 			if not record.active:
113 | 				self.oldlogprob += record.logprob
114 | 		self._vars = {name:record for name,record in self._vars.iteritems() if record.active}
115 | 
116 | 		_trace = originalTrace
117 | 
118 | 	def proposeChange(self, varname):
119 | 		"""
120 | 		Propose a random change to the variable name 'varname'
121 | 		Returns a new sample trace from the computation and the
122 | 			forward and reverse probabilities of proposing this change
123 | 		"""
124 | 		nextTrace = copy.deepcopy(self)
125 | 		var = nextTrace.getRecord(varname)
126 | 		propval = var.erp._proposal(var.val, var.params)
127 | 		fwdPropLP = var.erp._logProposalProb(var.val, propval, var.params)
128 | 		rvsPropLP = var.erp._logProposalProb(propval, var.val, var.params)
129 | 		var.val = propval
130 | 		var.logprob = var.erp._logprob(var.val, var.params)
131 | 		nextTrace.traceUpdate(not var.structural)
132 | 		fwdPropLP += nextTrace.newlogprob
133 | 		rvsPropLP += nextTrace.oldlogprob
134 | 		return nextTrace, fwdPropLP, rvsPropLP
135 | 
136 | 	def currentName(self, numFrameSkip):
137 | 		"""
138 | 		Return the current name, as determined by the interpreter
139 | 			stack of the current program.
140 | 		Skips the top 'numFrameSkip' stack frames that precede this
141 | 			function's stack frame (numFrameSkip+1 frames total)
142 | 		"""
143 | 
144 | 		# Get list of frames from the root to the current frame
145 | 		f = sys._getframe(numFrameSkip+1)
146 | 		flst = []
147 | 		while f and f is not self.rootframe:
148 | 			flst.insert(0, f)
149 | 			f = f.f_back
150 | 
151 | 		# Build up name string, checking loop counters along the way
152 | 		name = ""
153 | 		for i in xrange(len(flst)-1):
154 | 			f = flst[i]
155 | 			name += "{0}:{1}".format(id(f.f_code), f.f_lasti)
156 | 			loopnum = self.loopcounters[name]
157 | 			name += ":{0}|".format(loopnum)
158 | 		# For the last (topmost) frame, also increment the loop counter
159 | 		f = flst[-1]
160 | 		name += "{0}:{1}".format(id(f.f_code), f.f_lasti)
161 | 		loopnum = self.loopcounters[name]
162 | 		self.loopcounters[name] += 1
163 | 		name += ":{0}|".format(loopnum)
164 | 
165 | 		return name
166 | 
167 | 	def lookup(self, erp, params, numFrameSkip, isStructural, conditionedValue=None):
168 | 		"""
169 | 		Looks up the value of a random variable.
170 | 		If this random variable does not exist, create it
171 | 		"""
172 | 
173 | 		record = None
174 | 		name = None
175 | 		# Try to find the variable (first check the flat list, then do
176 | 		# slower structural name lookup)
177 | 		varIsInFlatList = self.currVarIndex < len(self.varlist)
178 | 		if varIsInFlatList:
179 | 			record = self.varlist[self.currVarIndex]
180 | 		else:
181 | 			name = self.currentName(numFrameSkip+1)
182 | 			record = self._vars.get(name)
183 | 			if (not record or record.erp is not erp or isStructural != record.structural):
184 | 				record = None
185 | 		# If we didn't find the variable, create a new one
186 | 		if not record:
187 | 			val = (conditionedValue if conditionedValue else erp._sample_impl(params))
188 | 			ll = erp._logprob(val, params)
189 | 			self.newlogprob += ll
190 | 			record = RandomVariableRecord(name, erp, params, val, ll, isStructural, conditionedValue != None)
191 | 			self._vars[name] = record
192 | 		# Otherwise, reuse the variable we found, but check if its parameters/conditioning
193 | 		# status have changed
194 | 		else:
195 | 			record.conditioned = (conditionedValue != None)
196 | 			hasChanges = False
197 | 			if record.params != params:
198 | 				record.params = params
199 | 				hasChanges = True
200 | 			if conditionedValue and conditionedValue != record.val:
201 | 				record.val = conditionedValue
202 | 				record.conditioned = True
203 | 				hasChanges = True
204 | 			if hasChanges:
205 | 				record.logprob = erp._logprob(record.val, record.params)
206 | 
207 | 		# Finish up and return
208 | 		if not varIsInFlatList:
209 | 			self.varlist.append(record)
210 | 		self.currVarIndex += 1
211 | 		self.logprob += record.logprob
212 | 		record.active = True
213 | 		return record.val
214 | 
215 | 	def getRecord(self, name):
216 | 		"""
217 | 		Simply retrieve the variable record associated with name
218 | 		"""
219 | 		return self._vars.get(name)
220 | 
221 | 	def addFactor(self, num):
222 | 		"""
223 | 		Add a new factor into the log likelihood of the current trace
224 | 		"""
225 | 		self.logprob += num
226 | 
227 | 	def conditionOn(self, boolexpr):
228 | 		"""
229 | 		Condition the trace on the value of a boolean expression
230 | 		"""
231 | 		self.conditionsSatisfied = self.conditionsSatisfied and boolexpr
232 | 
"""
Global singleton instance
"""
_trace = None

def lookupVariableValue(erp, params, isStructural, numFrameSkip, conditionedValue=None):
	"""
	Return the value of a random variable drawn by 'erp' with 'params'.
	With no trace installed the value is simply 'conditionedValue' if one
		was given (anything other than None, including False and 0) or a
		fresh sample otherwise.
	With a trace installed the lookup is delegated to it, skipping one
		extra stack frame to account for this function.
	"""
	global _trace
	if not _trace:
		# Explicit None test: conditioning on a falsy value must not fall
		# through to sampling
		if conditionedValue is not None:
			return conditionedValue
		return erp._sample_impl(params)
	else:
		return _trace.lookup(erp, params, numFrameSkip+1, isStructural, conditionedValue)
244 | 
def newTrace(computation):
	"""
	Build an execution trace for 'computation' with the constructor's defaults
	"""
	trace = RandomExecutionTrace(computation)
	return trace
247 | 
def factor(num):
	"""
	Add 'num' to the log probability of the installed trace; does nothing
		when no trace is installed
	"""
	activeTrace = _trace
	if not activeTrace:
		return
	activeTrace.addFactor(num)
252 | 
def condition(boolexpr):
	"""
	Condition the installed trace on 'boolexpr'; does nothing when no trace
		is installed
	"""
	activeTrace = _trace
	if not activeTrace:
		return
	activeTrace.conditionOn(boolexpr)


--------------------------------------------------------------------------------
/sandbox.py:
--------------------------------------------------------------------------------
  1 | from probabilistic import *
  2 | import math
  3 | from collections import Counter
  4 | import cProfile
  5 | import pstats
  6 | 
  7 | ###############################
  8 | 
  9 | def distribForward(computation, iters):
 10 | 	hist = Counter()
 11 | 	i = 0
 12 | 	while i < iters:
 13 | 		i += 1
 14 | 		hist[computation()] += 1
 15 | 	for x in hist:
 16 | 		hist[x] /= float(iters)
 17 | 	return hist
 18 | 
 19 | def compareForwardToMHDists(computation, iters):
 20 | 	forwardhist = distribForward(computation, iters)
 21 | 	print "Forward hist:"
 22 | 	print forwardhist
 23 | 	mhhist = distrib(computation, traceMH, iters)
 24 | 	print "MH hist:"
 25 | 	print mhhist
 26 | 
 27 | def meanForward(computation, iters):
 28 | 	mean = computation()
 29 | 	i = 0
 30 | 	while i < iters-1:
 31 | 		i += 1
 32 | 		mean += computation()
 33 | 	return mean / iters
 34 | 
 35 | def compareForwardToMHMeans(computation, iters):
 36 | 	forwardmean = meanForward(computation, iters)
 37 | 	print "Forward mean: {0}".format(forwardmean)
 38 | 	mhmean = expectation(computation, traceMH, iters)
 39 | 	print "MH mean: {0}".format(mhmean)
 40 | 
 41 | ###############################
 42 | 
def ones():
	"""
	Return a list of 1s of random length: each truthy flip(0.75) adds a 1
		and recurses, the first falsy flip ends the list.
	"""
	if flip(0.75):
		return [1] + ones()
	else:
		return []
 48 | 
def numOnes():
	"""
	Return the length of the list produced by ones()
	"""
	return len(ones())
 51 | 
def constrainedOnes():
	"""
	Return a ones() list, adding a factor that penalizes lengths other
		than 4.
	"""
	seq = ones()
	# Penalty is |len - 4| ** 10, so it is zero only for length 4
	factor(-math.pow(abs(len(seq) - 4), 10))
	return seq
 56 | 
def sumOfTen():
	"""
	Return the sum of ten flip(0.5) draws, each made from its own call site.
	The first two draws are created with conditionedValue=True.
	"""
	num = 0
	num += flip(0.5, conditionedValue=True)
	num += flip(0.5, conditionedValue=True)
	# num += flip(0.5)
	# num += flip(0.5)
	num += flip(0.5)
	num += flip(0.5)
	num += flip(0.5)
	num += flip(0.5)
	num += flip(0.5)
	num += flip(0.5)
	num += flip(0.5)
	num += flip(0.5)
	return num
 72 | 
def constrainedSumOfTen():
	"""
	Return sumOfTen(), conditioned on the sum being at least 5
	"""
	num = sumOfTen()
	condition(num >= 5)
	return num
 77 | 
def sumOfTenWhile():
	"""
	Sum flip(0.5) draws using the 'until' control structure, with a counter
		that stops at 10.
	"""
	# One-element lists so the nested function can update both values
	num = [0]
	i = [0]
	def block():
		i[0] += 1
		num[0] += flip(0.5)
	# presumably runs block until the condition holds -- see control.py
	until(lambda: i[0] == 10, block)
	return num[0]
 86 | 
def sumOfTenFor():
	"""
	Sum flip(0.5) draws using the 'foreach' control structure over
		xrange(10).
	"""
	# One-element list so the nested function can update the total
	num = [0]
	def block(i):
		num[0] += flip(0.5)
	# presumably calls block once per element -- see control.py
	foreach(xrange(10), block)
	return num[0]
 93 | 
def sumOfTenMap():
	"""
	Return the sum of ten flip(0.5) draws made through the builtin map
	"""
	return sum(map(lambda x: flip(0.5), range(10)))
 96 | 
def oneGaussian():
	"""
	Return a single gaussian(10, 0.5) draw
	"""
	return gaussian(10, 0.5)
 99 | 
def oneGamma():
	"""
	Return a single gamma(9, 0.5) draw
	"""
	return gamma(9, 0.5)
102 | 
def oneBeta():
	"""
	Return a single beta(2, 2) draw
	"""
	return beta(2, 2)
105 | 
def oneBinomial():
	"""
	Return a single binomial(0.5, 40) draw
	"""
	return binomial(0.5, 40)
108 | 
def onePoisson():
	"""
	Return a single poisson(10) draw
	"""
	return poisson(10)
111 | 
def memTest():
	"""
	Call a mem-wrapped flip twice with the same argument and return whether
		both calls gave the same result.
	"""
	# presumably mem caches results per argument -- see memoize.py
	func = mem(lambda x: flip(x))
	result1 = func(0.5)
	result2 = func(0.5)
	return result1 == result2
117 | 
def noisyOr(a, astrength, b, bstrength, baserate):
	"""
	Noisy-or of causes 'a' and 'b': truthy if 'a' holds and flip(astrength)
		succeeds, or 'b' holds and flip(bstrength) succeeds, or
		flip(baserate) succeeds.
	Evaluation short-circuits, so later flips are only drawn when the
		earlier terms are falsy.
	"""
	return (flip(astrength) and a) or \
		   (flip(bstrength) and b) or \
		   flip(baserate)
122 | 
def sprinklerTest():
	"""
	Rain/sprinkler/wet-grass model: returns whether it rained on "day2",
		conditioned on the grass being wet on "day2".
	"""

	weights = {"rain-str": 0.9, \
			   "rain-prior": 0.3, \
			   "sprinkler-str": 0.9, \
			   "sprinkler-prior": 0.2, \
			   "grass-baserate": 0.1}

	# The three predicates are wrapped in mem and take the day as argument
	@mem
	def rain(day):
		return flip(weights["rain-prior"])

	@mem
	def sprinkler(day):
		return flip(weights["sprinkler-prior"])

	@mem
	def grassIsWet(day):
		return noisyOr(rain(day), weights["rain-str"], \
					   sprinkler(day), weights["sprinkler-str"], \
					   weights["grass-baserate"])

	condition(grassIsWet("day2"))

	return rain("day2")
148 | 
149 | ## ChurchServ version of the above test, for comparison:
150 | # (define (noisy-or a astrength b bstrength baserate)
151 | #   (or (and (flip astrength) a)
152 | #       (and (flip bstrength) b)
153 | #       (flip baserate)))
154 | # (define sprinklerTest
155 | #   (mh-query 100 100
156 | #      (define weight (lambda (ofwhat)
157 | #        (case ofwhat
158 | #          (('rain-str) 0.9)
159 | #          (('rain-prior) 0.3)
160 | #          (('sprinkler-str) 0.9)
161 | #          (('sprinkler-prior) 0.2)
162 | #          (('grass-baserate) 0.1))))
163 | #      (define grass-is-wet (mem (lambda (day)
164 | #        (noisy-or
165 | #         (rain day) (weight 'rain-str)
166 | #         (sprinkler day) (weight 'sprinkler-str)
167 | #         (weight 'grass-baserate)))))
168 | #      (define rain (mem (lambda (day)
169 | #        (flip (weight 'rain-prior)))))
170 | #      (define sprinkler (mem (lambda (day)
171 | #        (flip (weight 'sprinkler-prior)))))
172 |      
173 | #      (rain 'day2)
174 |      
175 | #      (grass-is-wet 'day2)
176 | #   )
177 | # )
178 | # (hist sprinklerTest "Rained on Day2?")
179 | 
# Settings shared by the constrainedString* models and their exact
# distributions: candidate string lengths, the weight of each length, and
# the multiplier applied to every penalty count
# stringLengthProbs = repeat(6, lambda: 1.0)
# stringLengths = range(5, 11)
stringLengthProbs = [0.5, 0.5]
stringLengths = [3, 4]
penaltyMultiplier = 5
185 | 
def stringsOfLength(length, numvals):
	"""
	Generate every tuple of 'length' elements drawn from 0..numvals-1.
	Tuples come out in lexicographic order (the last element varies
		fastest); length 0 yields the single empty tuple.
	Raises ValueError for a negative length.
	"""
	import itertools
	# itertools.product enumerates in the same order as the nested loops it
	# replaces, without re-copying the prefix list at every level
	for tup in itertools.product(range(numvals), repeat=length):
		yield tup
196 | 
def constrainedStringA():
	"""
	Draw a string length structurally from stringLengths, fill the string
		with int(flip(0.5)) values and return it as a tuple.
	Even-length strings get a penalty factor per 1, odd-length strings a
		penalty factor per 0.
	"""
	numelems = multinomialDraw(stringLengths, stringLengthProbs, isStructural=True)
	seq = repeat(numelems, lambda: int(flip(0.5)))
	if numelems % 2 == 0:
		factor(-penaltyMultiplier * len(filter(lambda num: num == 1, seq)))
	else:
		factor(-penaltyMultiplier * len(filter(lambda num: num == 0, seq)))
	return tuple(seq)
205 | 
def constrainedStringATrueDist():
	"""
	Compute by enumeration the normalized distribution over the binary
		strings of every length in stringLengths, using the same weights as
		constrainedStringA.
	Returns a dict mapping each string (a tuple) to its probability.
	"""
	hist = {}
	for numelems in stringLengths:
		# Probability of choosing this many elements
		numlp = erp.multinomial_logprob(stringLengths.index(numelems), stringLengthProbs)
		# Even lengths are penalized per 1, odd lengths per 0
		penalized = 1 if numelems % 2 == 0 else 0
		for seq in stringsOfLength(numelems, 2):
			# Prior probability of each element value, then the penalties
			lp = -numelems*math.log(2)
			lp -= penaltyMultiplier * seq.count(penalized)
			hist[seq] = numlp + lp
	# Normalize by partition function
	logz = math.log(sum(math.exp(weight) for weight in hist.values()))
	for seq in hist:
		hist[seq] = math.exp(hist[seq] - logz)
	return hist
225 | 
226 | 
def constrainedStringB():
	"""
	Draw a string length structurally from stringLengths, fill the string
		with three-way multinomial draws and return it as a tuple.
	Adds one penalty factor per pair of identical consecutive elements and
		one per pair of differing elements at mirrored positions.
	"""
	onethird = 1.0/3
	numelems = multinomialDraw(stringLengths, stringLengthProbs, isStructural=True)
	seq = repeat(numelems, lambda: multinomial([onethird, onethird, onethird]))
	numIdenticalConsec = 0
	for i in xrange(numelems-1):
		numIdenticalConsec += (seq[i] == seq[i+1])
	factor(-penaltyMultiplier * numIdenticalConsec)
	numDifferentOpposing = 0
	# Compare element i with its mirror image from the other end
	for i in xrange(numelems/2):
		numDifferentOpposing += (seq[i] != seq[numelems-1-i])
	factor(-penaltyMultiplier * numDifferentOpposing)
	return tuple(seq)
240 | 
def constrainedStringBTrueDist():
	"""
	Compute by enumeration the normalized distribution over the ternary
		strings of every length in stringLengths, using the same weights as
		constrainedStringB.
	Returns a dict mapping each string (a tuple) to its probability.
	"""
	hist = {}
	for numelems in stringLengths:
		# Probability of choosing this many elements
		numlp = erp.multinomial_logprob(stringLengths.index(numelems), stringLengthProbs)
		for seq in stringsOfLength(numelems, 3):
			# Prior probability of each element value
			lp = -numelems*math.log(3)
			# Identical consecutive element penalty
			repeats = sum(seq[i] == seq[i+1] for i in xrange(numelems-1))
			lp -= penaltyMultiplier * repeats
			# Different opposing element penalty
			mismatches = sum(seq[i] != seq[numelems-1-i] for i in xrange(numelems/2))
			lp -= penaltyMultiplier * mismatches
			hist[seq] = numlp + lp
	# Normalize by partition function
	logz = math.log(sum(math.exp(weight) for weight in hist.values()))
	for seq in hist:
		hist[seq] = math.exp(hist[seq] - logz)
	return hist
265 | 
def klDivergence(P, Q):
	"""
	Return the KL divergence of Q from P, i.e. the sum over the outcomes x
		of P of P[x] * (log P[x] - log Q[x]).
	P and Q map outcomes to probabilities. An outcome missing from Q counts
		as probability 0, which makes the result infinite when P gives that
		outcome nonzero probability.
	"""
	kldiv = 0.0
	for x in P:
		p = P[x]
		# A sampled distribution need not contain every outcome of P
		try:
			q = Q[x]
		except KeyError:
			q = 0.0
		if p != 0.0:
			logq = math.log(q) if q != 0.0 else -float('inf')
			kldiv += (math.log(p) - logq) * p
	return kldiv
275 | 
def totalVariationDist(P, Q):
	"""
	Return the total variation distance between P and Q: half the sum of
		|P[x] - Q[x]| over every outcome x of either distribution.
	P and Q map outcomes to probabilities; an outcome missing from one of
		them counts as probability 0 there.
	"""
	total = 0.0
	for x in P:
		# A sampled distribution need not contain every outcome of P
		try:
			q = Q[x]
		except KeyError:
			q = 0.0
		total += abs(P[x] - q)
	# Mass that Q puts on outcomes P does not list also counts
	for x in Q:
		if x not in P:
			total += abs(Q[x])
	return 0.5*total
281 | 
282 | 
283 | ###############################
284 | 
if __name__ == "__main__":

	# Scratch area: earlier experiments are kept below, commented out

	# compareForwardToMHDists(numOnes, 1000)
	# compareForwardToMHDists(sumOfTen, 1000)
	# compareForwardToMHDists(sumOfTenWhile, 1000)
	# compareForwardToMHDists(sumOfTenFor, 1000)
	# compareForwardToMHDists(sumOfTenMap, 1000)
	# compareForwardToMHMeans(oneGaussian, 10000)
	# compareForwardToMHMeans(oneGamma, 10000)
	# compareForwardToMHMeans(oneBeta, 10000)
	# compareForwardToMHMeans(oneBinomial, 10000)
	# compareForwardToMHMeans(onePoisson, 10000)

	# print memTest()

	# print distrib(sprinklerTest, traceMH, 10000)

	# print MAP(oneGaussian, traceMH, 10000)

	# print distrib(constrainedStringA, traceMH, 10000)
	# print "-------------------------------------------"
	# print constrainedStringATrueDist()
	# print totalVariationDist(constrainedStringATrueDist(), distrib(constrainedStringA, traceMH, 1000, 1, True))
	# print totalVariationDist(constrainedStringATrueDist(), distrib(constrainedStringA, LARJMH, 1000, 20, None, 1, True))
	# print totalVariationDist(constrainedStringBTrueDist(), distrib(constrainedStringB, traceMH, 1000, 1, True))
	# print totalVariationDist(constrainedStringBTrueDist(), distrib(constrainedStringB, LARJMH, 1000, 10, None, 1, True))
	# Profile a LARJMH run of constrainedStringA; the statistics are saved
	# to the file 'prof' and then reported twice, limited to 10 entries:
	# once sorted by cumulative time and once by internal time
	cProfile.run('distrib(constrainedStringA, LARJMH, 1000, 20)', 'prof')
	p = pstats.Stats('prof')
	p.strip_dirs().sort_stats('cumulative').print_stats(10)
	p.strip_dirs().sort_stats('time').print_stats(10)
315 | 


--------------------------------------------------------------------------------