├── .gitignore ├── README.md ├── probabilistic ├── __init__.py ├── control.py ├── erp.py ├── inference.py ├── memoize.py ├── test.py └── trace.py └── sandbox.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[cod] 2 | 3 | # C extensions 4 | *.so 5 | 6 | # Packages 7 | *.egg 8 | *.egg-info 9 | dist 10 | build 11 | eggs 12 | parts 13 | bin 14 | var 15 | sdist 16 | develop-eggs 17 | .installed.cfg 18 | lib 19 | lib64 20 | 21 | # Installer logs 22 | pip-log.txt 23 | 24 | # Unit test / coverage reports 25 | .coverage 26 | .tox 27 | nosetests.xml 28 | 29 | # Translations 30 | *.mo 31 | 32 | # Mr Developer 33 | .mr.developer.cfg 34 | .project 35 | .pydevproject 36 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | probabilistic-python 2 | ==================== 3 | 4 | Turning Python into a probabilistic programming language -------------------------------------------------------------------------------- /probabilistic/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | """ 3 | Random variable generators 4 | """ 5 | from erp import flip, gaussian, gamma, beta, binomial, poisson, dirichlet, multinomial, uniform, multinomialDraw, uniformDraw 6 | 7 | 8 | """ 9 | Hard and soft constraints 10 | """ 11 | from trace import condition, factor 12 | def softEq(a, b, tolerance): 13 | return erp.gaussian_logprob(a-b, 0, tolerance) 14 | 15 | 16 | """ 17 | Inference procedures 18 | """ 19 | from inference import mean, distrib, expectation, MAP, rejectionSample, traceMH, LARJMH 20 | 21 | 22 | """ 23 | Control structures 24 | """ 25 | from control import ntimes, foreach, until, repeat 26 | 27 | 28 | """ 29 | Stochastic memoization 30 | """ 31 | from memoize import mem -------------------------------------------------------------------------------- /probabilistic/control.py: -------------------------------------------------------------------------------- 1 | 2 | def ntimes(times, block): 3 | """ 4 | Repeat a computation n times 5 | """ 6 | for i in xrange(times): 7 | block(i) 8 | 9 | def foreach(iterable, block): 10 | """ 11 | 'for' loop control structure suitable for use inside probabilistic programs. 12 | Invokes block for every element in iterable. 13 | """ 14 | for elem in iterable: 15 | block(elem) 16 | 17 | def until(condition, block): 18 | """ 19 | 'while' loop control structure suitable for use inside probabilistic programs. 20 | Invokes block until condition is true. 21 | """ 22 | cond = condition() 23 | while not cond: 24 | block() 25 | cond = condition() 26 | 27 | def repeat(times, proc): 28 | """ 29 | Evaluate proc() 'times' times and build a list out of the results 30 | """ 31 | return map(lambda x: proc(), range(times)) -------------------------------------------------------------------------------- /probabilistic/erp.py: -------------------------------------------------------------------------------- 1 | import random 2 | import trace 3 | import math 4 | import copy 5 | 6 | """ 7 | A bunch of sampling/pdf code adapted from jschurch: 8 | https://github.com/stuhlmueller/jschurch 9 | """ 10 | 11 | class RandomPrimitive: 12 | """ 13 | Abstract base class for all ERPs 14 | """ 15 | 16 | def _sample_impl(self, params): 17 | pass 18 | 19 | def _sample(self, params, isStructural, conditionedValue=None): 20 | # Assumes _sample is called from __call__ in 21 | # conrete subclasses 22 | return trace.lookupVariableValue(self, params, isStructural, 2, conditionedValue) 23 | 24 | def _logprob(self, val, params): 25 | pass 26 | 27 | def _proposal(self, currval, params): 28 | """ 29 | Subclasses can override to do more efficient proposals 30 | """ 31 | return self._sample_impl(params) 32 | 33 | def _logProposalProb(self, currval, propval, params): 34 | """ 35 | Subclasses can override to do more efficient proposals 36 | """ 37 | return self._logprob(propval, params) 38 | 39 | 40 | class FlipRandomPrimitive(RandomPrimitive): 41 | """ 42 | ERP with Bernoulli distribution 43 | """ 44 | 45 | def __init__(self): 46 | pass 47 | 48 | def __call__(self, p=0.5, isStructural=False, conditionedValue=None): 49 | return self._sample([p], isStructural, conditionedValue) 50 | 51 | def _sample_impl(self, params): 52 | p = params[0] 53 | randval = random.random() 54 | return randval < p 55 | 56 | def _logprob(self, val, params): 57 | p = params[0] 58 | val = bool(val) 59 | prob = (p if val else 1.0-p) 60 | return math.log(prob) 61 | 62 | def _proposal(self, currval, params): 63 | return not(currval) 64 | 65 | def _logProposalProb(self, currval, propval, params): 66 | return 0.0 # There's only one way to flip a binary variable 67 | 68 | 69 | def gaussian_logprob(x, mu, sigma): 70 | return -.5*(1.8378770664093453 + 2*math.log(sigma) + (x - mu)*(x - mu)/(sigma*sigma)) 71 | 72 | def gaussian_logprob_sigmaSq(x, mu, sigmaSq): 73 | return -.5*(1.8378770664093453 + math.log(sigmaSq) + (x - mu)*(x - mu)/sigmaSq) 74 | 75 | class GaussianRandomPrimitive(RandomPrimitive): 76 | """ 77 | ERP with Gaussian distribution 78 | """ 79 | 80 | def __init__(self): 81 | pass 82 | 83 | def __call__(self, mu, sigma, isStructural=False, conditionedValue=None): 84 | return self._sample([mu,sigma], isStructural, conditionedValue) 85 | 86 | def _sample_impl(self, params): 87 | return random.gauss(params[0], params[1]) 88 | 89 | def _logprob(self, val, params): 90 | return gaussian_logprob(val, params[0], params[1]) 91 | 92 | # Drift kernel 93 | def _proposal(self, currval, params): 94 | return random.gauss(currval, params[1]) 95 | 96 | # Drift kernel 97 | def _logProposalProb(self, currval, propval, params): 98 | return gaussian_logprob(propval, currval, params[1]) 99 | 100 | 101 | gamma_cof = [76.18009172947146, -86.50532032941677, 24.01409824083091, -1.231739572450155, 0.1208650973866179e-2, -0.5395239384953e-5] 102 | def log_gamma(xx): 103 | global gamma_cof 104 | x = xx - 1.0 105 | tmp = x + 5.5 106 | tmp -= (x + 0.5)*math.log(tmp) 107 | ser = 1.000000000190015 108 | for j in xrange(5): 109 | x += 1 110 | ser += gamma_cof[j] / x 111 | return -tmp + math.log(2.5066282746310005*ser) 112 | 113 | def gamma_logprob(x, a, b): 114 | return (a - 1)*math.log(x) - float(x)/b - log_gamma(a) - a*math.log(b); 115 | 116 | class GammaRandomPrimitive(RandomPrimitive): 117 | """ 118 | ERP with Gamma distribution 119 | """ 120 | 121 | def __init__(self): 122 | pass 123 | 124 | def __call__(self, a, b, isStructural=False, conditionedValue=None): 125 | return self._sample([a,b], isStructural, conditionedValue) 126 | 127 | def _sample_impl(self, params): 128 | return random.gammavariate(params[0], params[1]) 129 | 130 | def _logprob(self, val, params): 131 | return gamma_logprob(val, params[0], params[1]) 132 | 133 | # TODO: Custom proposal kernel? 134 | 135 | def log_beta(a, b): 136 | return log_gamma(a) + log_gamma(b) - log_gamma(a+b) 137 | 138 | def beta_logprob(x, a, b): 139 | if x > 0 and x < 1: 140 | return (a-1)*math.log(x) + (b-1)*math.log(1-x) - log_beta(a,b) 141 | else: 142 | return -float('inf') 143 | 144 | class BetaRandomPrimitive(RandomPrimitive): 145 | """ 146 | ERP with Beta distribution 147 | """ 148 | 149 | def __init__(self): 150 | pass 151 | 152 | def __call__(self, a, b, isStructural=False, conditionedValue=None): 153 | return self._sample([a,b], isStructural, conditionedValue) 154 | 155 | def _sample_impl(self, params): 156 | return random.betavariate(params[0], params[1]) 157 | 158 | def _logprob(self, val, params): 159 | return beta_logprob(val, params[0], params[1]) 160 | 161 | # TODO: Custom proposal kernel? 162 | 163 | def binomial_sample(p, n): 164 | k = 0 165 | N = 10 166 | a = 0 167 | b = 0 168 | while n > N: 169 | a = 1 + n/2 170 | b = 1 + n-a 171 | x = random.betavariate(a, b) 172 | if x >= p: 173 | n = a-1 174 | p /= x 175 | else: 176 | k += a 177 | n = b-1 178 | p = (p-x) / (1.0-x) 179 | u = 0 180 | for i in xrange(n): 181 | u = random.random() 182 | if u < p: 183 | k += 1 184 | return int(k) 185 | 186 | def g(x): 187 | if x == 0: 188 | return 1 189 | if x == 1: 190 | return 0 191 | d = 1 - x 192 | return (1 - (x * x) + (2 * x * math.log(x))) / (d * d) 193 | 194 | def binomial_logprob(s, p, n): 195 | inv2 = 1.0/2 196 | inv3 = 1.0/3 197 | inv6 = 1.0/6 198 | if s >= n: 199 | return -float('inf') 200 | q = 1-p 201 | S = s + inv2 202 | T = n - s - inv2 203 | d1 = s + inv6 - (n + inv3) * p 204 | d2 = q/(s+inv2) - p/(T+inv2) + (q-inv2)/(n+1) 205 | d2 = d1 + 0.02*d2 206 | num = 1 + q * g(S/(n*p)) + p * g(T/(n*q)) 207 | den = (n + inv6) * p * q 208 | z = num / den 209 | invsd = math.sqrt(z) 210 | z = d2 * invsd 211 | return gaussian_logprob(z, 0, 1) + math.log(invsd) 212 | 213 | class BinomialRandomPrimitive(RandomPrimitive): 214 | """ 215 | ERP with binomial distribution 216 | """ 217 | 218 | def __init__(self): 219 | pass 220 | 221 | def __call__(self, p, n, isStructural=False, conditionedValue=None): 222 | return self._sample([p,n], isStructural, conditionedValue) 223 | 224 | def _sample_impl(self, params): 225 | return binomial_sample(params[0], params[1]) 226 | 227 | def _logprob(self, val, params): 228 | return binomial_logprob(val, params[0], params[1]) 229 | 230 | # TODO: Custom proposal kernel? 231 | 232 | def poisson_sample(mu): 233 | k = 0 234 | while mu > 10: 235 | m = 7.0/8*mu 236 | x = random.gammavariate(m, 1) 237 | if x > mu: 238 | return int(k + binomial_sample(mu/x, int(m-1))) 239 | else: 240 | mu -= x 241 | k += m 242 | emu = math.exp(-mu) 243 | p = 1 244 | while p > emu: 245 | p *= random.random() 246 | k += 1 247 | return int(k-1) 248 | 249 | def fact(x): 250 | t = 1 251 | while x > 1: 252 | t *= x 253 | x -= 1 254 | return t 255 | 256 | def lnfact(x): 257 | if x < 1: 258 | x = 1 259 | if x < 12: 260 | return math.log(fact(round(x))) 261 | invx = 1.0 / x 262 | invx2 = invx*invx 263 | invx3 = invx2*invx 264 | invx5 = invx3*invx2 265 | invx7 = invx5*invx2 266 | ssum = ((x + 0.5) * math.log(x)) - x 267 | ssum += math.log(2*math.pi) / 2.0 268 | ssum += (invx / 12) - (invx / 360) 269 | ssum += (invx5 / 1260) - (invx7 / 1680) 270 | return ssum 271 | 272 | def poisson_logprob(k, mu): 273 | return k * math.log(mu) - mu - lnfact(k) 274 | 275 | class PoissonRandomPrimitive(RandomPrimitive): 276 | """ 277 | ERP with poisson distribution 278 | """ 279 | 280 | def __init__(self): 281 | pass 282 | 283 | def __call__(self, mu, isStructural=False, conditionedValue=None): 284 | return self._sample([mu], isStructural, conditionedValue) 285 | 286 | def _sample_impl(self, params): 287 | return poisson_sample(params[0]) 288 | 289 | def _logprob(self, val, params): 290 | return poisson_logprob(val, params[0]) 291 | 292 | # TODO: Custom proposal kernel? 293 | 294 | def dirichlet_sample(alpha): 295 | ssum = 0 296 | theta = [] 297 | for a in alpha: 298 | t = random.gammavariate(a, 1) 299 | theta.append(t) 300 | ssum += t 301 | for i in xrange(len(theta)): 302 | theta[i] /= ssum 303 | return theta 304 | 305 | def dirichlet_logprob(theta, alpha): 306 | lopg = log_gamma(sum(alpha)) 307 | for i in xrange(len(alpha)): 308 | logp += (alpha[i] - 1)*math.log(theta[i]) 309 | logp -= log_gamma(alpha[i]) 310 | return logp 311 | 312 | class DirichletRandomPrimitive(RandomPrimitive): 313 | """ 314 | ERP with dirichlet distribution 315 | """ 316 | 317 | def __init__(self): 318 | pass 319 | 320 | def __call__(self, alpha, isStructural=False, conditionedValue=None): 321 | return self._sample(alpha, isStructural, conditionedValue) 322 | 323 | def _sample_impl(self, params): 324 | return dirichlet_sample(params) 325 | 326 | def _logprob(self, val, params): 327 | return dirichlet_logprob(val, params) 328 | 329 | # TODO: Custom proposal kernel? 330 | 331 | 332 | def multinomial_sample(theta): 333 | result = 0 334 | x = random.random() * sum(theta) 335 | probAccum = 1e-6 336 | k = len(theta) 337 | while result < k and x > probAccum: 338 | probAccum += theta[result] 339 | result += 1 340 | return result - 1 341 | 342 | def multinomial_logprob(n, theta): 343 | if n < 0 or n >= len(theta): 344 | return -float('inf') 345 | n = int(round(n)) 346 | return math.log(theta[n]/sum(theta)) 347 | 348 | class MultinomialRandomPrimitive(RandomPrimitive): 349 | """ 350 | ERP with multinomial distribution 351 | """ 352 | 353 | def __init__(self): 354 | pass 355 | 356 | def __call__(self, theta, isStructural=False, conditionedValue=None): 357 | return self._sample(theta, isStructural, conditionedValue) 358 | 359 | def _sample_impl(self, params): 360 | return multinomial_sample(params) 361 | 362 | def _logprob(self, val, params): 363 | return multinomial_logprob(val, params) 364 | 365 | # Multinomial with currval projected out 366 | def _proposal(self, currval, params): 367 | newparams = copy.copy(params) 368 | newparams[currval] = 0.0 369 | return multinomial_sample(newparams) 370 | 371 | # Multinomial with currval projected out 372 | def _logProposalProb(self, currval, propval, params): 373 | newparams = copy.copy(params) 374 | newparams[currval] = 0.0 375 | return multinomial_logprob(propval, newparams) 376 | 377 | 378 | class UniformRandomPrimitive(RandomPrimitive): 379 | """ 380 | ERP with uniform distribution 381 | """ 382 | 383 | def __init__(self): 384 | pass 385 | 386 | def __call__(self, lo, hi, isStructural=False, conditionedValue=None): 387 | return self._sample([lo, hi], isStructural, conditionedValue) 388 | 389 | def _sample_impl(self, params): 390 | return random.uniform(params[0], params[1]) 391 | 392 | def _logprob(self, val, params): 393 | if val < params[0] or val > params[1]: 394 | return -float('inf') 395 | else: 396 | return -math.log(params[1] - params[0]) 397 | 398 | # TODO: Custom proposal kernel? 399 | 400 | 401 | 402 | """ 403 | Singleton instances of all the ERP gerneators 404 | """ 405 | 406 | flip = FlipRandomPrimitive() 407 | gaussian = GaussianRandomPrimitive() 408 | gamma = GammaRandomPrimitive() 409 | beta = BetaRandomPrimitive() 410 | binomial = BinomialRandomPrimitive() 411 | poisson = PoissonRandomPrimitive() 412 | dirichlet = DirichletRandomPrimitive() 413 | multinomial = MultinomialRandomPrimitive() 414 | uniform = UniformRandomPrimitive() 415 | 416 | 417 | """ 418 | Random utilies built on top of ERPs 419 | """ 420 | 421 | def multinomialDraw(items, probs, isStructural=False): 422 | return items[multinomial(probs, isStructural=isStructural)] 423 | 424 | def uniformDraw(items, isStructural=False): 425 | n = len(items) 426 | return items[multinomial(map(lambda x: 1.0/n, range(n)), isStructural=isStructural)] -------------------------------------------------------------------------------- /probabilistic/inference.py: -------------------------------------------------------------------------------- 1 | import trace 2 | import copy 3 | import random 4 | import math 5 | from collections import Counter 6 | 7 | 8 | def distrib(computation, samplingFn, *samplerArgs): 9 | """ 10 | Compute the discrete distribution over the given computation 11 | Only appropriate for computations that return a discrete value 12 | """ 13 | hist = Counter() 14 | samps = samplingFn(computation, *samplerArgs) 15 | for s in samps: 16 | hist[s[0]] += 1 17 | flnumsamps = float(len(samps)) 18 | for s in hist: 19 | hist[s] /= flnumsamps 20 | return hist 21 | 22 | 23 | def expectation(computation, samplingFn, *samplerArgs): 24 | """ 25 | Compute the expected value of a computation. 26 | Only appropriate for computations whose return value overloads the += and / operators 27 | """ 28 | samps = samplingFn(computation, *samplerArgs) 29 | return mean(map(lambda s: s[0], samps)) 30 | 31 | 32 | def mean(values): 33 | """ 34 | Compute the mean of a set of values 35 | """ 36 | mean = values[0] 37 | for v in values[1:]: 38 | mean += v 39 | return mean / float(len(values)) 40 | 41 | 42 | def MAP(computation, samplingFn, *samplerArgs): 43 | """ 44 | Maximum a posteriori inference (returns the highest probability sample) 45 | """ 46 | samps = samplingFn(computation, *samplerArgs) 47 | maxelem = max(samps, key=lambda s: s[1]) 48 | return maxelem[0] 49 | 50 | 51 | def rejectionSample(computation): 52 | """ 53 | Rejection sample a result from computation that satsifies 54 | all conditioning expressions. 55 | """ 56 | tr = trace.newTrace(computation) 57 | return tr.returnValue 58 | 59 | 60 | def _randomChoice(items): 61 | """ 62 | Like random.choice, but returns None if items is empty 63 | """ 64 | if len(items) == 0: 65 | return None 66 | else: 67 | return random.choice(items) 68 | 69 | 70 | class RandomWalkKernel: 71 | """ 72 | MCMC transition kernel that takes random walks 73 | by tweaking a single variable at a time 74 | """ 75 | 76 | def __init__(self, structural=True, nonstructural=True): 77 | self.structural = structural 78 | self.nonstructural = nonstructural 79 | self.proposalsMade = 0 80 | self.proposalsAccepted = 0 81 | 82 | def next(self, currTrace): 83 | 84 | self.proposalsMade += 1 85 | name = _randomChoice(currTrace.freeVarNames(self.structural, self.nonstructural)) 86 | 87 | # If we have no free random variables, then just run the computation 88 | # and generate another sample (this may not actually be deterministic, 89 | # in the case of nested query) 90 | if name == None: 91 | currTrace.traceUpdate(not structural) 92 | return currTrace 93 | # Otherwise, make a proposal for a randomly-chosen variable, probabilistically 94 | # accept it 95 | else: 96 | nextTrace, fwdPropLP, rvsPropLP = currTrace.proposeChange(name) 97 | fwdPropLP -= math.log(len(currTrace.freeVarNames(self.structural, self.nonstructural))) 98 | rvsPropLP -= math.log(len(nextTrace.freeVarNames(self.structural, self.nonstructural))) 99 | acceptThresh = nextTrace.logprob - currTrace.logprob + rvsPropLP - fwdPropLP 100 | if nextTrace.conditionsSatisfied and math.log(random.random()) < acceptThresh: 101 | self.proposalsAccepted += 1 102 | return nextTrace 103 | else: 104 | return currTrace 105 | 106 | def stats(self): 107 | print "Acceptance ratio: {0} ({1}/{2})".format(float(self.proposalsAccepted)/self.proposalsMade, \ 108 | self.proposalsAccepted, self.proposalsMade) 109 | 110 | 111 | class LARJInterpolationTrace(object): 112 | """ 113 | Abstraction for the linear interpolation of two execution traces 114 | """ 115 | 116 | def __init__(self, trace1, trace2, alpha=0.0): 117 | self.trace1 = trace1 118 | self.trace2 = trace2 119 | self.alpha = alpha 120 | 121 | @property 122 | def logprob(self): 123 | return (1-self.alpha)*self.trace1.logprob + self.alpha*self.trace2.logprob 124 | 125 | @property 126 | def conditionsSatisfied(self): 127 | return self.trace1.conditionsSatisfied and self.trace2.conditionsSatisfied 128 | 129 | @property 130 | def returnValue(self): 131 | return trace2.returnValue 132 | 133 | def freeVarNames(self, structural=True, nonstructural=True): 134 | return list(set(self.trace1.freeVarNames(structural, nonstructural) + \ 135 | self.trace2.freeVarNames(structural, nonstructural))) 136 | 137 | def proposeChange(self, varname): 138 | var1 = self.trace1.getRecord(varname) 139 | var2 = self.trace2.getRecord(varname) 140 | nextTrace = LARJInterpolationTrace(copy.deepcopy(self.trace1) if var1 else self.trace1, \ 141 | copy.deepcopy(self.trace2) if var2 else self.trace2, \ 142 | self.alpha) 143 | var1 = nextTrace.trace1.getRecord(varname) 144 | var2 = nextTrace.trace2.getRecord(varname) 145 | var = (var1 if var1 else var2) 146 | assert(not var.structural) # We're only supposed to be making changes to non-structurals here 147 | propval = var.erp._proposal(var.val, var.params) 148 | fwdPropLP = var.erp._logProposalProb(var.val, propval, var.params) 149 | rvsPropLP = var.erp._logProposalProb(propval, var.val, var.params) 150 | if var1: 151 | var1.val = propval 152 | var1.logprob = var1.erp._logprob(var1.val, var1.params) 153 | nextTrace.trace1.traceUpdate(not var1.structural) 154 | if var2: 155 | var2.val = propval 156 | var2.logprob = var2.erp._logprob(var2.val, var2.params) 157 | nextTrace.trace2.traceUpdate(not var2.structural) 158 | return nextTrace, fwdPropLP, rvsPropLP 159 | 160 | 161 | class LARJKernel: 162 | """ 163 | MCMC transition kernel that does reversible jumps 164 | using the LARJ algorithm. 165 | """ 166 | 167 | def __init__(self, diffusionKernel, annealSteps, jumpFreq=None): 168 | self.diffusionKernel = diffusionKernel 169 | self.annealSteps = annealSteps 170 | self.jumpFreq = jumpFreq 171 | self.jumpProposalsMade = 0 172 | self.jumpProposalsAccepted = 0 173 | self.diffusionProposalsMade = 0 174 | self.diffusionProposalsAccepted = 0 175 | self.annealingProposalsMade = 0 176 | self.annealingProposalsAccepted = 0 177 | 178 | def next(self, currTrace): 179 | 180 | numStruct = len(currTrace.freeVarNames(nonstructural=False)) 181 | numNonStruct = len(currTrace.freeVarNames(structural=False)) 182 | 183 | # If we have no free random variables, then just run the computation 184 | # and generate another sample (this may not actually be deterministic, 185 | # in the case of nested query) 186 | if numStruct + numNonStruct == 0: 187 | currTrace.traceUpdate() 188 | return currTrace 189 | # Decide whether to jump or diffuse 190 | structChoiceProb = (self.jumpFreq if self.jumpFreq else float(numStruct)/(numStruct + numNonStruct)) 191 | if random.random() < structChoiceProb: 192 | # Make a structural proposal 193 | return self.jumpStep(currTrace) 194 | else: 195 | # Make a nonstructural proposal 196 | prevAccepted = self.diffusionKernel.proposalsAccepted 197 | nextTrace = self.diffusionKernel.next(currTrace) 198 | self.diffusionProposalsMade += 1 199 | self.diffusionProposalsAccepted += (self.diffusionKernel.proposalsAccepted - prevAccepted) 200 | return nextTrace 201 | 202 | def jumpStep(self, currTrace): 203 | 204 | self.jumpProposalsMade += 1 205 | oldStructTrace = copy.deepcopy(currTrace) 206 | newStructTrace = copy.deepcopy(currTrace) 207 | 208 | # Randomly choose a structural variable to change 209 | structVars = newStructTrace.freeVarNames(nonstructural=False) 210 | name = _randomChoice(structVars) 211 | var = newStructTrace.getRecord(name) 212 | origval = var.val 213 | propval = var.erp._proposal(var.val, var.params) 214 | fwdPropLP = var.erp._logProposalProb(var.val, propval, var.params) 215 | var.val = propval 216 | var.logprob = var.erp._logprob(var.val, var.params) 217 | newStructTrace.traceUpdate() 218 | oldNumVars = len(oldStructTrace.freeVarNames(nonstructural=False)) 219 | newNumVars = len(newStructTrace.freeVarNames(nonstructural=False)) 220 | fwdPropLP += newStructTrace.newlogprob - math.log(oldNumVars) 221 | 222 | # We only actually do annealing if we have any non-structural variables and we're doing more than 223 | # zero annealing steps 224 | annealingLpRatio = 0.0 225 | if len(oldStructTrace.freeVarNames(structural=False)) + len(newStructTrace.freeVarNames(structural=False)) != 0 and \ 226 | self.annealSteps > 0: 227 | aStep = 0 228 | lerpTrace = LARJInterpolationTrace(oldStructTrace, newStructTrace) 229 | prevAccepted = self.diffusionKernel.proposalsAccepted 230 | while aStep < self.annealSteps: 231 | lerpTrace.alpha = float(aStep)/(self.annealSteps-1) 232 | annealingLpRatio += lerpTrace.logprob 233 | lerpTrace = self.diffusionKernel.next(lerpTrace) 234 | annealingLpRatio -= lerpTrace.logprob 235 | aStep += 1 236 | self.annealingProposalsMade += self.annealSteps 237 | self.annealingProposalsAccepted += (self.diffusionKernel.proposalsAccepted - prevAccepted) 238 | oldStructTrace = lerpTrace.trace1 239 | newStructTrace = lerpTrace.trace2 240 | 241 | # Finalize accept/reject decision 242 | var = newStructTrace.getRecord(name) 243 | rvsPropLP = var.erp._logProposalProb(propval, origval, var.params) + oldStructTrace.lpDiff(newStructTrace) - math.log(newNumVars) 244 | acceptanceProb = newStructTrace.logprob - currTrace.logprob + rvsPropLP - fwdPropLP + annealingLpRatio 245 | if newStructTrace.conditionsSatisfied and math.log(random.random()) < acceptanceProb: 246 | self.jumpProposalsAccepted += 1 247 | return newStructTrace 248 | else: 249 | return currTrace 250 | 251 | def stats(self): 252 | overallProposalsMade = self.jumpProposalsMade + self.diffusionProposalsMade 253 | overallProposalsAccepted = self.jumpProposalsAccepted + self.diffusionProposalsAccepted 254 | if self.diffusionProposalsMade > 0: 255 | print "Diffusion acceptance ratio: {0} ({1}/{2})".format(float(self.diffusionProposalsAccepted)/self.diffusionProposalsMade, \ 256 | self.diffusionProposalsAccepted, self.diffusionProposalsMade) 257 | if self.jumpProposalsMade > 0: 258 | print "Jump acceptance ratio: {0} ({1}/{2})".format(float(self.jumpProposalsAccepted)/self.jumpProposalsMade, \ 259 | self.jumpProposalsAccepted, self.jumpProposalsMade) 260 | if self.annealingProposalsMade > 0: 261 | print "Annealing acceptance ratio: {0} ({1}/{2})".format(float(self.annealingProposalsAccepted)/self.annealingProposalsMade, \ 262 | self.annealingProposalsAccepted, self.annealingProposalsMade) 263 | print "Overall acceptance ratio: {0} ({1}/{2})".format(float(overallProposalsAccepted)/overallProposalsMade, \ 264 | overallProposalsAccepted, overallProposalsMade) 265 | 266 | 267 | def mcmc(computation, kernel, numsamps, lag=1, verbose=False): 268 | """ 269 | Do MCMC for 'numsamps' iterations using a given transition kernel 270 | """ 271 | currentTrace = trace.newTrace(computation) 272 | samps = [] 273 | i = 0 274 | iters = numsamps * lag 275 | while i < iters: 276 | currentTrace = kernel.next(currentTrace) 277 | if i % lag == 0: 278 | if verbose: 279 | print "iteration {0}\r".format(i), 280 | samps.append((currentTrace.returnValue, currentTrace.logprob)) 281 | i += 1 282 | if verbose: 283 | print "" 284 | kernel.stats() 285 | return samps 286 | 287 | 288 | def traceMH(computation, numsamps, lag=1, verbose=False): 289 | """ 290 | Sample from a probabilistic computation for some 291 | number of iterations using single-variable-proposal 292 | Metropolis-Hastings 293 | """ 294 | return mcmc(computation, RandomWalkKernel(), numsamps, lag, verbose) 295 | 296 | 297 | def LARJMH(computation, numsamps, annealSteps, jumpFreq=None, lag=1, verbose=False): 298 | """ 299 | Sample from a probabilistic computation using locally annealed 300 | reversible jump mcmc 301 | """ 302 | return mcmc(computation, \ 303 | LARJKernel(RandomWalkKernel(structural=False), annealSteps, jumpFreq), \ 304 | numsamps, lag, verbose) 305 | 306 | -------------------------------------------------------------------------------- /probabilistic/memoize.py: -------------------------------------------------------------------------------- 1 | import cPickle 2 | 3 | 4 | class _MemoizedFunction: 5 | """ 6 | Wrapper around a function to memoize its results 7 | Source: http://stackoverflow.com/questions/4669391/python-anyone-have-a-memoizing-decorator-that-can-handle-unhashable-arguments 8 | This implementation allows us to memoize functions whose arguments can be arbitrary Python structures. 9 | However, it is slower for simple argument types such as numbers or strings. 10 | """ 11 | 12 | def __init__(self, func): 13 | self.func = func 14 | self.cache = {} 15 | 16 | def __call__(self, *args, **kwds): 17 | str = cPickle.dumps(args, 1)+cPickle.dumps(kwds, 1) 18 | if not self.cache.has_key(str): 19 | val = self.func(*args, **kwds) 20 | self.cache[str] = val 21 | return val 22 | else: 23 | return self.cache[str] 24 | 25 | def mem(func): 26 | return _MemoizedFunction(func) 27 | -------------------------------------------------------------------------------- /probabilistic/test.py: -------------------------------------------------------------------------------- 1 | 2 | from inference import * 3 | from control import * 4 | from trace import * 5 | from erp import * 6 | from memoize import * 7 | 8 | from datetime import datetime 9 | 10 | samples = 150 11 | lag = 20 12 | runs = 5 13 | errorTolerance = 0.07 14 | 15 | def test(name, estimates, trueExpectation, tolerance=errorTolerance): 16 | 17 | print "test: {0} ...".format(name), 18 | 19 | errors = map(lambda estimate: abs(estimate - trueExpectation), estimates) 20 | meanAbsError = mean(errors) 21 | if meanAbsError > tolerance: 22 | print "failed! True mean: {0} | Test mean: {1}".format(trueExpectation, mean(estimates)) 23 | else: 24 | print "passed." 25 | 26 | def mhtest(name, computation, trueExpectation, tolerance=errorTolerance): 27 | #test(name, repeat(runs, lambda: expectation(computation, traceMH, samples, lag)), trueExpectation, tolerance) 28 | test(name, repeat(runs, lambda: expectation(computation, LARJMH, samples, 0, None, lag)), trueExpectation, tolerance) 29 | 30 | def larjtest(name, computation, trueExpectation, tolerance=errorTolerance): 31 | test(name, repeat(runs, lambda: expectation(computation, LARJMH, samples, 10, None, lag)), trueExpectation, tolerance) 32 | 33 | def eqtest(name, estvalues, truevalues, tolerance=errorTolerance): 34 | print "test: {0} ...".format(name), 35 | assert(len(estvalues) == len(truevalues)) 36 | for i in xrange(len(estvalues)): 37 | estvalue = estvalues[i] 38 | truevalue = truevalues[i] 39 | if abs(estvalue - truevalue) > tolerance: 40 | print "failed! True value: {0} | Test value: {1}".format(truevalue, estvalue) 41 | return 42 | print "passed." 43 | 44 | if __name__ == "__main__": 45 | 46 | d1 = datetime.now() 47 | 48 | print "starting tests..." 49 | 50 | 51 | """ 52 | ERP tests 53 | """ 54 | 55 | test("flip sample", \ 56 | repeat(runs, lambda: mean(repeat(samples, lambda: flip(0.7)))), \ 57 | 0.7) 58 | 59 | mhtest("flip query", \ 60 | lambda: flip(0.7), \ 61 | 0.7) 62 | 63 | test("uniform sample", \ 64 | repeat(runs, lambda: mean(repeat(samples, lambda: uniform(0.1, 0.4)))), \ 65 | 0.5*(.1+.4)) 66 | 67 | mhtest("uniform query", \ 68 | lambda: uniform(.1, .4), \ 69 | 0.5*(.1+.4)) 70 | 71 | test("multinomial sample", \ 72 | repeat(runs, lambda: mean(repeat(samples, lambda: multinomialDraw([.2,.3,.4], [0.2, 0.6, 0.2])))), \ 73 | 0.2*.2 + 0.6*.3 + 0.2*.4) 74 | 75 | mhtest("multinomial query", \ 76 | lambda: multinomialDraw([.2,.3,.4], [0.2, 0.6, 0.2]), \ 77 | 0.2*.2 + 0.6*.3 + 0.2*.4) 78 | 79 | eqtest("multinomial lp", \ 80 | [multinomial_logprob(0, [0.2, 0.6, 0.2]), \ 81 | multinomial_logprob(1, [0.2, 0.6, 0.2]), \ 82 | multinomial_logprob(2, [0.2, 0.6, 0.2])], \ 83 | [math.log(0.2), math.log(0.6), math.log(0.2)]) 84 | 85 | test("gaussian sample", \ 86 | repeat(runs, lambda: mean(repeat(samples, lambda: gaussian(0.1, 0.5)))), \ 87 | 0.1) 88 | 89 | mhtest("gaussian query", \ 90 | lambda: gaussian(0.1, 0.5), \ 91 | 0.1) 92 | 93 | eqtest("gaussian lp", \ 94 | [gaussian_logprob(0, 0.1, 0.5), \ 95 | gaussian_logprob(0.25, 0.1, 0.5), \ 96 | gaussian_logprob(0.6, 0.1, 0.5)], \ 97 | [-0.2457913526447274, -0.27079135264472737, -0.7257913526447274]) 98 | 99 | test("gamma sample", \ 100 | repeat(runs, lambda: mean(repeat(samples, lambda: gamma(2, 2)/10))), \ 101 | 0.4) 102 | 103 | mhtest("gamma query", \ 104 | lambda: gamma(2, 2)/10, \ 105 | 0.4) 106 | 107 | eqtest("gamma lp", \ 108 | [gamma_logprob(1, 2, 2), \ 109 | gamma_logprob(4, 2, 2), \ 110 | gamma_logprob(8, 2, 2)], \ 111 | [-1.8862944092546166, -2.000000048134726, -3.306852867574781]) 112 | 113 | test("beta sample", \ 114 | repeat(runs, lambda: mean(repeat(samples, lambda: beta(2, 5)))), \ 115 | 2.0/(2+5)) 116 | 117 | mhtest("beta query", \ 118 | lambda: beta(2, 5), \ 119 | 2.0/(2+5)) 120 | 121 | eqtest("beta lp", \ 122 | [beta_logprob(.1, 2, 5), \ 123 | beta_logprob(.2, 2, 5), \ 124 | beta_logprob(.6, 2, 5)], \ 125 | [0.677170196389683, 0.899185234324094, -0.7747911992475776]) 126 | 127 | test("binomial sample", \ 128 | repeat(runs, lambda: mean(repeat(samples, lambda: binomial(.5, 40)/40.0))), \ 129 | 0.5) 130 | 131 | mhtest("binomial query", \ 132 | lambda: binomial(.5, 40)/40.0, \ 133 | 0.5) 134 | 135 | eqtest("binomial lp", \ 136 | [binomial_logprob(15, .5, 40), \ 137 | binomial_logprob(20, .5, 40), \ 138 | binomial_logprob(30, .5, 40)], \ 139 | [-3.3234338674089985, -2.0722579911387817, -7.2840211276953575]) 140 | 141 | test("poisson sample", \ 142 | repeat(runs, lambda: mean(repeat(samples, lambda: poisson(4)/10.0))), \ 143 | 0.4) 144 | 145 | mhtest("poisson query", \ 146 | lambda: poisson(4)/10.0, \ 147 | 0.4) 148 | 149 | eqtest("poisson lp", \ 150 | [poisson_logprob(2, 4), \ 151 | poisson_logprob(5, 4), \ 152 | poisson_logprob(7, 4)], \ 153 | [-1.9205584583201643, -1.8560199371825927, -2.821100833226181]) 154 | 155 | 156 | """ 157 | Tests adapted from Church 158 | """ 159 | 160 | def flipSetTest(): 161 | a = 1.0 / 1000 162 | condition(flip(a)) 163 | return a 164 | mhtest("setting a flip", \ 165 | flipSetTest, \ 166 | 1.0/1000, \ 167 | tolerance=1e-15) 168 | 169 | 170 | def andConditionedOnOrTest(): 171 | a = flip() 172 | b = flip() 173 | condition(a or b) 174 | return a and b 175 | mhtest("and conditioned on or", \ 176 | andConditionedOnOrTest, \ 177 | 1.0/3) 178 | 179 | def biasedFlipTest(): 180 | a = flip(0.3) 181 | b = flip(0.3) 182 | condition(a or b) 183 | return a and b 184 | mhtest("and conditioned on or, biased flip", \ 185 | biasedFlipTest, \ 186 | (0.3*0.3) / (0.3*0.3 + 0.7*0.3 + 0.3*0.7)) 187 | 188 | 189 | def conditionedFlipTest(): 190 | bitFlip = lambda fidelity, x: flip(fidelity if x else 1 - fidelity) 191 | hyp = flip(0.7) 192 | condition(bitFlip(0.8, hyp)) 193 | return hyp 194 | mhtest("conditioned flip", \ 195 | conditionedFlipTest, \ 196 | (0.7*0.8) / (0.7*0.8 + 0.3*0.2)) 197 | 198 | 199 | def randomIfBranchTest(): 200 | if flip(0.7): 201 | return flip(0.2) 202 | else: 203 | return flip(0.8) 204 | mhtest("random 'if' with random branches, unconditioned", \ 205 | randomIfBranchTest, \ 206 | 0.7*0.2 + 0.3*0.8) 207 | 208 | 209 | mhtest("flip with random weight, unconditioned", \ 210 | lambda: flip(0.2 if flip(0.7) else 0.8), \ 211 | 0.7*0.2 + 0.3*0.8) 212 | 213 | 214 | def randomProcAppTest(): 215 | proc = (lambda x: flip(0.2)) if flip(0.7) else (lambda x: flip(0.8)) 216 | return proc(1) 217 | mhtest("random procedure application, unconditioned", \ 218 | randomProcAppTest, \ 219 | 0.7*0.2 + 0.3*0.8) 220 | 221 | 222 | def conditionedMultinomialTest(): 223 | hyp = multinomialDraw(['b', 'c', 'd'], [0.1, 0.6, 0.3]) 224 | def observe(x): 225 | if flip(0.8): 226 | return x 227 | else: 228 | return 'b' 229 | condition(observe(hyp) == 'b') 230 | return hyp == 'b' 231 | mhtest("conditioned multinomial", \ 232 | conditionedMultinomialTest, \ 233 | 0.357) 234 | 235 | 236 | def recursiveStochasticTailTest(): 237 | def powerLaw(prob, x): 238 | if flip(prob, isStructural=True): 239 | return x 240 | else: 241 | return powerLaw(prob, x+1) 242 | a = powerLaw(0.3, 1) 243 | return a < 5 244 | mhtest("recursive stochastic fn, unconditioned (tail recursive)", \ 245 | recursiveStochasticTailTest, \ 246 | 0.7599) 247 | 248 | def recursiveStochasticTest(): 249 | def powerLaw(prob, x): 250 | if flip(prob, isStructural=True): 251 | return x 252 | else: 253 | return 0 + powerLaw(prob, x+1) 254 | a = powerLaw(0.3, 1) 255 | return a < 5 256 | mhtest("recursive stochastic fn, unconditioned", \ 257 | recursiveStochasticTest, \ 258 | 0.7599) 259 | 260 | 261 | def memoizedFlipTest(): 262 | proc = mem(lambda x: flip(0.8)) 263 | return all([proc(1), proc(2), proc(1), proc(2)]) 264 | mhtest("memoized flip, unconditioned", \ 265 | memoizedFlipTest, \ 266 | 0.64) 267 | 268 | 269 | def memoizedFlipConditionedTest(): 270 | proc = mem(lambda x: flip(0.2)) 271 | condition(any([proc(1), proc(2), proc(2), proc(2)])) 272 | return proc(1) 273 | mhtest("memoized flip, conditioned", \ 274 | memoizedFlipConditionedTest, \ 275 | 0.5555555555555555) 276 | 277 | 278 | def boundSymbolInMemoizerTest(): 279 | a = flip(0.8) 280 | proc = mem(lambda x: a) 281 | return all([proc(1), proc(1)]) 282 | mhtest("bound symbol used inside memoizer, unconditioned", \ 283 | boundSymbolInMemoizerTest, \ 284 | 0.8) 285 | 286 | 287 | def memRandomArgTest(): 288 | proc = mem(lambda x: flip(0.8)) 289 | return all([proc(uniformDraw([1,2,3], isStructural=True)), proc(uniformDraw([1,2,3], isStructural=True))]) 290 | mhtest("memoized flip with random argument, unconditioned", \ 291 | memRandomArgTest, \ 292 | 0.6933333333333334) 293 | 294 | 295 | def memRandomProc(): 296 | proc = (lambda x: flip(0.2)) if flip(0.7) else (lambda x: flip(0.8)) 297 | memproc = mem(proc) 298 | return all([memproc(1), memproc(2)]) 299 | mhtest("memoized random procedure, unconditioned", \ 300 | memRandomProc, \ 301 | 0.22) 302 | 303 | 304 | def mhOverRejectionTest(): 305 | def bitFlip(fidelity, x): 306 | return flip(fidelity if x else (1-fidelity)) 307 | def innerQuery(): 308 | a = flip(0.7) 309 | condition(bitFlip(0.8, a)) 310 | return a 311 | return rejectionSample(innerQuery) 312 | mhtest("mh-query over rejection query for conditioned flip", \ 313 | mhOverRejectionTest, \ 314 | 0.903225806451613) 315 | 316 | 317 | def transDimensionalTest(): 318 | a = beta(1, 5) if flip(0.9, isStructural=True) else 0.7 319 | b = flip(a) 320 | condition(b) 321 | return a 322 | mhtest("trans-dimensional", \ 323 | transDimensionalTest, \ 324 | 0.417) 325 | 326 | 327 | def transDimensionalLARJTest(): 328 | a = beta(1, 5) if flip(0.9, isStructural=True) else 0.7 329 | b = flip(a) 330 | condition(b) 331 | return a 332 | larjtest("trans-dimensional (LARJ)", \ 333 | transDimensionalLARJTest, \ 334 | 0.417) 335 | 336 | 337 | def memFlipInIfTest(): 338 | a = mem(flip) if flip() else mem(flip) 339 | b = a() 340 | return b 341 | mhtest("memoized flip in if branch (create/destroy memprocs), unconditioned", \ 342 | memFlipInIfTest, \ 343 | 0.5) 344 | 345 | 346 | """ 347 | Tests for things specific to new implementation 348 | """ 349 | 350 | 351 | def nativeLoopTest(): 352 | accum = 0 353 | for i in xrange(4): 354 | accum += flip() 355 | return accum / 4.0 356 | mhtest("native for loop", \ 357 | nativeLoopTest, \ 358 | 0.5) 359 | 360 | 361 | def directConditionTest(): 362 | accum = [0] 363 | def block(i): 364 | if i < 5: 365 | accum[0] += flip(0.5, conditionedValue=True) 366 | else: 367 | accum[0] += flip(0.5) 368 | ntimes(10, block) 369 | return accum[0] / 10.0 370 | mhtest("directly conditioning variable values", \ 371 | directConditionTest, \ 372 | 0.75) 373 | 374 | 375 | print "tests done!" 376 | 377 | d2 = datetime.now() 378 | print "time: {0}".format((d2 - d1).total_seconds()) 379 | 380 | -------------------------------------------------------------------------------- /probabilistic/trace.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import copy 3 | from collections import Counter 4 | 5 | class RandomVariableRecord: 6 | """ 7 | Variables generated by ERPs. 8 | These form the 'choice points' in a probabilistic program trace. 9 | """ 10 | 11 | def __init__(self, name, erp, params, val, logprob, structural, conditioned=False): 12 | self.name = name 13 | self.erp = erp 14 | self.params = params 15 | self.val = val 16 | self.logprob = logprob 17 | self.active = True 18 | self.conditioned = conditioned 19 | self.structural = structural 20 | 21 | class RandomExecutionTrace: 22 | """ 23 | Execution trace generated by a probabilistic program. 24 | Tracks the random choices made and accumulates probabilities 25 | """ 26 | 27 | def __init__(self, computation, doRejectionInit=True): 28 | self.computation = computation 29 | self._vars = {} 30 | self.varlist = [] 31 | self.currVarIndex = 0 32 | self.logprob = 0 33 | self.newlogprob = 0 # From newly-added variables 34 | self.oldlogprob = 0 # From unreachable variables 35 | self.rootframe = None 36 | self.loopcounters = Counter() 37 | self.conditionsSatisfied = False 38 | self.returnValue = None 39 | if doRejectionInit: 40 | while not self.conditionsSatisfied: 41 | self._vars.clear() 42 | self.traceUpdate() 43 | 44 | def __deepcopy__(self, memo): 45 | newdb = RandomExecutionTrace(self.computation, doRejectionInit=False) 46 | newdb.logprob = self.logprob 47 | newdb.oldlogprob = self.oldlogprob 48 | newdb.newlogprob = self.newlogprob 49 | newdb.varlist = [copy.copy(record) for record in self.varlist] 50 | newdb._vars = {record.name:record for record in newdb.varlist} 51 | newdb.conditionsSatisfied = self.conditionsSatisfied 52 | newdb.returnValue = self.returnValue 53 | return newdb 54 | 55 | def freeVarNames(self, structural=True, nonstructural=True): 56 | return map(lambda tup: tup[0], \ 57 | filter(lambda tup: not tup[1].conditioned and \ 58 | ((structural and tup[1].structural) or (nonstructural and not tup[1].structural)), \ 59 | self._vars.iteritems())) 60 | 61 | def varDiff(self, other): 62 | """ 63 | The names of the variables that this trace has that the other trace does not have 64 | """ 65 | return list(set(self._vars.keys()) - set(other._vars.keys())) 66 | 67 | def lpDiff(self, other): 68 | """ 69 | The difference in log probability between this trace and the other resulting 70 | from the variables that this has that the other does not 71 | """ 72 | return sum(map(lambda name: self._vars[name].logprob, self.varDiff(other))) 73 | 74 | def traceUpdate(self, structureIsFixed=False): 75 | """ 76 | Run computation and update this trace accordingly 77 | """ 78 | 79 | global _trace 80 | originalTrace = _trace 81 | _trace = self 82 | 83 | self.logprob = 0.0 84 | self.newlogprob = 0.0 85 | self.loopcounters.clear() 86 | self.conditionsSatisfied = True 87 | self.currVarIndex = 0 88 | 89 | # If updating this trace can change the variable structure, then we 90 | # clear out the flat list of variables beforehand 91 | if not structureIsFixed: 92 | self.varlist = [] 93 | 94 | # First, mark all random values as 'inactive'; only 95 | # those reeached by the computation will become 'active' 96 | for record in self._vars.values(): 97 | record.active = False 98 | 99 | # Mark that this is the 'root' of the current execution trace 100 | self.rootframe = sys._getframe() 101 | 102 | # Run the computation, which will create/lookup random variables 103 | self.returnValue = self.computation() 104 | 105 | # Clear out the root frame, etc. 106 | self.rootframe = None 107 | self.loopcounters.clear() 108 | 109 | # Clean up any random values that are no longer reachable 110 | self.oldlogprob = 0.0 111 | for record in self._vars.values(): 112 | if not record.active: 113 | self.oldlogprob += record.logprob 114 | self._vars = {name:record for name,record in self._vars.iteritems() if record.active} 115 | 116 | _trace = originalTrace 117 | 118 | def proposeChange(self, varname): 119 | """ 120 | Propose a random change to the variable name 'varname' 121 | Returns a new sample trace from the computation and the 122 | forward and reverse probabilities of proposing this change 123 | """ 124 | nextTrace = copy.deepcopy(self) 125 | var = nextTrace.getRecord(varname) 126 | propval = var.erp._proposal(var.val, var.params) 127 | fwdPropLP = var.erp._logProposalProb(var.val, propval, var.params) 128 | rvsPropLP = var.erp._logProposalProb(propval, var.val, var.params) 129 | var.val = propval 130 | var.logprob = var.erp._logprob(var.val, var.params) 131 | nextTrace.traceUpdate(not var.structural) 132 | fwdPropLP += nextTrace.newlogprob 133 | rvsPropLP += nextTrace.oldlogprob 134 | return nextTrace, fwdPropLP, rvsPropLP 135 | 136 | def currentName(self, numFrameSkip): 137 | """ 138 | Return the current name, as determined by the interpreter 139 | stack of the current program. 140 | Skips the top 'numFrameSkip' stack frames that precede this 141 | function's stack frame (numFrameSkip+1 frames total) 142 | """ 143 | 144 | # Get list of frames from the root to the current frame 145 | f = sys._getframe(numFrameSkip+1) 146 | flst = [] 147 | while f and f is not self.rootframe: 148 | flst.insert(0, f) 149 | f = f.f_back 150 | 151 | # Build up name string, checking loop counters along the way 152 | name = "" 153 | for i in xrange(len(flst)-1): 154 | f = flst[i] 155 | name += "{0}:{1}".format(id(f.f_code), f.f_lasti) 156 | loopnum = self.loopcounters[name] 157 | name += ":{0}|".format(loopnum) 158 | # For the last (topmost) frame, also increment the loop counter 159 | f = flst[-1] 160 | name += "{0}:{1}".format(id(f.f_code), f.f_lasti) 161 | loopnum = self.loopcounters[name] 162 | self.loopcounters[name] += 1 163 | name += ":{0}|".format(loopnum) 164 | 165 | return name 166 | 167 | def lookup(self, erp, params, numFrameSkip, isStructural, conditionedValue=None): 168 | """ 169 | Looks up the value of a random variable. 170 | If this random variable does not exist, create it 171 | """ 172 | 173 | record = None 174 | name = None 175 | # Try to find the variable (first check the flat list, then do 176 | # slower structural name lookup) 177 | varIsInFlatList = self.currVarIndex < len(self.varlist) 178 | if varIsInFlatList: 179 | record = self.varlist[self.currVarIndex] 180 | else: 181 | name = self.currentName(numFrameSkip+1) 182 | record = self._vars.get(name) 183 | if (not record or record.erp is not erp or isStructural != record.structural): 184 | record = None 185 | # If we didn't find the variable, create a new one 186 | if not record: 187 | val = (conditionedValue if conditionedValue else erp._sample_impl(params)) 188 | ll = erp._logprob(val, params) 189 | self.newlogprob += ll 190 | record = RandomVariableRecord(name, erp, params, val, ll, isStructural, conditionedValue != None) 191 | self._vars[name] = record 192 | # Otherwise, reuse the variable we found, but check if its parameters/conditioning 193 | # status have changed 194 | else: 195 | record.conditioned = (conditionedValue != None) 196 | hasChanges = False 197 | if record.params != params: 198 | record.params = params 199 | hasChanges = True 200 | if conditionedValue and conditionedValue != record.val: 201 | record.val = conditionedValue 202 | record.conditioned = True 203 | hasChanges = True 204 | if hasChanges: 205 | record.logprob = erp._logprob(record.val, record.params) 206 | 207 | # Finish up and return 208 | if not varIsInFlatList: 209 | self.varlist.append(record) 210 | self.currVarIndex += 1 211 | self.logprob += record.logprob 212 | record.active = True 213 | return record.val 214 | 215 | def getRecord(self, name): 216 | """ 217 | Simply retrieve the variable record associated with name 218 | """ 219 | return self._vars.get(name) 220 | 221 | def addFactor(self, num): 222 | """ 223 | Add a new factor into the log likelihood of the current trace 224 | """ 225 | self.logprob += num 226 | 227 | def conditionOn(self, boolexpr): 228 | """ 229 | Condition the trace on the value of a boolean expression 230 | """ 231 | self.conditionsSatisfied = self.conditionsSatisfied and boolexpr 232 | 233 | """ 234 | Global singleton instance 235 | """ 236 | _trace = None 237 | 238 | def lookupVariableValue(erp, params, isStructural, numFrameSkip, conditionedValue=None): 239 | global _trace 240 | if not _trace: 241 | return (conditionedValue if conditionedValue else erp._sample_impl(params)) 242 | else: 243 | return _trace.lookup(erp, params, numFrameSkip+1, isStructural, conditionedValue) 244 | 245 | def newTrace(computation): 246 | return RandomExecutionTrace(computation) 247 | 248 | def factor(num): 249 | global _trace 250 | if _trace: 251 | _trace.addFactor(num) 252 | 253 | def condition(boolexpr): 254 | global _trace 255 | if _trace: 256 | _trace.conditionOn(boolexpr) -------------------------------------------------------------------------------- /sandbox.py: -------------------------------------------------------------------------------- 1 | from probabilistic import * 2 | import math 3 | from collections import Counter 4 | import cProfile 5 | import pstats 6 | 7 | ############################### 8 | 9 | def distribForward(computation, iters): 10 | hist = Counter() 11 | i = 0 12 | while i < iters: 13 | i += 1 14 | hist[computation()] += 1 15 | for x in hist: 16 | hist[x] /= float(iters) 17 | return hist 18 | 19 | def compareForwardToMHDists(computation, iters): 20 | forwardhist = distribForward(computation, iters) 21 | print "Forward hist:" 22 | print forwardhist 23 | mhhist = distrib(computation, traceMH, iters) 24 | print "MH hist:" 25 | print mhhist 26 | 27 | def meanForward(computation, iters): 28 | mean = computation() 29 | i = 0 30 | while i < iters-1: 31 | i += 1 32 | mean += computation() 33 | return mean / iters 34 | 35 | def compareForwardToMHMeans(computation, iters): 36 | forwardmean = meanForward(computation, iters) 37 | print "Forward mean: {0}".format(forwardmean) 38 | mhmean = expectation(computation, traceMH, iters) 39 | print "MH mean: {0}".format(mhmean) 40 | 41 | ############################### 42 | 43 | def ones(): 44 | if flip(0.75): 45 | return [1] + ones() 46 | else: 47 | return [] 48 | 49 | def numOnes(): 50 | return len(ones()) 51 | 52 | def constrainedOnes(): 53 | seq = ones() 54 | factor(-math.pow(abs(len(seq) - 4), 10)) 55 | return seq 56 | 57 | def sumOfTen(): 58 | num = 0 59 | num += flip(0.5, conditionedValue=True) 60 | num += flip(0.5, conditionedValue=True) 61 | # num += flip(0.5) 62 | # num += flip(0.5) 63 | num += flip(0.5) 64 | num += flip(0.5) 65 | num += flip(0.5) 66 | num += flip(0.5) 67 | num += flip(0.5) 68 | num += flip(0.5) 69 | num += flip(0.5) 70 | num += flip(0.5) 71 | return num 72 | 73 | def constrainedSumOfTen(): 74 | num = sumOfTen() 75 | condition(num >= 5) 76 | return num 77 | 78 | def sumOfTenWhile(): 79 | num = [0] 80 | i = [0] 81 | def block(): 82 | i[0] += 1 83 | num[0] += flip(0.5) 84 | until(lambda: i[0] == 10, block) 85 | return num[0] 86 | 87 | def sumOfTenFor(): 88 | num = [0] 89 | def block(i): 90 | num[0] += flip(0.5) 91 | foreach(xrange(10), block) 92 | return num[0] 93 | 94 | def sumOfTenMap(): 95 | return sum(map(lambda x: flip(0.5), range(10))) 96 | 97 | def oneGaussian(): 98 | return gaussian(10, 0.5) 99 | 100 | def oneGamma(): 101 | return gamma(9, 0.5) 102 | 103 | def oneBeta(): 104 | return beta(2, 2) 105 | 106 | def oneBinomial(): 107 | return binomial(0.5, 40) 108 | 109 | def onePoisson(): 110 | return poisson(10) 111 | 112 | def memTest(): 113 | func = mem(lambda x: flip(x)) 114 | result1 = func(0.5) 115 | result2 = func(0.5) 116 | return result1 == result2 117 | 118 | def noisyOr(a, astrength, b, bstrength, baserate): 119 | return (flip(astrength) and a) or \ 120 | (flip(bstrength) and b) or \ 121 | flip(baserate) 122 | 123 | def sprinklerTest(): 124 | 125 | weights = {"rain-str": 0.9, \ 126 | "rain-prior": 0.3, \ 127 | "sprinkler-str": 0.9, \ 128 | "sprinkler-prior": 0.2, \ 129 | "grass-baserate": 0.1} 130 | 131 | @mem 132 | def rain(day): 133 | return flip(weights["rain-prior"]) 134 | 135 | @mem 136 | def sprinkler(day): 137 | return flip(weights["sprinkler-prior"]) 138 | 139 | @mem 140 | def grassIsWet(day): 141 | return noisyOr(rain(day), weights["rain-str"], \ 142 | sprinkler(day), weights["sprinkler-str"], \ 143 | weights["grass-baserate"]) 144 | 145 | condition(grassIsWet("day2")) 146 | 147 | return rain("day2") 148 | 149 | ## ChurchServ version of the above test, for comparison: 150 | # (define (noisy-or a astrength b bstrength baserate) 151 | # (or (and (flip astrength) a) 152 | # (and (flip bstrength) b) 153 | # (flip baserate))) 154 | # (define sprinklerTest 155 | # (mh-query 100 100 156 | # (define weight (lambda (ofwhat) 157 | # (case ofwhat 158 | # (('rain-str) 0.9) 159 | # (('rain-prior) 0.3) 160 | # (('sprinkler-str) 0.9) 161 | # (('sprinkler-prior) 0.2) 162 | # (('grass-baserate) 0.1)))) 163 | # (define grass-is-wet (mem (lambda (day) 164 | # (noisy-or 165 | # (rain day) (weight 'rain-str) 166 | # (sprinkler day) (weight 'sprinkler-str) 167 | # (weight 'grass-baserate))))) 168 | # (define rain (mem (lambda (day) 169 | # (flip (weight 'rain-prior))))) 170 | # (define sprinkler (mem (lambda (day) 171 | # (flip (weight 'sprinkler-prior))))) 172 | 173 | # (rain 'day2) 174 | 175 | # (grass-is-wet 'day2) 176 | # ) 177 | # ) 178 | # (hist sprinklerTest "Rained on Day2?") 179 | 180 | # stringLengthProbs = repeat(6, lambda: 1.0) 181 | # stringLengths = range(5, 11) 182 | stringLengthProbs = [0.5, 0.5] 183 | stringLengths = [3, 4] 184 | penaltyMultiplier = 5 185 | 186 | def stringsOfLength(length, numvals): 187 | def helper(n, seqSoFar): 188 | if n == 0: 189 | yield tuple(seqSoFar) 190 | else: 191 | for i in xrange(numvals): 192 | for tup in helper(n-1, seqSoFar + [i]): 193 | yield tup 194 | for tup in helper(length, []): 195 | yield tup 196 | 197 | def constrainedStringA(): 198 | numelems = multinomialDraw(stringLengths, stringLengthProbs, isStructural=True) 199 | seq = repeat(numelems, lambda: int(flip(0.5))) 200 | if numelems % 2 == 0: 201 | factor(-penaltyMultiplier * len(filter(lambda num: num == 1, seq))) 202 | else: 203 | factor(-penaltyMultiplier * len(filter(lambda num: num == 0, seq))) 204 | return tuple(seq) 205 | 206 | def constrainedStringATrueDist(): 207 | hist = {} 208 | for numelems in stringLengths: 209 | # Probability of choosing this many elements 210 | numlp = erp.multinomial_logprob(stringLengths.index(numelems), stringLengthProbs) 211 | for seq in stringsOfLength(numelems, 2): 212 | # Prior probability of each element value 213 | lp = -numelems*math.log(2) 214 | # Penalties 215 | if numelems % 2 == 0: 216 | lp -= penaltyMultiplier * len(filter(lambda num: num == 1, seq)) 217 | else: 218 | lp -= penaltyMultiplier * len(filter(lambda num: num == 0, seq)) 219 | hist[seq] = numlp + lp 220 | # Normalize by partition function 221 | logz = math.log(sum(map(lambda lp: math.exp(lp), hist.values()))) 222 | for seq in hist: 223 | hist[seq] = math.exp(hist[seq] - logz) 224 | return hist 225 | 226 | 227 | def constrainedStringB(): 228 | onethird = 1.0/3 229 | numelems = multinomialDraw(stringLengths, stringLengthProbs, isStructural=True) 230 | seq = repeat(numelems, lambda: multinomial([onethird, onethird, onethird])) 231 | numIdenticalConsec = 0 232 | for i in xrange(numelems-1): 233 | numIdenticalConsec += (seq[i] == seq[i+1]) 234 | factor(-penaltyMultiplier * numIdenticalConsec) 235 | numDifferentOpposing = 0 236 | for i in xrange(numelems/2): 237 | numDifferentOpposing += (seq[i] != seq[numelems-1-i]) 238 | factor(-penaltyMultiplier * numDifferentOpposing) 239 | return tuple(seq) 240 | 241 | def constrainedStringBTrueDist(): 242 | hist = {} 243 | for numelems in stringLengths: 244 | # Probability of choosing this many elements 245 | numlp = erp.multinomial_logprob(stringLengths.index(numelems), stringLengthProbs) 246 | for seq in stringsOfLength(numelems, 3): 247 | # Prior probability of each element value 248 | lp = -numelems*math.log(3) 249 | # Identical consecutive element penalty 250 | numIdenticalConsec = 0 251 | for i in xrange(numelems-1): 252 | numIdenticalConsec += (seq[i] == seq[i+1]) 253 | lp -= penaltyMultiplier * numIdenticalConsec 254 | # Different opposing element penalty 255 | numDifferentOpposing = 0 256 | for i in xrange(numelems/2): 257 | numDifferentOpposing += (seq[i] != seq[numelems-1-i]) 258 | lp -= penaltyMultiplier * numDifferentOpposing 259 | hist[seq] = numlp + lp 260 | # Normalize by partition function 261 | logz = math.log(sum(map(lambda lp: math.exp(lp), hist.values()))) 262 | for seq in hist: 263 | hist[seq] = math.exp(hist[seq] - logz) 264 | return hist 265 | 266 | def klDivergence(P, Q): 267 | kldiv = 0.0 268 | for x in P: 269 | p = P[x] 270 | q = Q[x] 271 | if p != 0.0: 272 | logq = math.log(q) if q != 0.0 else -float('inf') 273 | kldiv += (math.log(p) - logq) * p 274 | return kldiv 275 | 276 | def totalVariationDist(P, Q): 277 | total = 0.0 278 | for x in P: 279 | total += abs(P[x] - Q[x]) 280 | return 0.5*total 281 | 282 | 283 | ############################### 284 | 285 | if __name__ == "__main__": 286 | 287 | # compareForwardToMHDists(numOnes, 1000) 288 | # compareForwardToMHDists(sumOfTen, 1000) 289 | # compareForwardToMHDists(sumOfTenWhile, 1000) 290 | # compareForwardToMHDists(sumOfTenFor, 1000) 291 | # compareForwardToMHDists(sumOfTenMap, 1000) 292 | # compareForwardToMHMeans(oneGaussian, 10000) 293 | # compareForwardToMHMeans(oneGamma, 10000) 294 | # compareForwardToMHMeans(oneBeta, 10000) 295 | # compareForwardToMHMeans(oneBinomial, 10000) 296 | # compareForwardToMHMeans(onePoisson, 10000) 297 | 298 | # print memTest() 299 | 300 | # print distrib(sprinklerTest, traceMH, 10000) 301 | 302 | # print MAP(oneGaussian, traceMH, 10000) 303 | 304 | # print distrib(constrainedStringA, traceMH, 10000) 305 | # print "-------------------------------------------" 306 | # print constrainedStringATrueDist() 307 | # print totalVariationDist(constrainedStringATrueDist(), distrib(constrainedStringA, traceMH, 1000, 1, True)) 308 | # print totalVariationDist(constrainedStringATrueDist(), distrib(constrainedStringA, LARJMH, 1000, 20, None, 1, True)) 309 | # print totalVariationDist(constrainedStringBTrueDist(), distrib(constrainedStringB, traceMH, 1000, 1, True)) 310 | # print totalVariationDist(constrainedStringBTrueDist(), distrib(constrainedStringB, LARJMH, 1000, 10, None, 1, True)) 311 | cProfile.run('distrib(constrainedStringA, LARJMH, 1000, 20)', 'prof') 312 | p = pstats.Stats('prof') 313 | p.strip_dirs().sort_stats('cumulative').print_stats(10) 314 | p.strip_dirs().sort_stats('time').print_stats(10) 315 | --------------------------------------------------------------------------------