├── .gitignore
├── requirements.txt
├── montetheano
│   ├── .gitignore
│   ├── __init__.py
│   ├── README.txt
│   ├── examples
│   │   ├── gaussian_mixture_model.py
│   │   ├── bayes_occams_razor.py
│   │   ├── max_likelihood_logistic_regression.py
│   │   ├── bayesian_linear_regression.py
│   │   ├── bayesian_logistic_regression.py
│   │   ├── hierarchical_dirichlet.py
│   │   └── latent_dirichlet_allocation.py
│   ├── max_lik.py
│   ├── utils.py
│   ├── test_for_theano.py
│   ├── test_rv.py
│   ├── rv.py
│   ├── rstreams.py
│   ├── sample.py
│   ├── for_theano.py
│   ├── test_distributions.py
│   └── distributions.py
├── setup.py
└── README.rst

/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | theano
2 |
--------------------------------------------------------------------------------
/montetheano/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | *.swp
3 |
--------------------------------------------------------------------------------
/montetheano/__init__.py:
--------------------------------------------------------------------------------
1 | from rstreams import RandomStreams
2 | import distributions  # populates registry
3 | from rv import energy
4 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | from distutils.core import setup
4 |
5 | setup(name='montetheano',
6 |       packages=['montetheano'],
7 |       )
8 |
--------------------------------------------------------------------------------
/montetheano/README.txt:
--------------------------------------------------------------------------------
1 | Files in here:
2 |
3 | __init__.py - package entry point; imports RandomStreams, the distributions registry, and energy
4 | for_theano.py - things that could maybe migrate upstream.
5 | sample.py - algorithms for drawing samples by MCMC
6 |
7 | rstreams.py - RandomStreams and associated registries
8 | distributions.py - distribution-specific code (normal, bernoulli, etc.)
9 | rv.py - functions for working with random variables.
10 | 11 | -------------------------------------------------------------------------------- /montetheano/examples/gaussian_mixture_model.py: -------------------------------------------------------------------------------- 1 | import numpy, pylab 2 | import theano 3 | from theano import tensor 4 | from rstreams import RandomStreams 5 | import distributions 6 | from sample import mh2_sample 7 | from rv import full_log_likelihood 8 | 9 | s_rng = RandomStreams(3424) 10 | 11 | p = s_rng.dirichlet(numpy.asarray([1, 1]))[0] 12 | m1 = s_rng.uniform(low=-5, high=5) 13 | m2 = s_rng.uniform(low=-5, high=5) 14 | v = s_rng.uniform(low=0, high=1) 15 | 16 | C = s_rng.binomial(1, p, draw_shape=(4,)) 17 | m = tensor.switch(C, m1, m2) 18 | D = s_rng.normal(m, v, draw_shape=(4,)) 19 | 20 | D_data = numpy.asarray([1, 1.2, 3, 3.4], dtype=theano.config.floatX) 21 | 22 | givens = dict([(D, D_data)]) 23 | sampler = mh2_sample(s_rng, [p, m1, m2, v], givens) 24 | 25 | samples = sampler(200, 1000, 100) 26 | print samples[0].mean(), samples[1].mean(), samples[2].mean(), samples[3].mean() 27 | -------------------------------------------------------------------------------- /montetheano/examples/bayes_occams_razor.py: -------------------------------------------------------------------------------- 1 | import numpy, pylab 2 | import theano 3 | from theano import tensor 4 | from rstreams import RandomStreams 5 | import distributions 6 | from sample import mh2_sample 7 | from for_theano import evaluate 8 | from rv import full_log_likelihood 9 | 10 | s_rng = RandomStreams(23424) 11 | 12 | fair_prior = 0.999 13 | 14 | coin_weight = tensor.switch(s_rng.binomial(1, fair_prior) > 0.5, 0.5, s_rng.dirichlet([1, 1])[0]) 15 | 16 | make_coin = lambda p, size: s_rng.binomial(1, p, draw_shape=(size,)) 17 | coin = lambda size: make_coin(coin_weight, size) 18 | 19 | for size in [1, 3, 6, 10, 20, 30, 50, 70, 100]: 20 | data = evaluate(make_coin(0.9, size)) 21 | 22 | sampler = mh2_sample(s_rng, [coin_weight], {coin(size) : data}) 23 | 24 | print "nr of examples", size, ", estimated probability", sampler(nr_samples=400, burnin=20000, lag=10)[0].mean() 25 | -------------------------------------------------------------------------------- /montetheano/max_lik.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import theano 3 | from theano import tensor 4 | from for_theano import ancestors, infer_shape, evaluate_with_assignments, evaluate 5 | from rv import is_raw_rv, full_log_likelihood, lpdf 6 | 7 | def likelihood_gradient(observations = {}, learning_rate = 0.1): 8 | all_vars = ancestors(list(observations.keys())) 9 | 10 | for o in observations: 11 | assert o in all_vars 12 | if not is_raw_rv(o): 13 | raise TypeError(o) 14 | 15 | RVs = [v for v in all_vars if is_raw_rv(v)] 16 | free_RVs = [v for v in RVs if v not in observations] 17 | 18 | # Instantiate actual values for the different random variables: 19 | params = dict() 20 | for v in free_RVs: 21 | f = theano.function([], v, mode=theano.Mode(linker='py', optimizer=None)) 22 | params[v] = theano.shared(f()) 23 | 24 | # Compute the full log likelihood: 25 | full_observations = dict(observations) 26 | full_observations.update(params) 27 | log_likelihood = full_log_likelihood(full_observations) 28 | 29 | # Construct the update equations for learning: 30 | updates = dict() 31 | for frvs in params.values(): 32 | updates[frvs] = frvs + learning_rate * tensor.grad(log_likelihood, frvs) 33 | 34 | return params, updates, log_likelihood 35 | 36 
| 37 |
--------------------------------------------------------------------------------
/montetheano/examples/max_likelihood_logistic_regression.py:
--------------------------------------------------------------------------------
1 | import numpy
2 | import theano
3 | from theano import tensor
4 | from rstreams import RandomStreams
5 | import distributions
6 | from sample import hybridmc_sample
7 | from rv import full_log_likelihood
8 |
9 | from max_lik import likelihood_gradient
10 |
11 | s_rng = RandomStreams(3424)
12 |
13 | # Weight prior:
14 | w = s_rng.normal(0, 2, draw_shape=(3,))
15 |
16 | # Linear model:
17 | x = tensor.matrix('x')
18 | y = tensor.nnet.sigmoid(tensor.dot(x, w))
19 |
20 | # Bernoulli observation model:
21 | t = s_rng.binomial(p=y, draw_shape=(4,))
22 |
23 | # Some data:
24 | X_data = numpy.asarray([[-1.5, -0.4, 1.3, 2.2], [-1.1, -2.2, 1.3, 0], [1., 1., 1., 1.]], dtype=theano.config.floatX).T
25 | Y_data = numpy.asarray([1., 1., 0., 0.], dtype=theano.config.floatX)
26 |
27 | # Compute gradient updates:
28 | observations = dict([(t, Y_data)])
29 | params, updates, log_likelihood = likelihood_gradient(observations)
30 |
31 | # Compile training function and assign input data as givens:
32 | givens = dict([(x, X_data)])
33 | train = theano.function([], [log_likelihood], givens=givens, updates=updates)
34 |
35 | # Run 100 epochs of training:
36 | for i in range(100):
37 |     print "epoch", i, ", log likelihood:", train()[0]
38 |
39 |
40 | # Generate testing function:
41 | givens = dict([(x, X_data)])
42 | givens.update(params)
43 | test = theano.function([], [y], givens=givens)
44 |
45 | print test(), Y_data
--------------------------------------------------------------------------------
/montetheano/utils.py:
--------------------------------------------------------------------------------
1 | """
2 | Misc utils
3 | """
4 | import __builtin__
5 |
6 | class ClobberContext(object):
7 |     """
8 |     Makes an object usable with 'with' statements.
9 |
10 |     with obj as _:
11 |         ...  # obj.method is accessible as method()
12 |
13 |     Danger - the illusion is not perfect! It works by inserting things into
14 |     __builtin__ namespace, so if there are local variables in enclosing scopes,
15 |     they will actually trump the object's own methods.
16 |     """
17 |     def __enter__(self):
18 |         assert not hasattr(self, '_clobbered_symbols')
19 |         self._clobbered_symbols = {}
20 |         for name in self.clobber_symbols:
21 |             if hasattr(__builtin__, name):
22 |                 self._clobbered_symbols[name] = getattr(__builtin__, name)
23 |             if hasattr(self, name):
24 |                 setattr(__builtin__, name, getattr(self, name))
25 |         return self
26 |
27 |     def __exit__(self, e_type, e_val, e_traceback):
28 |         for name in self.clobber_symbols:
29 |             if name in self._clobbered_symbols:
30 |                 setattr(__builtin__, name, self._clobbered_symbols[name])
31 |             elif hasattr(__builtin__, name):
32 |                 delattr(__builtin__, name)
33 |         del self._clobbered_symbols
34 |
35 |
36 | class Updates(dict):
37 |     """
38 |     Updates is a dictionary for which the '+' operator does an update.
39 |
40 |     Not a normal update though, because a KeyError is raised if a symbol is
41 |     present in both dictionaries with conflicting values.
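    For example (a sketch of the intended semantics; `u` and `v` stand for
    any Theano shared variables):

        a = Updates({u: u + 1})
        b = Updates({v: v * 2})
        c = a + b                  # ok: disjoint keys, c contains both updates
        a + Updates({u: u - 1})    # raises KeyError: conflicting values for u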
42 |     """
43 |     def __add__(self, other):
44 |         rval = Updates(self)
45 |         rval += other  # see: __iadd__
46 |         return rval
47 |     def __iadd__(self, other):
48 |         d = dict(other)
49 |         for k,v in d.items():
50 |             if k in self and v != self[k]:
51 |                 raise KeyError()
52 |             self[k] = v
53 |         return self
54 |
55 |
--------------------------------------------------------------------------------
/montetheano/examples/bayesian_linear_regression.py:
--------------------------------------------------------------------------------
1 | import numpy, pylab
2 | import theano
3 | from theano import tensor
4 | from rstreams import RandomStreams
5 | import distributions
6 | from sample import mh2_sample
7 | from rv import full_log_likelihood
8 | from for_theano import evaluate
9 |
10 | s_rng = RandomStreams(3424)
11 |
12 | def poly_expansion(x, order):
13 |     x = x.T
14 |     result, updates = theano.scan(fn=lambda prior_result, x: prior_result * x,
15 |                                   outputs_info=tensor.ones_like(x),
16 |                                   non_sequences=x,
17 |                                   n_steps=order)
18 |
19 |     return tensor.concatenate([tensor.ones([x.shape[1],1]), tensor.reshape(result.T, (x.shape[1], x.shape[0]*order))], axis=1)
20 |
21 | # Define priors to be inverse gamma distributions
22 | alpha = 1/s_rng.gamma(1., 2.)
23 | beta = 1/s_rng.gamma(1., .1)
24 |
25 | # Order of the model
26 | # TODO: this currently has to be fixed, would be nice if this could also be a RV!
27 | m = 7 #s_rng.random_integers(1, 10)
28 | w = s_rng.normal(0, beta, draw_shape=(m+1,))
29 |
30 | # Input variable used for training
31 | x = tensor.matrix('x')
32 | # Input variable used for testing
33 | xn = tensor.matrix('xn')
34 |
35 | # Actual linear model
36 | y = lambda x_in: tensor.dot(poly_expansion(x_in, m), w)
37 |
38 | # Observation model
39 | t = s_rng.normal(y(x), alpha, draw_shape=(10,))
40 |
41 | # Generate some noisy training data (sine + noise)
42 | X_data = numpy.arange(-1,1,0.3)
43 | Y_data = numpy.sin(numpy.pi*X_data) + 0.1*numpy.random.randn(*X_data.shape)
44 | X_data.shape = (X_data.shape[0],1)
45 |
46 | X_new = numpy.arange(-1,1,0.05)
47 | X_new.shape = (X_new.shape[0],1)
48 |
49 | pylab.plot(X_data, Y_data, 'x', markersize=10)
50 |
51 | # Generate samples from the model
52 | sampler = mh2_sample(s_rng, [y(xn)], observations={t: Y_data}, givens={x: X_data, xn: X_new})
53 | samples = sampler(50, 1000, 200)
54 | pylab.errorbar(X_new, numpy.mean(samples[0].T, axis=1), numpy.std(samples[0].T, axis=1))
55 | pylab.show()
56 | pylab.plot(X_new, samples[0].T)
57 | pylab.show()
58 |
--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
1 | ============
2 | Monte Theano
3 | ============
4 |
5 | **This is an inactive project** - for a Theano-based inference engine for graphical models, check out the
6 | `PyMC3 <https://github.com/pymc-devs/pymc/tree/pymc3>`_ branch of `PyMC <http://pymc-devs.github.io/pymc/>`_.
7 |
8 | Monte Carlo inference algorithms for stochastic Theano programs.
9 |
10 | 1. Directed models: Use Theano (with RandomStreams) to build a directed graphical model, then
11 |
12 |    - Estimate likelihood of a full assignment.
13 |
14 |    - Condition on observations, draw samples from posterior over latent internal variables.
15 |
16 |    - Estimate marginal likelihood analytically or by MCMC.
17 |
18 |    - Learn by inferring MAP or ML estimates of latent variables.
19 |
20 | 2. Undirected models: still thinking about if/how to do this. And what about
21 |    factor graphs?
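For a flavour of the intended API, here is a minimal sketch distilled from
the scripts in ``montetheano/examples`` (like those scripts, it assumes it is
run from inside the ``montetheano`` package directory)::

    import numpy
    from rstreams import RandomStreams
    import distributions  # populates the sampler/pdf registry
    from sample import mh2_sample

    s_rng = RandomStreams(3424)

    mu = s_rng.uniform(low=-5, high=5)            # latent variable
    x = s_rng.normal(mu, 1.0, draw_shape=(4,))    # observation model

    data = numpy.asarray([1.0, 1.2, 3.0, 3.4])
    sampler = mh2_sample(s_rng, [mu], {x: data})  # condition on x = data
    samples = sampler(200, 1000, 100)             # nr_samples, burnin, lag
    print samples[0].mean()                       # posterior mean estimate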
22 | 23 | 24 | 25 | How does it work 26 | ---------------- 27 | 28 | Not totally clear yet! Ingredients will be: 29 | 30 | - symbolic representations of likelihood functions 31 | 32 | - automatically factorizing directed models 33 | 34 | - generic Metropolis Hastings samplers 35 | 36 | - Hamiltonian Monte Carlo 37 | 38 | - Importance sampling? 39 | 40 | - Rejection sampling? 41 | 42 | - slice sampling? 43 | 44 | - Tempered sampling? 45 | 46 | It seems like it should be possible to automatically recognize opportunities for 47 | blocked Gibbs sampling, in which for example we recognize blocks of continuous 48 | variables for an HMC sampler. Not sure if this is a useful thing to do. 49 | 50 | 51 | Similar Packages 52 | ---------------- 53 | 54 | - MIT-Church (probabilistic scheme) 55 | 56 | - IBAL (probabilistic OCAML) 57 | 58 | - PyMC (MCMC inference in Python) 59 | 60 | - Infer.net (Csoft) 61 | 62 | - Factorie 63 | 64 | - PMTK 65 | 66 | - Dyna 67 | 68 | This package differs from the ones above in building on top of Theano, which already has a) a 69 | natural graph data structure for expressing directed graphical models, b) a 70 | performance-oriented backend with GPU support, and c) automatic symbolic differentiation which 71 | makes HMC and optimization routines much easier to implement. 72 | -------------------------------------------------------------------------------- /montetheano/test_for_theano.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import theano 3 | from theano import tensor 4 | from for_theano import infer_shape 5 | import rstreams 6 | import distributions 7 | import for_theano 8 | 9 | def test_infer_shape_const(): 10 | shp = infer_shape(tensor.alloc(0, 5, 6, 7)) 11 | print shp 12 | assert shp == (5, 6, 7) 13 | 14 | def test_infer_shape_shared_var(): 15 | sv = theano.shared(numpy.asarray([2,3,5])) 16 | assert infer_shape(sv) == (3,) 17 | assert infer_shape(sv * 2 + 75) == (3,) 18 | 19 | def test_shape_infer_shape(): 20 | sv = theano.shared(numpy.asarray([2,3,5])) 21 | assert infer_shape(sv.shape) == (1,) 22 | 23 | def test_shape_rv(): 24 | R = tensor.shared_randomstreams.RandomStreams(234) 25 | n = R.normal(avg=0, std=1.0) 26 | assert infer_shape(n) == () 27 | 28 | def test_shape_scalar_rv_w_size(): 29 | R = tensor.shared_randomstreams.RandomStreams(234) 30 | n = R.normal(avg=0, std=1.0, size=(40,20)) 31 | assert infer_shape(n) == (40, 20) 32 | 33 | def test_shape_scalar_rv_w_size_rstreams(): 34 | R = rstreams.RandomStreams(234) 35 | n = R.normal(mu=0, sigma=1.0, draw_shape=(40,20)) 36 | 37 | assert infer_shape(n) == (40, 20) 38 | 39 | def test_shape_vector_rv_rstreams(): 40 | R = rstreams.RandomStreams(234) 41 | n = R.normal(mu=numpy.zeros(10,), sigma=numpy.ones(10,), draw_shape=(10,)) 42 | assert infer_shape(n) == (10,) 43 | 44 | def test_shape_vector_rv_dirichlet_rstreams(): 45 | R = rstreams.RandomStreams(234) 46 | n = R.dirichlet(alpha=numpy.ones(10,), draw_shape=(10,)) 47 | assert infer_shape(n) == (10,), infer_shape(n) 48 | 49 | def test_find(): 50 | 51 | query = tensor.ivector() 52 | keepset = tensor.ivector() 53 | r = for_theano.find(query, keepset) 54 | 55 | assert r.ndim == 1 56 | assert 'int' in r.dtype 57 | 58 | f = theano.function([query, keepset], r) 59 | 60 | assert numpy.all(f([], []) == []) 61 | assert numpy.all(f([2, 1, 0, 4, 3], [5, 5, 5, 5]) == []) 62 | assert numpy.all(f([2, 1, 0, 4, 3], [4]) == [3]) 63 | assert numpy.all(f([2, 1, 0, 4, 3], [4, 1]) == [1, 3]) 64 | assert numpy.all(f([2, 1, 0, 
4, 3], [1, 4]) == [1, 3]) 65 | assert numpy.all(f([], [1, 4]) == []) 66 | -------------------------------------------------------------------------------- /montetheano/examples/bayesian_logistic_regression.py: -------------------------------------------------------------------------------- 1 | import numpy, pylab 2 | import theano 3 | from theano import tensor 4 | from rstreams import RandomStreams 5 | import distributions 6 | from sample import hybridmc_sample 7 | from rv import full_log_likelihood 8 | 9 | s_rng = RandomStreams(3424) 10 | 11 | # Define model 12 | w = s_rng.normal(0, 4, draw_shape=(2,)) 13 | 14 | x = tensor.matrix('x') 15 | y = tensor.nnet.sigmoid(tensor.dot(x, w)) 16 | 17 | t = s_rng.binomial(p=y, draw_shape=(4,)) 18 | 19 | # Define data 20 | X_data = numpy.asarray([[-1.5, -0.4, 1.3, 2.2],[-1.1, -2.2, 1.3, 0]], dtype=theano.config.floatX).T 21 | Y_data = numpy.asarray([1., 1., 0., 0.], dtype=theano.config.floatX) 22 | 23 | # Plot full likelihood function 24 | RVs = dict([(t, Y_data)]) 25 | lik = full_log_likelihood(RVs) 26 | 27 | givens = dict([(x, X_data)]) 28 | lik_func = theano.function([w], lik, givens=givens, allow_input_downcast=True) 29 | 30 | delta = .1 31 | x_range = numpy.arange(-10.0, 10.0, delta) 32 | y_range = numpy.arange(-10.0, 10.0, delta) 33 | X, Y = numpy.meshgrid(x_range, y_range) 34 | 35 | response = [] 36 | for xl, yl in zip(X.flatten(), Y.flatten()): 37 | response.append(lik_func([xl, yl])) 38 | 39 | pylab.figure(1) 40 | pylab.contour(X, Y, numpy.exp(numpy.asarray(response)).reshape(X.shape), 20) 41 | pylab.draw() 42 | 43 | # Generate samples from the model 44 | sample, ll, updates = hybridmc_sample(s_rng, [w], observations={t: Y_data}) 45 | 46 | sampler = theano.function([], sample + [ll] , updates=updates, givens={x: X_data}, allow_input_downcast=True) 47 | out = theano.function([w, x], y, allow_input_downcast=True) 48 | 49 | delta = 0.1 50 | x_range = numpy.arange(-3, 3, delta) 51 | y_range = numpy.arange(-3, 3, delta) 52 | X, Y = numpy.meshgrid(x_range, y_range) 53 | 54 | b = numpy.zeros(X.shape) 55 | for i in range(1000): 56 | w, ll = sampler() 57 | 58 | if i % 50 == 0: 59 | pylab.figure(1) 60 | pylab.plot(w[0], w[1], 'x') 61 | pylab.draw() 62 | 63 | response = out(w, numpy.vstack((X.flatten(), Y.flatten())).T) 64 | response = response.reshape(X.shape) 65 | b += response 66 | 67 | pylab.figure(2) 68 | pylab.contour(X, Y, response) 69 | pylab.plot(X_data[:2,1], X_data[:2,0], 'kx') 70 | pylab.plot(X_data[2:,1], X_data[2:,0], 'bo') 71 | pylab.draw() 72 | pylab.clf() 73 | 74 | # Plot averaged model 75 | pylab.figure(1) 76 | pylab.clf() 77 | pylab.contour(X, Y, b) 78 | pylab.plot(X_data[:2,0], X_data[:2,1], 'kx') 79 | pylab.plot(X_data[2:,0], X_data[2:,1], 'bo') 80 | pylab.show() 81 | -------------------------------------------------------------------------------- /montetheano/examples/hierarchical_dirichlet.py: -------------------------------------------------------------------------------- 1 | import numpy, pylab 2 | import theano 3 | from rstreams import RandomStreams 4 | import distributions 5 | from sample import mh2_sample 6 | from for_theano import memoized 7 | 8 | s_rng = RandomStreams(23424) 9 | 10 | # Define data 11 | marbles_bag_1 = numpy.asarray([[1,1,1,1,1,1], 12 | [0,0,0,0,0,0], 13 | [0,0,0,0,0,0], 14 | [0,0,0,0,0,0], 15 | [0,0,0,0,0,0]], dtype=theano.config.floatX).T 16 | marbles_bag_2 = numpy.asarray([[0,0,0,0,0,0], 17 | [1,1,1,1,1,1], 18 | [0,0,0,0,0,0], 19 | [0,0,0,0,0,0], 20 | [0,0,0,0,0,0]], dtype=theano.config.floatX).T 21 | 
marbles_bag_3 = numpy.asarray([[0,0,0,0,0,0], 22 | [0,0,0,0,0,0], 23 | [0,0,0,0,0,0], 24 | [1,1,1,1,1,1], 25 | [0,0,0,0,0,0]], dtype=theano.config.floatX).T 26 | marbles_bag_4 = numpy.asarray([[0],[0],[0],[0],[1]], dtype=theano.config.floatX).T 27 | 28 | 29 | 30 | # Define flat model 31 | bag_prototype = memoized(lambda bag: s_rng.dirichlet(numpy.asarray([1, 1, 1, 1, 1])*5)) 32 | draw_marbles = lambda bag, nr: s_rng.multinomial(1, bag_prototype(bag), draw_shape=(nr,)) 33 | 34 | # Generate samples from the model 35 | givens = {draw_marbles(1,6): marbles_bag_1, 36 | draw_marbles(2,6): marbles_bag_2, 37 | draw_marbles(3,6): marbles_bag_3, 38 | draw_marbles(4,1): marbles_bag_4} 39 | 40 | sampler = mh2_sample(s_rng, [draw_marbles(4,1)], givens) 41 | 42 | samples = sampler(200, 100, 100) 43 | data = samples[0] 44 | 45 | # Show histogram 46 | pylab.subplot(211) 47 | pylab.bar(range(5), data.sum(axis=0)) 48 | pylab.title("Flat model") 49 | 50 | 51 | 52 | # Define hierarchical model 53 | phi = s_rng.dirichlet(numpy.asarray([1, 1, 1, 1, 1])) 54 | alpha = s_rng.gamma(2., 2.) 55 | prototype = phi*alpha 56 | 57 | bag_prototype = memoized(lambda bag: s_rng.dirichlet(prototype)) 58 | draw_marbles = lambda bag, nr: s_rng.multinomial(1, bag_prototype(bag), draw_shape=(nr,)) 59 | 60 | # Generate samples from the model 61 | givens = {draw_marbles(1,6): marbles_bag_1, 62 | draw_marbles(2,6): marbles_bag_2, 63 | draw_marbles(3,6): marbles_bag_3, 64 | draw_marbles(4,1): marbles_bag_4} 65 | 66 | sampler = mh2_sample(s_rng, [draw_marbles(4,1)], givens) 67 | 68 | samples = sampler(200, 100, 100) 69 | data = samples[0] 70 | 71 | # Show histogram 72 | pylab.subplot(212) 73 | pylab.bar(range(5), data.sum(axis=0)) 74 | pylab.title("Hierarchical model") 75 | pylab.show() 76 | -------------------------------------------------------------------------------- /montetheano/examples/latent_dirichlet_allocation.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import theano 3 | from theano import tensor 4 | from rstreams import RandomStreams 5 | import distributions 6 | from sample import mh2_sample, mh_sample 7 | from for_theano import memoized, evaluate 8 | 9 | s_rng = RandomStreams(123) 10 | 11 | nr_words = 4 12 | nr_topics = 2 13 | alpha = 0.8 14 | beta = 1. 15 | 16 | # Topic distribution per document 17 | doc_mixture = memoized(lambda doc_id: s_rng.dirichlet([alpha/nr_topics]*nr_topics)) 18 | 19 | # Word distribution per topic 20 | topic_mixture = memoized(lambda top_id: s_rng.dirichlet([beta/nr_words]*nr_words)) 21 | 22 | # For each word in the document, draw a topic according to multinomial with document specific prior 23 | # TODO, see comment below: topics = memoized(lambda doc_id, nr: s_rng.multinomial(1, doc_mixture[doc_id], draw_shape=(nr,))) 24 | topics = memoized(lambda doc_id, nr: s_rng.binomial(1, doc_mixture(doc_id)[0], draw_shape=(nr,))) 25 | 26 | # Draw words for a specific topic 27 | word_topic = lambda top_id: s_rng.multinomial(1, topic_mixture(top_id)) 28 | 29 | # TODO: memoized only works on the pre-compiled graph. This makes it fail in the case where we have to map 30 | # a vector of topics to individual multinomials with as priors the different topics. 
In the case of two topics
31 | # we can hack around this by using a binomial topic distribution and using a switch statement here:
32 | word_topic_mapper = lambda top_id: tensor.switch(top_id, word_topic(0), word_topic(1))
33 |
34 | # Maps topics to words
35 | # TODO, see comment above: get_words = memoized(lambda doc_id, nr: theano.map(word_topic, topics(doc_id, nr))[0])
36 | get_words = memoized(lambda doc_id, nr: theano.map(word_topic_mapper, topics(doc_id, nr))[0])
37 |
38 | # Define training 'documents'
39 | document_1 = numpy.asarray([[1,0,0,0],
40 |                             [1,0,0,0],
41 |                             [0,1,0,0],
42 |                             [1,0,0,0],
43 |                             [0,1,0,0],
44 |                             [0,1,0,0],
45 |                             [1,0,0,0],
46 |                             [0,1,0,0],
47 |                             [1,0,0,0],
48 |                             [0,1,0,0]], dtype=theano.config.floatX)
49 |
50 | document_2 = numpy.asarray([[0,0,1,0],
51 |                             [0,0,0,1],
52 |                             [0,0,0,1],
53 |                             [0,0,0,1],
54 |                             [0,0,1,0],
55 |                             [0,0,1,0],
56 |                             [0,0,0,1],
57 |                             [0,0,1,0],
58 |                             [0,0,1,0],
59 |                             [0,0,1,0]], dtype=theano.config.floatX)
60 |
61 | document_3 = numpy.asarray([[1,0,0,0],
62 |                             [0,0,0,1],
63 |                             [0,1,0,0],
64 |                             [0,1,0,0],
65 |                             [0,0,1,0],
66 |                             [0,1,0,0],
67 |                             [0,0,0,1],
68 |                             [1,0,0,0],
69 |                             [0,0,1,0],
70 |                             [0,0,1,0]], dtype=theano.config.floatX)
71 |
72 | # Map documents to RVs
73 | givens = {get_words(1, 10): document_1,
74 |           get_words(2, 10): document_2,
75 |           get_words(3, 10): document_3}
76 |
77 | # Build sampler
78 | sample, ll, updates = mh_sample(s_rng, [doc_mixture(1), doc_mixture(2), doc_mixture(3), topic_mixture(0), topic_mixture(1)])
79 | sampler = theano.function([], sample, updates=updates, givens=givens, allow_input_downcast=True)
80 |
81 | # Run sampling
82 | for i in range(10000):
83 |     d = sampler()
84 |
85 |     if i % 1000 == 0:
86 |         print d
87 |
--------------------------------------------------------------------------------
/montetheano/test_rv.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import numpy
3 | import theano
4 | from theano import tensor
5 | from rstreams import RandomStreams
6 | import distributions  # triggers registry
7 | import rv
8 | from for_theano import where
9 |
10 | def test_dag_condition_top():
11 |     """
12 |     Easy test of conditioning
13 |     """
14 |     with RandomStreams(234) as _:
15 |         mu = normal(10, .1)
16 |         x = normal(mu, sigma=1)
17 |
18 |     post_x = rv.condition([x], {mu: -7})
19 |     theano.printing.debugprint(post_x)
20 |
21 |     f = theano.function([], post_x)
22 |     r = [f() for i in range(10)]
23 |     assert numpy.allclose(numpy.mean(r), -7.4722755432)
24 |
25 |
26 | def test_dag_condition_bottom():
27 |     """
28 |     Test of conditioning an upper node on a lower one
29 |     """
30 |     with RandomStreams(234) as _:
31 |         mu = normal(10, .1)
32 |         x = normal(mu, sigma=1)
33 |
34 |     post_mu = rv.condition([mu], {x: -7})
35 |     theano.printing.debugprint(post_mu)
36 |
37 |     f = theano.function([], post_mu)
38 |     f()
39 |
40 |
41 | def test_normal_simple():
42 |     s_rng = RandomStreams(23)
43 |     n = s_rng.normal()
44 |
45 |     p0 = rv.lpdf(n, 0)
46 |     p1 = rv.lpdf(n, 1)
47 |     pn1 = rv.lpdf(n, -1)
48 |
49 |     f = theano.function([], [p0, p1, pn1])
50 |
51 |     pvals = f()
52 |     targets = numpy.asarray([
53 |         numpy.log(1.0 / numpy.sqrt(2*numpy.pi)),
54 |         numpy.log(numpy.exp(-0.5) / numpy.sqrt(2*numpy.pi)),
55 |         numpy.log(numpy.exp(-0.5) / numpy.sqrt(2*numpy.pi)),
56 |         ])
57 |
58 |     assert numpy.allclose(pvals,targets), (pvals, targets)
59 |
60 |
61 | def test_normal_w_params():
62 |     s_rng = RandomStreams(23)
63 |     n = s_rng.normal(mu=2, sigma=3)
64 |
65 |     p0 = rv.lpdf(n, 0)
66 |     p1 = rv.lpdf(n, 2)
67 |     pn1 = rv.lpdf(n,
-1) 68 | 69 | f = theano.function([], [p0, p1, pn1]) 70 | 71 | pvals = f() 72 | targets = numpy.asarray([ 73 | numpy.log(numpy.exp(-0.5 * ((2.0/3.0)**2)) / 74 | numpy.sqrt(2*numpy.pi*9.0)), 75 | numpy.log(numpy.exp(0) / numpy.sqrt(2*numpy.pi*9)), 76 | numpy.log(numpy.exp(-0.5 * ((3.0/3.0)**2)) / 77 | numpy.sqrt(2*numpy.pi*9.0)), 78 | ]) 79 | 80 | assert numpy.allclose(pvals,targets), (pvals, targets) 81 | 82 | 83 | def test_normal_nonscalar(): 84 | s_rng = RandomStreams(234) 85 | n = s_rng.normal() 86 | 87 | data = numpy.asarray([1, 2, 3, 4, 5]) 88 | p_data = rv.lpdf(n, data) 89 | 90 | f = theano.function([], [p_data]) 91 | 92 | pvals = f() 93 | targets = numpy.log(numpy.exp(-0.5 * (data**2)) / numpy.sqrt(2*numpy.pi)) 94 | 95 | assert numpy.allclose(pvals,targets), (pvals, targets) 96 | 97 | 98 | def test_normal_w_broadcasting(): 99 | raise NotImplementedError() 100 | 101 | 102 | def test_uniform_simple(): 103 | s_rng = RandomStreams(234) 104 | u = s_rng.uniform() 105 | 106 | p0 = rv.lpdf(u, 0) 107 | p1 = rv.lpdf(u, 1) 108 | p05 = rv.lpdf(u, 0.5) 109 | pn1 = rv.lpdf(u, -1) 110 | 111 | f = theano.function([], [p0, p1, p05, pn1]) 112 | 113 | pvals = f() 114 | targets = numpy.log(numpy.asarray([1.0, 1.0, 1.0, 0.0])) 115 | 116 | assert numpy.allclose(pvals,targets), (pvals, targets) 117 | 118 | 119 | def test_uniform_w_params(): 120 | s_rng = RandomStreams(234) 121 | u = s_rng.uniform(low=-0.999, high=9.001) 122 | 123 | p0 = rv.lpdf(u, 0) 124 | p1 = rv.lpdf(u, 2) 125 | p05 = rv.lpdf(u, -1.5) 126 | pn1 = rv.lpdf(u, 10) 127 | 128 | f = theano.function([], [p0, p1, p05, pn1]) 129 | 130 | pvals = f() 131 | targets = numpy.log(numpy.asarray([.1, .1, 0, 0])) 132 | assert numpy.allclose(pvals,targets), (pvals, targets) 133 | 134 | 135 | def test_uniform_nonscalar(): 136 | raise NotImplementedError() 137 | 138 | 139 | def test_uniform_w_broadcasting(): 140 | raise NotImplementedError() 141 | 142 | 143 | if 0: 144 | def test_likelihood_visually(): 145 | class A(object):pass 146 | self = A() 147 | 148 | s_rng = tensor.shared_randomstreams.RandomStreams(23424) 149 | 150 | err_thresh = self.err_thresh = tensor.scalar() 151 | data_err = self.data_err = tensor.vector() 152 | data_llr = self.data_llr = tensor.vector() 153 | 154 | rv_err = self.rv_err = s_rng.uniform() 155 | rv_err_good = rv_err < err_thresh 156 | 157 | data_llr_good = data_llr[where(data_err < err_thresh)] 158 | data_llr_bad = data_llr[where(data_err >= err_thresh)] 159 | 160 | # design decisions 161 | mu_llr_good, sigma_llr_good = AdaptiveParzen()(data_llr_good, low=-5, 162 | high=-1.5, minsigma=0.01) 163 | mu_llr_bad, sigma_llr_bad = AdaptiveParzen()(data_llr_bad, low=-5, 164 | high=-1.5, minsigma=0.01) 165 | 166 | 167 | rv_llr_good = gauss_mixture(s_rng, mu=mu_llr_good, sigma=sigma_llr_good) 168 | self.sample_llr = s_rng.normal(avg=-4, std=2, size=(5,)) 169 | 170 | self.sample_llr_logprob = log_density( 171 | self.sample_llr, rv_llr_good) 172 | 173 | if os.env.get("SHOW_PLOTS", False): 174 | 175 | # test that rv_llr really is a random variable 176 | 177 | f = theano.function( 178 | [self.err_thresh, self.data_err, self.data_llr], 179 | [self.rv_err, self.rv_llr], 180 | allow_input_downcast=True) 181 | 182 | data_err = [.0, .0, .0, .7, .7, .7] 183 | data_llr = [-4.5, -4, -3.5, -2, -1.5, -1.0] 184 | 185 | r = numpy.asarray([f(.5, data_err, data_llr) for i in xrange(100)]) 186 | import matplotlib.pyplot as plt 187 | plt.scatter(r[:, 0], r[:, 1]) 188 | plt.show() 189 | 190 | test_normal_nonscalar() 191 | 
--------------------------------------------------------------------------------
/montetheano/rv.py:
--------------------------------------------------------------------------------
1 | """
2 | Functions for operating on random variables.
3 | """
4 | import theano
5 | from theano import tensor
6 | from for_theano import ancestors, as_variable, clone_keep_replacements
7 | import rstreams
8 |
9 |
10 | def is_randomstate(var):
11 |     """Return True iff `var` is a (shared) random-state variable.
12 |     """
13 |     return isinstance(var.type, rstreams.randomstate_types)
14 |
15 |
16 | def is_rv(var, blockers=None):
17 |     """
18 |     Return True iff var is a random variable.
19 |
20 |     A random variable is a variable with a randomstate object in its ancestors.
21 |     """
22 |     #TODO: could optimize by stopping the recursion as soon as a randomstate is
23 |     #      found
24 |     return any(is_randomstate(v) for v in ancestors([var], blockers=blockers))
25 |
26 |
27 | def is_raw_rv(var):
28 |     """
29 |     Return True iff `var` is the result of calling s_rng.something()
30 |     """
31 |     return var.owner and is_randomstate(var.owner.inputs[0])
32 |
33 |
34 | def all_raw_rvs(outputs):
35 |     """
36 |     Return a list of all random variables required to compute `outputs`.
37 |     """
38 |     all_vars = ancestors(outputs)
39 |     assert outputs[0] in all_vars
40 |     rval = [v for v in all_vars if is_raw_rv(v)]
41 |     return rval
42 |
43 |
44 | def typed_items(dct):
45 |     return dict([
46 |         (rv, as_variable(sample, type=rv.type))
47 |         for rv, sample in dct.items()])
48 |
49 |
50 | def condition(rvs, observations):
51 |     if len(rvs) > 1:
52 |         raise NotImplementedError()
53 |     observations = typed_items(observations)
54 |     # if none of the rvs show up in the ancestors of any observations
55 |     # then this is easy conditioning
56 |     obs_ancestors = ancestors(observations.keys(), blockers=rvs)
57 |     if any(rv in obs_ancestors for rv in rvs):
58 |         # not-so-easy conditioning
59 |         # we need to produce a sampler-driven model
60 |         raise NotImplementedError()
61 |     else:
62 |         # easy conditioning
63 |         rvs_anc = ancestors(rvs, blockers=observations.keys())
64 |         frontier = [r for r in rvs_anc
65 |                 if r.owner is None or r in observations.keys()]
66 |         cloned_inputs, cloned_outputs = clone_keep_replacements(frontier, rvs,
67 |                 replacements=observations)
68 |         return cloned_outputs
69 |
70 | # TODO: does this function belong here or in rstreams
71 | def lpdf(rv, sample, **kwargs):
72 |     """
73 |     Return the probability (density) that random variable `rv`, returned by
74 |     a call to one of the sampling routines, takes the value `sample`.
75 |     """
76 |     if not is_rv(rv):
77 |         raise TypeError('rv not recognized as a random variable', rv)
78 |
79 |     if is_raw_rv(rv):
80 |         dist_name = rstreams.rv_dist_name(rv)
81 |         pdf = rstreams.pdfs[dist_name]
82 |         return pdf(rv.owner, sample, kwargs)
83 |     else:
84 |         #TODO: infer from the ancestors of v what distribution it
85 |         #      has.
86 |         raise NotImplementedError(rv)
87 |
88 | def conditional_log_likelihood(assignment, givens):
89 |     """
90 |     Return log(P(rv0=sample | given))
91 |
92 |     assignment: rv0=val0, rv1=val1, ...
93 |     givens: var0=v0, var1=v1, ...
94 |
95 |     Each of val0, val1, ... v0, v1, ... is supposed to represent an identical
96 |     number of draws from a distribution. This function returns the real-valued
97 |     density for each one of those draws.
98 |
99 |     The output from this function may be a random variable, if not all sources
100 |     of randomness are removed by the assignment and the givens.
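    For example (a sketch; `mu` and `x` are hypothetical raw random variables
    and `x_val`, `mu_val` are numeric values of matching shape):

        mu = s_rng.normal(0, 1)
        x = s_rng.normal(mu, 1)
        ll = conditional_log_likelihood({x: x_val}, {mu: mu_val})
        # ll is a Theano expression for log P(x = x_val | mu = mu_val)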
101 |     """
102 |
103 |     for rv in assignment.keys():
104 |         if not is_rv(rv):
105 |             raise ValueError('non-random var in assignment key', rv)
106 |
107 |     # Cast assignment elements to the right kind of thing
108 |     assignment = typed_items(assignment)
109 |
110 |     rvs = assignment.keys()
111 |     #TODO: this is not ok for undirected models
112 |     #      we need to be able to let condition introduce joint
113 |     #      dependencies somehow.
114 |     #      The trouble is that lpdf wants to get the pdfs one variable at a
115 |     #      time. That makes sense for directed models, but not for
116 |     #      undirected ones.
117 |     new_rvs = condition(rvs, givens)
118 |     return full_log_likelihood(
119 |             dict([(new_rv, assignment[rv])
120 |                 for (new_rv, rv) in zip(new_rvs, rvs)]))
121 |
122 |
123 | def full_log_likelihood(assignment):
124 |     """
125 |     Return log(P(rv0=sample))
126 |
127 |     assignment: rv0=val0, rv1=val1, ...
128 |
129 |     Each of val0, val1, ... v0, v1, ... is supposed to represent an identical
130 |     number of draws from a distribution. This function returns the real-valued
131 |     density for each one of those draws.
132 |
133 |     The output from this function may be a random variable, if not all sources
134 |     of randomness are removed by the assignment.
135 |     """
136 |
137 |     for rv in assignment.keys():
138 |         if not is_rv(rv):
139 |             raise ValueError('non-random var in assignment key', rv)
140 |
141 |     # All random variables that are not assigned should stay as the same object so they can later be replaced
142 |     # If this is not done this way, they get cloned
143 |     RVs = [v for v in ancestors(assignment.keys()) if is_raw_rv(v)]
144 |     for rv in RVs:
145 |         if rv not in assignment:
146 |             assignment[rv] = rv
147 |
148 |     # Cast assignment elements to the right kind of thing
149 |     assignment = typed_items(assignment)
150 |
151 |     pdfs = [lpdf(rv, sample) for rv, sample in assignment.items()]
152 |     lik = tensor.add(*[tensor.sum(p) for p in pdfs])
153 |
154 |     dfs_variables = ancestors([lik], blockers=assignment.keys())
155 |     frontier = [r for r in dfs_variables
156 |             if r.owner is None or r in assignment.keys()]
157 |     cloned_inputs, cloned_outputs = clone_keep_replacements(frontier, [lik],
158 |             replacements=assignment)
159 |     cloned_lik, = cloned_outputs
160 |     return cloned_lik
161 |
162 |
163 | def energy(assignment, given):
164 |     """
165 |     Return -log(P(rv0=sample | given)) +- const
166 |
167 |     assignment: rv0=val0, rv1=val1, ...
168 |     given: var0=v0, var1=v1, ...
169 |
170 |     Each of val0, val1, ... v0, v1, ... is supposed to represent an identical
171 |     number of draws from a distribution. This function returns the real-valued
172 |     density for each one of those draws.
173 |
174 |     The output from this function may be a random variable, if not all sources
175 |     of randomness are removed by the assignment and the given.
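    (In sampler terms this is a potential-energy function: lower energy means
    higher probability, the sign convention the samplers in sample.py rely on.)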
176 | """ 177 | try: 178 | return -conditional_log_likelihood(assignment, given) 179 | except: 180 | # get the log_density up to an additive constant 181 | raise NotImplementedError() 182 | -------------------------------------------------------------------------------- /montetheano/rstreams.py: -------------------------------------------------------------------------------- 1 | """ 2 | Registry and definition for new-and-improved RandomStreams 3 | """ 4 | 5 | import copy 6 | import numpy 7 | import theano 8 | from theano import tensor 9 | from for_theano import elemwise_cond 10 | from for_theano import ancestors 11 | from utils import ClobberContext 12 | 13 | samplers = {} 14 | pdfs = {} 15 | ml_handlers = {} 16 | params_handlers = {} 17 | local_proposals = {} 18 | randomstate_types = (tensor.raw_random.RandomStateType,) 19 | 20 | 21 | def rv_dist_name(rv): 22 | try: 23 | return rv.owner.op.dist_name 24 | except AttributeError: 25 | try: 26 | return rv.owner.op.fn.__name__ 27 | except AttributeError: 28 | raise TypeError('rv not recognized as output of RandomFunction', rv) 29 | 30 | class RandomStreams(ClobberContext): 31 | clobber_symbols = ['pdf'] 32 | 33 | def __init__(self, seed, draw_shape=()): 34 | self.state_updates = [] 35 | self.default_instance_seed = seed 36 | self.seed_generator = numpy.random.RandomState(seed) 37 | self.default_updates = {} 38 | if draw_shape == (): 39 | self.draw_shape = tensor.as_tensor_variable( 40 | numpy.empty((0,), dtype='int64')) 41 | else: 42 | self.draw_shape = tensor.as_tensor_variable(draw_shape) 43 | if self.draw_shape.ndim != 1: 44 | raise ValueError(draw_shape) 45 | 46 | def shared(self, val, **kwargs): 47 | rval = theano.shared(val, **kwargs) 48 | return rval 49 | 50 | def sharedX(self, val, **kwargs): 51 | rval = theano.shared( 52 | numpy.asarray(val, dtype=theano.config.floatX), 53 | **kwargs) 54 | return rval 55 | 56 | def new_shared_rstate(self): 57 | seed = int(self.seed_generator.randint(2**30)) 58 | rval = theano.shared(numpy.random.RandomState(seed)) 59 | return rval 60 | 61 | def add_default_update(self, used, recip, new_expr): 62 | if used not in self.default_updates: 63 | self.default_updates[used] = {} 64 | self.default_updates[used][recip] = new_expr 65 | used.update = (recip, new_expr) # necessary? 66 | recip.default_update = new_expr 67 | self.state_updates.append((recip, new_expr)) 68 | 69 | def sample(self, dist_name, *args, **kwargs): 70 | handler = samplers[dist_name] 71 | if 'draw_shape' in kwargs: 72 | draw_shape = kwargs['draw_shape'] 73 | if isinstance(draw_shape, (list, tuple)) and draw_shape: 74 | draw_shape = tensor.stack(*draw_shape) 75 | else: 76 | draw_shape = self.draw_shape 77 | 78 | kwargs['draw_shape'] = draw_shape 79 | out = handler(self, *args, **kwargs) 80 | return out 81 | 82 | def seed(self, seed=None): 83 | """Re-initialize each random stream 84 | 85 | :param seed: each random stream will be assigned a unique state that depends 86 | deterministically on this value. 
87 |
88 |         :type seed: None or integer in range 0 to 2**30
89 |
90 |         :rtype: None
91 |         """
92 |         if seed is None:
93 |             seed = self.default_instance_seed
94 |
95 |         seedgen = numpy.random.RandomState(seed)
96 |         for old_r, new_r in self.state_updates:
97 |             old_r_seed = seedgen.randint(2**30)
98 |             old_r.set_value(numpy.random.RandomState(int(old_r_seed)),
99 |                     borrow=True)
100 |
101 |     def pdf(self, rv, sample, **kwargs):
102 |         """
103 |         Return the probability (density) that random variable `rv`, returned by
104 |         a call to one of the sampling routines of this class, takes the value `sample`.
105 |         """
106 |         if rv.owner:
107 |             dist_name = rv_dist_name(rv)
108 |             pdf = pdfs[dist_name]
109 |             return pdf(rv.owner, sample, kwargs)
110 |         else:
111 |             raise TypeError('rv not recognized as output of RandomFunction')
112 |
113 |     def ml(self, rv, sample, weights=None):
114 |         """
115 |         Return an Updates object mapping distribution parameters to expressions
116 |         of their maximum likelihood values.
117 |         """
118 |         if rv.owner:
119 |             dist_name = rv_dist_name(rv)
120 |             pdf = ml_handlers[dist_name]
121 |             return pdf(rv.owner, sample, weights=weights)
122 |         else:
123 |             raise TypeError('rv not recognized as output of RandomFunction')
124 |
125 |     def params(self, rv):
126 |         """
127 |         Return the parameter variables of the distribution from which
128 |         random variable `rv` was drawn.
129 |         """
130 |         if rv.owner:
131 |             return params_handlers[rv_dist_name(rv)](rv.owner)
132 |         else:
133 |             raise TypeError('rv not recognized as output of RandomFunction')
134 |
135 |     def local_proposal(self, rv, sample, **kwargs):
136 |         """
137 |         Return a proposal random variable for Metropolis-Hastings moves of
138 |         random variable `rv` away from its current value `sample`.
139 |         """
140 |         if rv.owner:
141 |             dist_name = rv_dist_name(rv)
142 |             if dist_name in local_proposals:
143 |                 # If a proposal distribution is provided, use it
144 |                 local_proposal = local_proposals[dist_name]
145 |                 return local_proposal(self, rv.owner, sample, kwargs)
146 |             else:
147 |                 # Otherwise fall back to drawing samples from the distribution itself
148 |                 return rv.owner.outputs[1]
149 |         else:
150 |             raise TypeError('rv not recognized as output of RandomFunction')
151 |
152 | #
153 | # N.B. OTHER METHODS (samplers) ARE INSTALLED HERE BY
154 | # - register_sampler
155 | # - rng_register
156 | #
157 | def register_sampler(dist_name, f):
158 |     """
159 |     Inject a sampling function into RandomStreams for the distribution with
160 |     name `dist_name`.
161 |     """
162 |     # install an instancemethod on the RandomStreams class
163 |     # that is a shortcut for something like
164 |     # self.sample('uniform', *args, **kwargs)
165 |
166 |     def sampler(self, *args, **kwargs):
167 |         return self.sample(dist_name, *args, **kwargs)
168 |     setattr(RandomStreams, dist_name, sampler)
169 |     RandomStreams.clobber_symbols.append(dist_name)
170 |
171 |     if dist_name in samplers:
172 |         # TODO: allow for multiple handlers?
173 |         raise KeyError(dist_name)
174 |     samplers[dist_name] = f
175 |     return f
176 |
177 |
178 | def register_lpdf(dist_name, f):
179 |     if dist_name in pdfs:
180 |         # TODO: allow for multiple handlers?
181 |         raise KeyError(dist_name, pdfs[dist_name])
182 |     pdfs[dist_name] = f
183 |     return f
184 |
185 |
186 | #TODO: think about what this function is supposed to do??
187 | def register_ml(dist_name, f):
188 |     if dist_name in ml_handlers:
189 |         # TODO: allow for multiple handlers?
190 | raise KeyError(dist_name, ml_handlers[dist_name]) 191 | ml_handlers[dist_name] = f 192 | return f 193 | 194 | 195 | #TODO: think about what this function is supposed to do?? 196 | def register_params(dist_name, f): 197 | if dist_name in params_handlers: 198 | # TODO: allow for multiple handlers? 199 | raise KeyError(dist_name, params_handlers[dist_name]) 200 | params_handlers[dist_name] = f 201 | return f 202 | 203 | def register_local_proposal(dist_name, f): 204 | if dist_name in local_proposals: 205 | # TODO: allow for multiple handlers? 206 | raise KeyError(dist_name, local_proposals[dist_name]) 207 | local_proposals[dist_name] = f 208 | return f 209 | 210 | 211 | def rng_register(f): 212 | if f.__name__.endswith('_sampler'): 213 | dist_name = f.__name__[:-len('_sampler')] 214 | return register_sampler(dist_name, f) 215 | 216 | elif f.__name__.endswith('_lpdf'): 217 | dist_name = f.__name__[:-len('_lpdf')] 218 | return register_lpdf(dist_name, f) 219 | 220 | elif f.__name__.endswith('_ml'): 221 | dist_name = f.__name__[:-len('_ml')] 222 | return register_ml(dist_name, f) 223 | 224 | elif f.__name__.endswith('_params'): 225 | dist_name = f.__name__[:-len('_params')] 226 | return register_params(dist_name, f) 227 | 228 | elif f.__name__.endswith('_proposal'): 229 | dist_name = f.__name__[:-len('_proposal')] 230 | return register_local_proposal(dist_name, f) 231 | 232 | else: 233 | raise ValueError("function name suffix not recognized", f.__name__) 234 | 235 | -------------------------------------------------------------------------------- /montetheano/sample.py: -------------------------------------------------------------------------------- 1 | """ 2 | Algorithms for drawing samples by MCMC 3 | 4 | """ 5 | import numpy 6 | import theano 7 | from theano import tensor 8 | from for_theano import ancestors, infer_shape, evaluate_with_assignments, evaluate 9 | from rv import is_raw_rv, full_log_likelihood, lpdf 10 | 11 | 12 | # Major TODOs: 13 | # - RVs should have a non-symbolic shape so the MC states can be allocated 14 | # - We need to initialize the chains in draw from the independent prior distributions 15 | # - We need proposal distributions for all RVs from which to draw samples 16 | # - An additional loop around mh_sample is required 17 | # - An efficient parallel MC sampler is possible, which might be less decorrelated (or more book-keeping is required) 18 | # - The HMC sampler needs an outside loop and an additional inner loop for the leap-frog steps 19 | 20 | 21 | 22 | # Sample the generative model and return "outputs" for cases where "condition" is met. 23 | # If no condition is given, it just samples from the model 24 | # The outputs can be a single TheanoVariable or a list of TheanoVariables. 25 | # The function returns a single sample or a list of samples, depending on "outputs"; and an updates dictionary. 26 | def rejection_sample(outputs, condition = None): 27 | if isinstance(outputs, tensor.TensorVariable): 28 | init = [0] 29 | else: 30 | init = [0]*len(outputs) 31 | if condition is None: 32 | # TODO: I am just calling scan to get updates, can't I create this myself? 
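        # (For reference: in the else-branch below, scan re-draws every RV in
        # `outputs` on each iteration and theano.scan_module.until(condition)
        # stops the loop at the first draw satisfying `condition`, so taking
        # samples[-1] implements ordinary rejection sampling.)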
33 | # output desired RVs when condition is met 34 | def rejection(): 35 | return outputs 36 | 37 | samples, updates = theano.scan(rejection, outputs_info = init, n_steps = 1) 38 | else: 39 | # output desired RVs when condition is met 40 | def rejection(): 41 | return outputs, {}, theano.scan_module.until(condition) 42 | samples, updates = theano.scan(rejection, outputs_info = init, n_steps = 1000) 43 | if isinstance(samples, tensor.TensorVariable): 44 | sample = samples[-1] 45 | else: 46 | sample = [s[-1] for s in samples] 47 | return sample, updates 48 | 49 | def mh_sample(s_rng, outputs, observations = {}): 50 | all_vars = ancestors(list(outputs) + list(observations.keys())) 51 | 52 | for o in observations: 53 | assert o in all_vars 54 | if not is_raw_rv(o): 55 | raise TypeError(o) 56 | 57 | RVs = [v for v in all_vars if is_raw_rv(v)] 58 | free_RVs = [v for v in RVs if v not in observations] 59 | 60 | # Draw sample from the proposal 61 | free_RVs_state = [] 62 | for v in free_RVs: 63 | f = theano.function([], v, 64 | mode=theano.Mode(linker='py', optimizer=None)) 65 | free_RVs_state.append(theano.shared(f())) 66 | 67 | log_likelihood = theano.shared(numpy.array(float('-inf'))) 68 | 69 | U = s_rng.uniform(low=0.0, high=1.0) 70 | 71 | def mcmc(ll, *frvs): 72 | proposals = [s_rng.local_proposal(v, rvs) for v, rvs in zip(free_RVs, frvs)] 73 | proposals_rev = [s_rng.local_proposal(v, rvs) for v, rvs in zip(free_RVs, proposals)] 74 | 75 | full_observations = dict(observations) 76 | full_observations.update(dict([(rv, s) for rv, s in zip(free_RVs, proposals)])) 77 | new_log_likelihood = full_log_likelihood(full_observations) 78 | 79 | logratio = new_log_likelihood - ll \ 80 | + tensor.add(*[tensor.sum(lpdf(p, r)) for p, r in zip(proposals_rev, frvs)]) \ 81 | - tensor.add(*[tensor.sum(lpdf(p, r)) for p, r in zip(proposals, proposals)]) 82 | 83 | accept = tensor.gt(logratio, tensor.log(U)) 84 | 85 | return [tensor.switch(accept, new_log_likelihood, ll)] + \ 86 | [tensor.switch(accept, p, f) for p, f in zip(proposals, frvs)], \ 87 | {}, theano.scan_module.until(accept) 88 | 89 | samples, updates = theano.scan(mcmc, outputs_info = [log_likelihood] + free_RVs_state, n_steps = 100) 90 | updates[log_likelihood] = samples[0][-1] 91 | updates.update(dict([(f, s[-1]) for f, s in zip(free_RVs_state, samples[1:])])) 92 | 93 | return [free_RVs_state[free_RVs.index(out)] for out in outputs], log_likelihood, updates 94 | 95 | def hybridmc_sample(s_rng, outputs, observations = {}): 96 | # TODO: should there be a size variable here? 97 | # TODO: implement lag and burn-in 98 | # TODO: implement size 99 | """ 100 | Return a dictionary mapping random variables to their sample values. 
101 | """ 102 | 103 | all_vars = ancestors(list(outputs) + list(observations.keys())) 104 | 105 | for o in observations: 106 | assert o in all_vars 107 | if not is_raw_rv(o): 108 | raise TypeError(o) 109 | 110 | RVs = [v for v in all_vars if is_raw_rv(v)] 111 | 112 | free_RVs = [v for v in RVs if v not in observations] 113 | 114 | free_RVs_state = [theano.shared(numpy.ones(shape=infer_shape(v)), broadcastable=tuple(numpy.asarray(infer_shape(v))==1)) for v in free_RVs] 115 | free_RVs_prop = [s_rng.normal(0, 1, draw_shape=infer_shape(v)) for v in free_RVs] 116 | 117 | log_likelihood = theano.shared(numpy.array(float('-inf'))) 118 | 119 | U = s_rng.uniform(low=0, high=1.0) 120 | 121 | epsilon = numpy.sqrt(2*0.03) 122 | def mcmc(ll, *frvs): 123 | full_observations = dict(observations) 124 | full_observations.update(dict([(rv, s) for rv, s in zip(free_RVs, frvs)])) 125 | 126 | loglik = -full_log_likelihood(full_observations) 127 | 128 | proposals = free_RVs_prop 129 | H = tensor.add(*[tensor.sum(tensor.sqr(p)) for p in proposals])/2. + loglik 130 | 131 | # -- this should be an inner loop 132 | g = [] 133 | g.append(tensor.grad(loglik, frvs)) 134 | 135 | proposals = [(p - epsilon*gg[0]/2.) for p, gg in zip(proposals, g)] 136 | 137 | rvsp = [(rvs + epsilon*rvp) for rvs,rvp in zip(frvs, proposals)] 138 | 139 | full_observations = dict(observations) 140 | full_observations.update(dict([(rv, s) for rv, s in zip(free_RVs, rvsp)])) 141 | new_loglik = -full_log_likelihood(full_observations) 142 | 143 | gnew = [] 144 | gnew.append(tensor.grad(new_loglik, rvsp)) 145 | proposals = [(p - epsilon*gn[0]/2.) for p, gn in zip(proposals, gnew)] 146 | # -- 147 | 148 | Hnew = tensor.add(*[tensor.sum(tensor.sqr(p)) for p in proposals])/2. + new_loglik 149 | 150 | dH = Hnew - H 151 | accept = tensor.or_(dH < 0., U < tensor.exp(-dH)) 152 | 153 | return [tensor.switch(accept, -new_loglik, ll)] + \ 154 | [tensor.switch(accept, p, f) for p, f in zip(rvsp, frvs)], \ 155 | {}, theano.scan_module.until(accept) 156 | 157 | samples, updates = theano.scan(mcmc, outputs_info = [log_likelihood] + free_RVs_state, n_steps = 10000000) 158 | 159 | updates[log_likelihood] = samples[0][-1] 160 | updates.update(dict([(f, s[-1]) for f, s in zip(free_RVs_state, samples[1:])])) 161 | 162 | return [free_RVs_state[free_RVs.index(out)] for out in outputs], log_likelihood, updates 163 | 164 | def mh2_sample(s_rng, outputs, observations = {}, givens = {}): 165 | all_vars = ancestors(list(observations.keys()) + list(outputs)) 166 | 167 | for o in observations: 168 | assert o in all_vars 169 | if not is_raw_rv(o): 170 | raise TypeError(o) 171 | 172 | RVs = [v for v in all_vars if is_raw_rv(v)] 173 | free_RVs = [v for v in RVs if v not in observations] 174 | 175 | free_RVs_state = [] 176 | for v in free_RVs: 177 | f = theano.function([], v, 178 | mode=theano.Mode(linker='py', optimizer=None)) 179 | free_RVs_state.append(theano.shared(f())) 180 | 181 | U = s_rng.uniform(low=0.0, high=1.0) 182 | 183 | rr = [] 184 | for index in range(len(free_RVs)): 185 | # TODO: why does the compiler crash when we try to expose the likelihood ? 
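        # (For reference: each pass of this loop compiles one single-site
        # Metropolis-Hastings step for free_RVs[index].  For current state x
        # and proposal x' ~ q(.|x), acceptance happens with log-probability
        #     log r = log p(x') - log p(x) + log q(x|x') - log q(x'|x),
        # which is what `lr` computes below via bw (backward) and fw (forward).)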
186 |         full_observations = dict(observations)
187 |         full_observations.update(dict([(rv, s) for rv, s in zip(free_RVs, free_RVs_state)]))
188 |         log_likelihood = full_log_likelihood(full_observations)
189 |
190 |         proposal = s_rng.local_proposal(free_RVs[index], free_RVs_state[index])
191 |         proposal_rev = s_rng.local_proposal(free_RVs[index], proposal)
192 |
193 |         full_observations = dict(observations)
194 |         full_observations.update(dict([(rv, s) for rv, s in zip(free_RVs, free_RVs_state)]))
195 |         full_observations.update(dict([(free_RVs[index], proposal)]))
196 |         new_log_likelihood = full_log_likelihood(full_observations)
197 |
198 |         bw = tensor.sum(lpdf(proposal_rev, free_RVs_state[index]))
199 |         fw = tensor.sum(lpdf(proposal, proposal))
200 |
201 |         lr = new_log_likelihood-log_likelihood+bw-fw
202 |
203 |         accept = tensor.gt(lr, tensor.log(U))
204 |
205 |         updates = {free_RVs_state[index] : tensor.switch(accept, proposal, free_RVs_state[index])}
206 |         rr.append(theano.function([], [accept], updates=updates, givens=givens))
207 |
208 |     # TODO: the exact number of samples returned is still wrong
209 |     def sampler(nr_samples, burnin = 100, lag = 100):
210 |         data = [[] for o in outputs]
211 |         for i in range(nr_samples*lag+burnin):
212 |             accept = False
213 |             while not accept:
214 |                 index = numpy.random.randint(len(free_RVs))
215 |
216 |                 accept = rr[index]()
217 |             if accept and i > burnin and (i-burnin) % lag == 0:
218 |                 for d, o in zip(data, outputs):
219 |                     # TODO: this can be optimized
220 |                     if is_raw_rv(o):
221 |                         d.append(free_RVs_state[free_RVs.index(o)].get_value())
222 |                     else:
223 |                         full_observations = dict(observations)
224 |                         full_observations.update(dict([(rv, s) for rv, s in zip(free_RVs, free_RVs_state)]))
225 |                         d.append(evaluate(evaluate_with_assignments(o, full_observations), givens=givens))
226 |         data = [numpy.asarray(d).squeeze() for d in data]
227 |
228 |         return data
229 |
230 |     return sampler
--------------------------------------------------------------------------------
/montetheano/for_theano.py:
--------------------------------------------------------------------------------
1 | import copy
2 | import numpy
3 | import theano
4 | from theano import tensor
5 | from theano.gof import graph
6 |
7 | def evaluate(var, givens=None):
8 |     f = theano.function([], var, mode=theano.Mode(linker='py', optimizer=None), givens=givens)
9 |     return f()
10 |
11 | class memoized(object):
12 |     def __init__(self, func):
13 |         self.func = func
14 |         self.cache = {}
15 |     def __call__(self, *args):
16 |         try:
17 |             return self.cache[args]
18 |         except KeyError:
19 |             value = self.func(*args)
20 |             self.cache[args] = value
21 |             return value
22 |
23 | def as_variable(thing, type=None):
24 |     if isinstance(thing, theano.Variable):
25 |         if type is None or thing.type == type:
26 |             return thing
27 |         else:
28 |             return thing
29 |             # raise TypeError(thing)
30 |     if hasattr(thing, 'type'):
31 |         if type is None or thing.type == type:
32 |             return thing
33 |         else:
34 |             raise TypeError(thing)
35 |     if type is None:
36 |         #TODO: why there is no theano.constant??
37 |         return theano.shared(thing)
38 |     else:
39 |         return type.Constant(
40 |                 type,
41 |                 type.filter(thing, allow_downcast=True))
42 |
43 | class Bincount(theano.Op):
44 |     """
45 |     Count occurrences of each integer value in a vector, like numpy.bincount,
46 |     with optional weights and zero-padding of the result up to `minlength`.
47 |     """
48 |     #TODO: check to see if numpy.bincount supports minlength argument
49 |     def __eq__(self, other):
50 |         return type(self) == type(other)
51 |
52 |     def __hash__(self):
53 |         return hash(type(self))
54 |
55 |     def make_node(self, x, weights=1, minlength=0):
56 |         x = tensor.as_tensor_variable(x)
57 |         weights = tensor.as_tensor_variable(weights)
58 |         minlength = tensor.as_tensor_variable(minlength)
59 |         if x.ndim != 1:
60 |             raise NotImplementedError( x)
61 |         if 'int' not in str(x.dtype):
62 |             raise TypeError('bincount requires integer argument x', x)
63 |         # TODO: check that weights and minlength are ok
64 |         return theano.gof.Apply(self,
65 |                 [x, weights, minlength],
66 |                 [tensor.lvector()])
67 |
68 |     def perform(self, node, inputs, outstorage):
69 |         x, weights, minlength = inputs
70 |         if weights == 1:
71 |             rval = numpy.bincount(x)#, minlength=minlength)
72 |         else:
73 |             rval = numpy.bincount(*inputs)
74 |         if len(rval) < minlength:
75 |             tmp = numpy.zeros((minlength,), dtype=rval.dtype)
76 |             tmp[:len(rval)] = rval
77 |             rval = tmp
78 |         outstorage[0][0] = rval
79 |     #XXX: infer_shape
80 |
81 | bincount = Bincount()
82 |
83 |
84 | class Where(theano.Op):
85 |     """
86 |     Map a vector to an integer vector containing the sorted positions of
87 |     non-zeros in the argument.
88 |     """
89 |     def __eq__(self, other):
90 |         return type(self) == type(other)
91 |
92 |     def __hash__(self):
93 |         return hash(type(self))
94 |
95 |     def make_node(self, x):
96 |         if x.ndim != 1:
97 |             raise NotImplementedError()
98 |         return theano.gof.Apply(self,
99 |                 [x],
100 |                 [tensor.lvector()])
101 |
102 |     def perform(self, node, inputs, outstorage):
103 |         # Fixed by GWT: ensure output from numpy matches expected output dtype
104 |         # Addresses hyperopt issue #58
105 |         outstorage[0][0] = theano._asarray(
106 |                 numpy.where(inputs[0])[0], dtype=node.outputs[0].type.dtype)
107 |
108 | #XXX: infer_shape
109 | where = Where()
110 |
111 |
112 | class BoolTake(theano.Op):
113 |     """
114 |     Return the equivalent of
115 |         [x[i] for i, j in enumerate(tf) if j]
116 |
117 |     """
118 |
119 |     def __hash__(self):
120 |         return hash(type(self))
121 |
122 |     def __eq__(self, other):
123 |         return type(self) == type(other)
124 |
125 |     def make_node(self, x, tf):
126 |         x = tensor.as_tensor_variable(x)
127 |         tf = tensor.as_tensor_variable(tf)
128 |         if x.ndim < 1: raise TypeError()
129 |         if x.ndim != 1: raise TypeError()
130 |         if 'int' not in tf.dtype: raise TypeError()
131 |         return theano.gof.Apply(self,
132 |                 [x, tf],
133 |                 [x.type()])
134 |
135 |     def perform(self, node, inputs, output_storage):
136 |         x, tf = inputs
137 |         xx = x[:len(tf)]
138 |         rval = x[:len(tf)][tf > 0]
139 |         output_storage[0][0] = rval
140 | bool_take = BoolTake()
141 |
142 |
143 | class Find(theano.Op):
144 |     """
145 |     Returns positions in `query` where elements of `keepset` occur.
146 | 147 | Return the equivalent of 148 | [i for (i, q) in enumerate(query) if q in keepset] 149 | 150 | """ 151 | 152 | def __hash__(self): 153 | return hash(type(self)) 154 | 155 | def __eq__(self, other): 156 | return type(self) == type(other) 157 | 158 | def make_node(self, query, keepset): 159 | query = tensor.as_tensor_variable(query) 160 | keepset = tensor.as_tensor_variable(keepset) 161 | if query.ndim != 1: raise TypeError() 162 | if keepset.ndim != 1: raise TypeError() 163 | if 'int' not in query.dtype: raise TypeError() 164 | if 'int' not in keepset.dtype: raise TypeError() 165 | return theano.gof.Apply(self, 166 | [query, keepset], 167 | [keepset.type()]) 168 | 169 | def perform(self, node, inputs, output_storage): 170 | query, keepset = inputs 171 | keepset = set(keepset) 172 | rval = numpy.asarray( 173 | [i for i, e in enumerate(query) if e in keepset], 174 | dtype=inputs[1].dtype) 175 | output_storage[0][0] = rval 176 | #XXX: infer_shape 177 | find = Find() 178 | 179 | class Argsort(theano.Op): 180 | """ 181 | Return the equivalent of numpy.argsort(x) 182 | """ 183 | 184 | def __hash__(self): 185 | return hash(type(self)) 186 | 187 | def __eq__(self, other): 188 | return type(self) == type(other) 189 | 190 | def make_node(self, x): 191 | x = tensor.as_tensor_variable(x) 192 | if x.ndim != 1: raise TypeError() 193 | if 'complex' in str(x.dtype): raise TypeError() 194 | return theano.gof.Apply(self, [x], [tensor.lvector()]) 195 | 196 | def perform(self, node, inputs, output_storage): 197 | # Fixed by GWT: ensure output from numpy matches expected output dtype 198 | # Addresses hyperopt issue #58 199 | output_storage[0][0] = theano._asarray(numpy.argsort(inputs[0]), 200 | dtype=node.outputs[0].type.dtype) 201 | 202 | #XXX: infer_shape 203 | argsort = Argsort() 204 | 205 | def elemwise_cond(*args): 206 | """Build a nested elemwise if elif ... statement. 207 | 208 | elemwise_cond( 209 | a, cond_a, 210 | b, cond_b, 211 | c) 212 | 213 | Translates roughly to an elementwise version of this... 214 | 215 | if cond_a: 216 | a 217 | elif cond_b: 218 | b 219 | else: 220 | c 221 | """ 222 | assert len(args) % 2, 'need an odd number of args' 223 | if len(args) == 1: 224 | return args[0] 225 | else: 226 | return tensor.switch( 227 | args[1], 228 | args[0], 229 | elemwise_cond(*args[2:])) 230 | 231 | 232 | class LazySwitch(theano.gof.op.PureOp): 233 | """ 234 | lazy_switch(which_case, case0, case1, case2, case3, ...)
235 | 236 | """ 237 | 238 | def __init__(self): 239 | pass 240 | 241 | def __eq__(self, other): 242 | return type(self) == type(other) 243 | 244 | def __hash__(self): 245 | return hash(type(self)) 246 | 247 | def make_node(self, c, arg0, *args): 248 | for a in args: 249 | if a.type != arg0.type: 250 | raise TypeError( 251 | 'Switch requires same type for all cases', 252 | (a.type, arg0.type)) 253 | return theano.gof.Apply(self, 254 | [c, arg0] + list(args), 255 | [arg0.type()]) 256 | 257 | def make_thunk(self, node, storage_map, compute_map, no_recycling): 258 | outtype = node.outputs[0].type 259 | c = node.inputs[0] 260 | s_output = node.outputs[0] 261 | ocontainer = storage_map[s_output] 262 | def thunk(): 263 | if not compute_map[c][0]: 264 | return [0] # ask to compute c 265 | else: 266 | casenum = storage_map[c][0] 267 | argvar = node.inputs[casenum+1] 268 | if compute_map[argvar][0]: 269 | argval = storage_map[argvar][0] 270 | ocontainer[0] = outtype.filter( 271 | copy.deepcopy(argval)) 272 | return [] # computations are done 273 | else: 274 | # ask to compute the input element we need 275 | return [casenum+1] 276 | thunk.lazy = True 277 | thunk.inputs = [storage_map[v] for v in node.inputs] 278 | thunk.outputs = [storage_map[v] for v in node.outputs] 279 | return thunk 280 | 281 | lazy_switch = LazySwitch() 282 | 283 | 284 | def ancestors(variable_list, blockers = None): 285 | """Return the variables that contribute to those in variable_list (inclusive). 286 | 287 | :type variable_list: list of `Variable` instances 288 | :param variable_list: 289 | output `Variable` instances from which to search backward through owners 290 | :rtype: list of `Variable` instances 291 | :returns: 292 | all input nodes, in the order found by a left-recursive depth-first search 293 | started at the nodes in `variable_list`. 294 | 295 | """ 296 | def expand(r): 297 | if r.owner and (not blockers or r not in blockers): 298 | l = list(r.owner.inputs) 299 | l.reverse() 300 | return l 301 | dfs_variables = graph.stack_search(graph.deque(variable_list), expand, 'dfs') 302 | return dfs_variables 303 | 304 | 305 | def clone_keep_replacements(i, o, replacements=None): 306 | """Duplicate nodes from i -> o inclusive. 307 | 308 | i - sequence of variables 309 | o - sequence of variables 310 | replacements - dictionary mapping each old node to its new one. 311 | (this is modified in-place as described in `clone_get_equiv`) 312 | 313 | By default new inputs are actually the same as old inputs, but 314 | when a replacements dictionary is provided this will not generally be the 315 | case. 316 | """ 317 | equiv = clone_get_equiv(i, o, replacements) 318 | return [equiv[input] for input in i], [equiv[output] for output in o] 319 | 320 | 321 | def clone_get_equiv(i, o, replacements=None): 322 | """Duplicate nodes from `i` to `o` inclusive. 323 | 324 | Returns replacements dictionary, mapping each old node to its new one. 325 | 326 | i - sequence of variables 327 | o - sequence of variables 328 | replacements - initial value for return value, modified in place. 329 | 330 | """ 331 | if replacements is None: 332 | d = {} 333 | else: 334 | d = replacements 335 | 336 | # for old, new in replacements.items(): 337 | # if new in replacements: 338 | # # I think we want to do something recursive here, but 339 | # # it feels like it might get tricky?
This reminds me of the 340 | # # 'sorted_givens' branch on github/jaberg/Theano 341 | # raise NotImplementedError('think before implementing') 342 | # replacements[new] = new 343 | 344 | for input in i: 345 | if input not in d: 346 | d[input] = input 347 | 348 | for apply in graph.io_toposort(i, o): 349 | for input in apply.inputs: 350 | if input not in d: 351 | d[input] = input 352 | 353 | new_apply = apply.clone_with_new_inputs([d[i] for i in apply.inputs]) 354 | if apply not in d: 355 | d[apply] = new_apply 356 | 357 | for output, new_output in zip(apply.outputs, new_apply.outputs): 358 | if output not in d: 359 | d[output] = new_output 360 | 361 | for output in o: 362 | if output not in d: 363 | d[output] = output.clone() 364 | 365 | return d 366 | 367 | 368 | #XXX: rename -> clone_with_assignment 369 | def evaluate_with_assignments(f, assignment): 370 | dfs_variables = ancestors([f], blockers=assignment.keys()) 371 | frontier = [r for r in dfs_variables 372 | if r.owner is None or r in assignment.keys()] 373 | cloned_inputs, cloned_outputs = clone_keep_replacements(frontier, [f], 374 | replacements=assignment) 375 | out, = cloned_outputs 376 | return out 377 | 378 | 379 | # 380 | # SHAPE INFERENCE 381 | # 382 | 383 | # Shape.infer_shape 384 | if not hasattr(theano.tensor.basic.Shape, 'infer_shape'): 385 | def shape_infer_shape(self, node, ishapes): 386 | return [(node.inputs[0].ndim,)] 387 | theano.tensor.basic.Shape.infer_shape = shape_infer_shape 388 | 389 | # MakeVector.infer_shape 390 | if not hasattr(theano.tensor.opt.MakeVector, 'infer_shape'): 391 | def makevector_infer_shape(self, node, ishapes): 392 | return [(len(node.inputs),)] 393 | theano.tensor.opt.MakeVector.infer_shape = makevector_infer_shape 394 | 395 | def infer_shape_helper(v, assume_shared_size_fixed): 396 | if not isinstance(v.type, tensor.TensorType): 397 | return None 398 | 399 | if v.owner: 400 | if len(v.owner.outputs) > 1: 401 | output_pos = v.owner.outputs.index(v) 402 | else: 403 | output_pos = 0 404 | ishapes = [infer_shape_helper(i, assume_shared_size_fixed) 405 | for i in v.owner.inputs] 406 | return v.owner.op.infer_shape(v.owner, ishapes)[output_pos] 407 | 408 | 409 | if isinstance(v, theano.Constant): 410 | return v.data.shape 411 | 412 | if isinstance(v, theano.compile.SharedVariable): 413 | if assume_shared_size_fixed: 414 | return v.get_value(borrow=True).shape 415 | else: 416 | raise ValueError('shared var') 417 | 418 | def infer_shape(v, assume_shared_size_fixed=True): 419 | rval = infer_shape_helper(v, assume_shared_size_fixed) 420 | if None is rval: 421 | raise TypeError('some ancestor was not a TensorType var') 422 | def as_int(o): 423 | if hasattr(o, 'data'): 424 | return int(o.data) 425 | elif hasattr(o, 'type'): 426 | f = theano.function([], o, 427 | mode=theano.Mode(linker='py', optimizer=None)) 428 | return f() 429 | else: 430 | return int(o) 431 | return tuple([as_int(r) for r in rval]) 432 | 433 | 434 | -------------------------------------------------------------------------------- /montetheano/test_distributions.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy 3 | 4 | import theano 5 | from theano import tensor 6 | 7 | from rstreams import RandomStreams 8 | import distributions 9 | from sample import rejection_sample, mh_sample, hybridmc_sample, mh2_sample 10 | from rv import is_rv, is_raw_rv, full_log_likelihood, lpdf 11 | import for_theano 12 | from for_theano import evaluate, ancestors, infer_shape, memoized 
13 | 14 | import pylab 15 | 16 | def test_dirichlet(): 17 | R = RandomStreams(234) 18 | n = R.dirichlet(alpha=numpy.ones(10,), draw_shape=(5,)) 19 | 20 | f = theano.function([], n) 21 | 22 | assert f().shape == (5, 10) 23 | 24 | 25 | def test_multinomial(): 26 | R = RandomStreams(234) 27 | n = R.multinomial(5, numpy.ones(5,)/5, draw_shape=(2,)) 28 | 29 | f = theano.function([], n) 30 | 31 | assert f().shape == (2, 5) 32 | 33 | 34 | class TestBasicBinomial(unittest.TestCase): 35 | def setUp(self): 36 | s_rng = self.s_rng = RandomStreams(23424) 37 | p = 0.5 38 | self.A = s_rng.binomial(1, p) 39 | self.B = s_rng.binomial(1, p) 40 | self.C = s_rng.binomial(1, p) 41 | self.D = self.A+self.B+self.C 42 | self.condition = tensor.ge(self.D, 2) 43 | 44 | def test_rejection_sampler(self): 45 | sample, updates = rejection_sample([self.A, self.B, self.C], self.condition) 46 | 47 | # create a runnable function 48 | sampler = theano.function(inputs=[], outputs = sample, updates = updates) 49 | 50 | # generate some data 51 | data = [] 52 | for i in range(100): 53 | data.append(sampler()) 54 | 55 | # plot histogram 56 | pylab.hist(numpy.asarray(data)) 57 | pylab.show() 58 | 59 | def test_rejection_sampler_no_cond(self): 60 | sample, updates = rejection_sample([self.A, self.B, self.C]) 61 | 62 | # create a runnable function 63 | sampler = theano.function(inputs=[], outputs = sample, updates = updates) 64 | 65 | # generate some data 66 | data = [] 67 | for i in range(100): 68 | data.append(sampler()) 69 | 70 | # plot histogram 71 | pylab.hist(numpy.asarray(data)) 72 | pylab.show() 73 | 74 | 75 | class TestQuantizedLogNormalMixture(unittest.TestCase): 76 | def setUp(self): 77 | s_rng = self.s_rng = RandomStreams(23424) 78 | self.weights = tensor.dvector() 79 | self.mus = tensor.dvector() 80 | self.sigmas = tensor.dvector() 81 | 82 | def test_draw_1(self): 83 | q = self.s_rng.quantized_lognormal_mixture( 84 | self.weights, 85 | self.mus, 86 | self.sigmas, 87 | step=2) 88 | f = theano.function([self.weights, self.mus, self.sigmas], 89 | q) 90 | assert f([1.0], [0.0], [0.01]) == 2.0 91 | assert f([0.5, 0.5], [0.0, 0.0], [0.01, 0.001]) == 2.0 92 | 93 | def test_draw_0(self): 94 | q = self.s_rng.quantized_lognormal_mixture( 95 | self.weights, 96 | self.mus, 97 | self.sigmas, 98 | step=2, 99 | draw_shape = (0,)) 100 | f = theano.function([self.weights, self.mus, self.sigmas], 101 | q) 102 | assert list(f([1.0], [0.0], [0.01])) == [] 103 | assert list(f([0.5, 0.5], [0.0, 0.0], [0.01, 0.001])) == [] 104 | 105 | 106 | 107 | # first example: http://projects.csail.mit.edu/church/wiki/Learning_as_Conditional_Inference 108 | class TestCoin(unittest.TestCase): 109 | def setUp(self): 110 | s_rng = self.s_rng = RandomStreams(23424) 111 | 112 | self.fair_prior = 0.999 113 | self.fair_coin = s_rng.binomial(1, self.fair_prior) 114 | 115 | make_coin = lambda x: s_rng.binomial(1, x, draw_shape=(4,)) 116 | self.coin = make_coin(tensor.switch(self.fair_coin > 0.5, 0.5, 0.95)) 117 | 118 | self.data = tensor.as_tensor_variable([[1, 1, 1, 1]]) 119 | 120 | def test_tt(self): 121 | sample, updates = rejection_sample([self.fair_coin,], tensor.eq(tensor.sum(tensor.eq(self.coin, self.data)), 4)) # all four flips must match the data 122 | sampler = theano.function([], sample, updates=updates) 123 | 124 | # TODO: this is super-slow, how can bher do this fast?
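# Rough cost estimate for this rejection loop, assuming the condition above
# (all four observed flips come up 1):
#   P(accept) = 0.999 * 0.5**4 + 0.001 * 0.95**4 ~= 0.063
# so on the order of 16 prior draws are needed per accepted sample.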
125 | for i in range(100): 126 | print sampler() 127 | 128 | 129 | class TestCoin2(): #unittest.TestCase): 130 | def setUp(self): 131 | s_rng = self.s_rng = RandomStreams(23424) 132 | 133 | self.repetitions = 100 134 | self.coin_weight = s_rng.uniform(low=0, high=1) 135 | self.coin = s_rng.binomial(1, self.coin_weight, draw_shape=(self.repetitions,)) 136 | 137 | def test_tt(self): 138 | true_sampler = theano.function([self.coin_weight], self.coin) 139 | 140 | sample, ll, updates = mh_sample(self.s_rng, [self.coin_weight]) 141 | sampler = theano.function([self.coin], sample, updates=updates) 142 | 143 | for i in range(100): 144 | print sampler(true_sampler(0.9)) 145 | 146 | 147 | class TestGMM(unittest.TestCase): 148 | def setUp(self): 149 | s_rng = self.s_rng = RandomStreams(23424) 150 | 151 | self.p = tensor.scalar() 152 | self.m1 = tensor.scalar() 153 | self.m2 = tensor.scalar() 154 | self.v = tensor.scalar() 155 | 156 | self.C = s_rng.binomial(1, self.p) 157 | self.m = tensor.switch(self.C, self.m1, self.m2) 158 | self.D = s_rng.normal(self.m, self.v) 159 | 160 | self.D_data = tensor.as_tensor_variable([1, 1.2, 3, 3.4]) 161 | 162 | def test_tt(self): 163 | RVs = dict([(self.D, self.D_data)]) 164 | lik = full_log_likelihood(RVs) 165 | 166 | lf = theano.function([self.m1, self.m2, self.C], lik) 167 | 168 | print lf(1,3,0) 169 | print lf(1,3,1) 170 | 171 | # EM: 172 | # E-step: 173 | # C = expectation p(C | data, params) 174 | # M-step: 175 | # params = argmax p(params | C, data) 176 | # 177 | # MCMC (Gibbs): 178 | # p(params | data, C) 179 | # p(C | data, params) 180 | 181 | 182 | class TestHierarchicalNormal(): #unittest.TestCase): 183 | def setUp(self): 184 | s_rng = self.s_rng = RandomStreams(23424) 185 | a = 0.0 186 | b = 1.0 187 | c = 1.5 188 | d = 2.0 189 | 190 | self.M = s_rng.normal(a, b) 191 | self.V = s_rng.normal(c, d) 192 | self.V_ = abs(self.V) + .1 193 | self.X = s_rng.normal(self.M, self.V_, draw_shape=(4,)) 194 | 195 | self.X_data = tensor.as_tensor_variable([1, 2, 3, 2.4]) 196 | 197 | def test_sample_gets_all_rvs(self): 198 | outs, dct = sample(self.s_rng, [self.X], ()) 199 | assert outs == [self.X] 200 | assert len(dct) == 3 201 | 202 | def test_sample_can_be_generated(self): 203 | outs, dct = sample(self.s_rng, [self.X], ()) 204 | f = theano.function([], [dct[self.X], dct[self.M], 205 | dct[self.V.owner.inputs[0]]]) 206 | x0, m0, v0 = f() 207 | x1, m1, v1 = f() 208 | assert not numpy.any(x0 == x1) 209 | assert x0.shape == (4,) 210 | assert m0.shape == () 211 | assert v1.shape == () 212 | print x0, m0, v0 213 | 214 | def test_likelihood(self): 215 | outs, obs = sample(self.s_rng, [self.X], ()) 216 | 217 | lik = likelihood(obs) 218 | 219 | f = theano.function([], lik) 220 | 221 | print f() 222 | 223 | def test_mh_sample(self): 224 | sample, ll, updates = mh_sample(self.s_rng, [self.M, self.V], observations={self.X: self.X_data}, lag = 100) 225 | sampler = theano.function([], sample, updates=updates) 226 | 227 | data = [] 228 | for i in range(100): 229 | print i 230 | data.append(sampler()) 231 | 232 | pylab.subplot(211) 233 | pylab.hist(numpy.asarray(data)[:,0]) 234 | pylab.subplot(212) 235 | pylab.hist(numpy.asarray(data)[:,1]) 236 | pylab.show() 237 | 238 | 239 | class Fitting1D(unittest.TestCase): 240 | def setUp(self): 241 | self.obs = tensor.as_tensor_variable( 242 | numpy.asarray([0.0, 1.01, 0.7, 0.65, 0.3])) 243 | self.rstream = RandomStreams(234) 244 | self.n = self.rstream.normal() 245 | self.u = self.rstream.uniform() 246 | 247 | def test_normal_ml(self): 248 | up =
self.rstream.ml(self.n, self.obs) 249 | p = self.rstream.params(self.n) 250 | f = theano.function([], [up[p[0]], up[p[1]]]) 251 | m,v = f() 252 | assert numpy.allclose([m,v], [.532, 0.34856276335]) 253 | 254 | def test_uniform_ml(self): 255 | up = self.rstream.ml(self.u, self.obs) 256 | p = self.rstream.params(self.u) 257 | f = theano.function([], [up[p[0]], up[p[1]]]) 258 | l,h = f() 259 | assert numpy.allclose([l,h], [0.0, 1.01]) 260 | 261 | 262 | class TestHMM(): #unittest.TestCase): 263 | def setUp(self): 264 | s_rng = self.s_rng = RandomStreams(23424) 265 | 266 | self.nr_states = 5 267 | self.nr_obs = 3 268 | 269 | self.observation_model = memoized(lambda state: s_rng.dirichlet([1]*self.nr_obs)) 270 | self.transition_model = memoized(lambda state: s_rng.dirichlet([1]*self.nr_states)) 271 | 272 | self.transition = lambda state: s_rng.multinomial(1, self.transition_model(state)) 273 | self.observation = lambda state: s_rng.multinomial(1, self.observation_model(state)) 274 | 275 | def transition(obs, state): 276 | return [self.observation(state), self.transition(state)] ,{}, until(state == numpy.asarray([0,0,0,0,1])) 277 | 278 | [self.sampled_words, self.sampled_states], updates = scan([], [obs, state]) 279 | 280 | def test(self): 281 | print evaluate(self.sample_words([1,0,0,0,0])) 282 | 283 | 284 | class TestGMM1(unittest.TestCase): 285 | def setUp(self): 286 | R = RandomStreams(234) 287 | weights = tensor.dvector() 288 | mus = tensor.dvector() 289 | sigmas = tensor.dvector() 290 | draw_shape = tensor.ivector() 291 | xsca = R.GMM1(weights, mus, sigmas, draw_shape=draw_shape, ndim=0) 292 | xvec = R.GMM1(weights, mus, sigmas, draw_shape=draw_shape, ndim=1) 293 | xmat = R.GMM1(weights, mus, sigmas, draw_shape=draw_shape, ndim=2) 294 | 295 | self.__dict__.update(locals()) 296 | del self.self 297 | 298 | def test1(self): 299 | assert self.xsca.ndim == 0 300 | assert self.xvec.ndim == 1 301 | assert self.xmat.ndim == 2 302 | 303 | assert self.xsca.dtype == 'float64' 304 | assert self.xvec.dtype == 'float64' 305 | assert self.xmat.dtype == 'float64' 306 | 307 | def test_mu_is_used_correctly(self): 308 | f = theano.function( 309 | [self.weights, self.mus, self.sigmas, self.draw_shape], 310 | self.xsca) 311 | assert numpy.allclose(10, f([1], [10.0], [0.0000001], [])) 312 | 313 | def test_sigma_is_used_correctly(self): 314 | f = theano.function( 315 | [self.weights, self.mus, self.sigmas, self.draw_shape], 316 | self.xvec) 317 | samples = f([1], [0.0], [10.0], [1000]) 318 | assert 9 < numpy.std(samples) < 11 319 | 320 | def test_mus_make_variance(self): 321 | f = theano.function( 322 | [self.weights, self.mus, self.sigmas, self.draw_shape], 323 | self.xvec) 324 | 325 | samples = f([.5, .5], [0.0, 1.0], [0.000001, 0.000001], [1000]) 326 | print samples.shape 327 | #import matplotlib.pyplot as plt 328 | #plt.hist(samples) 329 | #plt.show() 330 | assert .45 < numpy.mean(samples) < .55, numpy.mean(samples) 331 | assert .2 < numpy.var(samples) < .3, numpy.var(samples) 332 | 333 | def test_weights(self): 334 | f = theano.function( 335 | [self.weights, self.mus, self.sigmas, self.draw_shape], 336 | self.xvec) 337 | 338 | samples = f([.9999, .0001], [0.0, 1.0], [0.000001, 0.000001], [1000]) 339 | assert samples.shape == (1000,) 340 | #import matplotlib.pyplot as plt 341 | #plt.hist(samples) 342 | #plt.show() 343 | assert -.001 < numpy.mean(samples) < .001, numpy.mean(samples) 344 | assert numpy.var(samples) < .0001, numpy.var(samples) 345 | 346 | def test_mat_output(self): 347 | f = theano.function(
348 | [self.weights, self.mus, self.sigmas, self.draw_shape], 349 | self.xmat) 350 | 351 | samples = f([.9999, .0001], [0.0, 1.0], [0.000001, 0.000001], [40, 20]) 352 | assert samples.shape == (40, 20) 353 | assert -.001 < numpy.mean(samples) < .001, numpy.mean(samples) 354 | assert numpy.var(samples) < .0001, numpy.var(samples) 355 | 356 | def test_lpdf_scalar_one_component(self): 357 | xval = tensor.dscalar() 358 | ll = lpdf(self.xsca, xval) 359 | assert ll.ndim == 0, ll.type 360 | f = theano.function( 361 | [xval, self.weights, self.mus, self.sigmas, self.draw_shape], 362 | ll) 363 | llval = f(1.0, # x 364 | [1.], # weights 365 | [1.0], # mu 366 | [2.0], # sigma 367 | [] # shape 368 | ) 369 | assert llval.shape == () 370 | assert numpy.allclose(llval, 371 | numpy.log(1.0 / numpy.sqrt(2 * numpy.pi * 2.0**2))) 372 | 373 | def test_lpdf_scalar_N_components(self): 374 | xval = tensor.dscalar() 375 | ll = lpdf(self.xsca, xval) 376 | assert ll.ndim == 0, ll.type 377 | f = theano.function( 378 | [xval, self.weights, self.mus, self.sigmas, self.draw_shape], 379 | ll) 380 | llval = f(1.0, # x 381 | [0.25, 0.25, .5], # weights 382 | [0.0, 1.0, 2.0], # mu 383 | [1.0, 2.0, 5.0], # sigma 384 | [] # shape 385 | ) 386 | 387 | a = (.25 / numpy.sqrt(2 * numpy.pi * 1.0 ** 2) 388 | * numpy.exp(-.5 * (1.0)**2)) 389 | a += (.25 / numpy.sqrt(2 * numpy.pi * 2.0 ** 2)) 390 | a += (.5 / numpy.sqrt(2 * numpy.pi * 5.0 ** 2) 391 | * numpy.exp(-.5 * (1.0 / 5.0) ** 2)) 392 | 393 | def test_lpdf_vector_N_components(self): 394 | xval = tensor.dvector() 395 | ll = lpdf(self.xvec, xval) 396 | assert ll.ndim == 1, ll.type 397 | f = theano.function( 398 | [xval, self.weights, self.mus, self.sigmas], 399 | ll) 400 | llval = f([1.0, 0.0], # x 401 | [0.25, 0.25, .5], # weights 402 | [0.0, 1.0, 2.0], # mu 403 | [1.0, 2.0, 5.0], # sigma 404 | ) 405 | 406 | # case x = 1.0 407 | a = (.25 / numpy.sqrt(2 * numpy.pi * 1.0 ** 2) 408 | * numpy.exp(-.5 * (1.0)**2)) 409 | a += (.25 / numpy.sqrt(2 * numpy.pi * 2.0 ** 2)) 410 | a += (.5 / numpy.sqrt(2 * numpy.pi * 5.0 ** 2) 411 | * numpy.exp(-.5 * (1.0 / 5.0) ** 2)) 412 | 413 | assert llval.shape == (2,) 414 | assert numpy.allclose(llval[0], numpy.log(a)) 415 | 416 | 417 | # case x = 0.0 418 | a = (.25 / numpy.sqrt(2 * numpy.pi * 1.0 ** 2)) 419 | a += (.25 / numpy.sqrt(2 * numpy.pi * 2.0 ** 2) 420 | * numpy.exp(-.5 * (1.0 / 2.0) ** 2)) 421 | a += (.5 / numpy.sqrt(2 * numpy.pi * 5.0 ** 2) 422 | * numpy.exp(-.5 * (2.0 / 5.0) ** 2)) 423 | assert numpy.allclose(llval[1], numpy.log(a)) 424 | 425 | def test_lpdf_matrix_N_components(self): 426 | xval = tensor.dmatrix() 427 | ll = lpdf(self.xmat, xval) 428 | assert ll.ndim == 2, ll.type 429 | f = theano.function( 430 | [xval, self.weights, self.mus, self.sigmas], 431 | ll) 432 | llval = f([[1.0, 0.0, 0.0], [0, 0, 1]], # x 433 | [0.25, 0.25, .5], # weights 434 | [0.0, 1.0, 2.0], # mu 435 | [1.0, 2.0, 5.0], # sigma 436 | ) 437 | 438 | a = (.25 / numpy.sqrt(2 * numpy.pi * 1.0 ** 2) 439 | * numpy.exp(-.5 * (1.0)**2)) 440 | a += (.25 / numpy.sqrt(2 * numpy.pi * 2.0 ** 2)) 441 | a += (.5 / numpy.sqrt(2 * numpy.pi * 5.0 ** 2) 442 | * numpy.exp(-.5 * (1.0 / 5.0) ** 2)) 443 | 444 | assert llval.shape == (2,3) 445 | assert numpy.allclose(llval[0,0], numpy.log(a)) 446 | assert numpy.allclose(llval[1,2], numpy.log(a)) 447 | 448 | 449 | a = (.25 / numpy.sqrt(2 * numpy.pi * 1.0 ** 2)) 450 | a += (.25 / numpy.sqrt(2 * numpy.pi * 2.0 ** 2) 451 | * numpy.exp(-.5 * (1.0 / 2.0)**2)) 452 | a += (.5 / numpy.sqrt(2 * numpy.pi * 5.0 ** 2) 453 | * numpy.exp(-.5 * (2.0 
/ 5.0) ** 2)) 454 | 455 | assert numpy.allclose(llval[0,1], numpy.log(a)) 456 | assert numpy.allclose(llval[0,2], numpy.log(a)) 457 | assert numpy.allclose(llval[1,0], numpy.log(a)) 458 | assert numpy.allclose(llval[1,1], numpy.log(a)) 459 | 460 | # XXX: make sure lpdf calculation includes logsum 461 | 462 | if 0: 463 | def test_illustrate(self): 464 | f = theano.function( 465 | [self.weights, self.mus, self.sigmas, self.draw_shape], 466 | self.xvec) 467 | 468 | samples = f(#numpy.arange(16)/numpy.arange(16).sum(), 469 | numpy.ones(16)/16, 470 | numpy.arange(16), 471 | #.02 * (numpy.arange(16)+1), 472 | .2 * numpy.ones(16), 473 | [10000]) 474 | import matplotlib.pyplot as plt 475 | plt.hist(samples, bins=100) 476 | plt.show() 477 | -------------------------------------------------------------------------------- /montetheano/distributions.py: -------------------------------------------------------------------------------- 1 | """ 2 | Math for various distributions. 3 | 4 | """ 5 | import __builtin__ 6 | import copy 7 | import logging 8 | 9 | logger = logging.getLogger(__file__) 10 | 11 | import numpy 12 | import theano 13 | import scipy 14 | import scipy.special 15 | from theano import tensor 16 | from for_theano import elemwise_cond, ancestors, infer_shape, evaluate 17 | from rstreams import rng_register, rv_dist_name 18 | 19 | 20 | # TODOs: 21 | # - Additional distributions of interest: 22 | # - Wishart 23 | # - Dirichlet process / CRP 24 | # - REFACTOR: GMM1, BGMM1, lognormal_mixture are largely cut-and-pasted 25 | 26 | # ------- 27 | # Random integer 28 | # ------- 29 | 30 | @rng_register 31 | def random_integers_sampler(rstream, low=0, high=1, ndim=None, draw_shape=None, dtype=numpy.dtype('int32')): 32 | # TODO: this should be only integer, nothing else: check dtype check boundaries 33 | 34 | low = tensor.as_tensor_variable(low) 35 | high = tensor.as_tensor_variable(high) 36 | 37 | ndim, draw_shape, bcast = tensor.raw_random._infer_ndim_bcast(ndim, draw_shape, low, high) 38 | op = tensor.raw_random.RandomFunction('random_integers', 39 | tensor.TensorType(dtype=dtype, broadcastable=bcast)) 40 | 41 | rstate = rstream.new_shared_rstate() 42 | new_rstate, out = op(rstate, draw_shape, low, high) 43 | rstream.add_default_update(out, rstate, new_rstate) 44 | return out 45 | 46 | @rng_register 47 | def random_integers_lpdf(node, sample, kw): 48 | rstate, shape, low, high = node.inputs 49 | 50 | # TODO: Check that sample is integer ! 51 | 52 | rval = elemwise_cond( 53 | numpy.array(float('-inf')), sample < low, 54 | -tensor.log(high-low+1.), sample <= high, 55 | numpy.array(float('-inf'))) 56 | return rval 57 | 58 | 59 | # ------- 60 | # Uniform 61 | # ------- 62 | 63 | @rng_register 64 | def uniform_sampler(rstream, low=0.0, high=1.0, ndim=None, draw_shape=None, 65 | dtype=theano.config.floatX): 66 | low = tensor.as_tensor_variable(low) 67 | high = tensor.as_tensor_variable(high) 68 | rstate = rstream.new_shared_rstate() 69 | 70 | # James: why is this required? 
fails if draw_shape is not provided 71 | # if isinstance(draw_shape, (list, tuple)): 72 | # draw_shape = tensor.stack(*draw_shape) 73 | 74 | new_rstate, out = tensor.raw_random.uniform( 75 | rstate, draw_shape, low, high, ndim, dtype) 76 | rstream.add_default_update(out, rstate, new_rstate) 77 | return out 78 | 79 | 80 | @rng_register 81 | def uniform_lpdf(node, sample, kw): 82 | rstate, shape, low, high = node.inputs 83 | rval = elemwise_cond( 84 | numpy.array(float('-inf')), sample < low, 85 | -tensor.log(high - low), sample <= high, 86 | numpy.array(float('-inf'))) 87 | return rval 88 | 89 | 90 | @rng_register 91 | def uniform_ml(node, sample, weights): 92 | rstate, shape, low, high = node.inputs 93 | return Updates({ 94 | low: sample.min(), 95 | high: sample.max()}) 96 | 97 | 98 | @rng_register 99 | def uniform_params(node): 100 | rstate, shape, low, high = node.inputs 101 | return [low, high] 102 | 103 | def uniform_get_low(v): 104 | # look in uniform_sampler to see the positions of these things 105 | if rv_dist_name(v) == 'uniform': 106 | return v.owner.inputs[2] 107 | raise ValueError('v is not a uniform draw', v) 108 | 109 | 110 | def uniform_get_high(v): 111 | # look in uniform_sampler to see the positions of these things 112 | if rv_dist_name(v) == 'uniform': 113 | return v.owner.inputs[3] 114 | raise ValueError('v is not a uniform draw', v) 115 | 116 | 117 | # ------ 118 | # Normal 119 | # ------ 120 | def normal_get_mu(v): 121 | # look in normal_sampler to see the positions of these things 122 | if rv_dist_name(v) == 'normal': 123 | return v.owner.inputs[2] 124 | raise ValueError('v is not a normal draw', v) 125 | 132 | def normal_get_sigma(v): 133 | # look in normal_sampler to see the positions of these things 134 | if rv_dist_name(v) == 'normal': 135 | return v.owner.inputs[3] 136 | raise ValueError('v is not a normal draw', v) 137 | 138 | @rng_register 139 | def normal_sampler(rstream, mu=0.0, sigma=1.0, draw_shape=None, ndim=None, 140 | dtype=None): 141 | mu = tensor.as_tensor_variable(mu) 142 | sigma = tensor.as_tensor_variable(sigma) 143 | rstate = rstream.new_shared_rstate() 144 | 145 | new_rstate, out = tensor.raw_random.normal( 146 | rstate, draw_shape, mu, sigma, dtype=dtype) 147 | rstream.add_default_update(out, rstate, new_rstate) 148 | return out 149 | 150 | @rng_register 151 | def normal_lpdf(node, sample, kw): 152 | # make sure that the division is done at least with float32 precision 153 | one = tensor.as_tensor_variable(numpy.asarray(1, dtype='float32')) 154 | rstate, shape, mu, sigma = node.inputs 155 | Z = tensor.sqrt(2 * numpy.pi * sigma**2) 156 | E = 0.5 * ((mu - sample)/(one*sigma))**2 157 | return - E - tensor.log(Z) 158 | 159 | @rng_register 160 | def normal_ml(node, sample, weights): 161 | rstate, shape, mu, sigma = node.inputs 162 | eps = 1e-8 163 | if weights is None: 164 | new_mu = tensor.mean(sample) 165 | new_sigma = tensor.std(sample) 166 | 167 | else: 168 | denom = tensor.maximum(tensor.sum(weights), eps) 169 | new_mu = tensor.sum(sample*weights) / denom 170 | new_sigma = tensor.sqrt( 171 | tensor.sum(weights * (sample - new_mu)**2) 172 | / denom) 173 | return Updates({ 174 | mu: new_mu, 175 | sigma: new_sigma}) 176 | 177 | @rng_register 178 | def normal_params(node): 179 | rstate, shape, mu, sigma = node.inputs 180 | return
[mu, sigma] 181 | 182 | @rng_register 183 | def normal_proposal(rstream, node, sample, kw): 184 | # TODO: how do we determine the variance? 185 | return rstream.normal(sample, 0.1, draw_shape = infer_shape(node.outputs[1])) 186 | 187 | 188 | # --------- 189 | # Binomial 190 | # --------- 191 | 192 | @rng_register 193 | def binomial_sampler(rstream, n=1, p=0.5, ndim=0, draw_shape=None, dtype=theano.config.floatX): 194 | if not isinstance(n, theano.Variable): 195 | n = tensor.shared(numpy.asarray(n, dtype=int)) 196 | if not isinstance(p, theano.Variable): 197 | p = tensor.shared(numpy.asarray(p, dtype=theano.config.floatX)) 198 | rstate = rstream.new_shared_rstate() 199 | 200 | new_rstate, out = tensor.raw_random.binomial(rstate, draw_shape, n, p, dtype=dtype) 201 | rstream.add_default_update(out, rstate, new_rstate) 202 | return out 203 | 204 | @rng_register 205 | def binomial_lpdf(node, x, kw): 206 | random_state, size, n, p = node.inputs 207 | 208 | # for the n > 1 the "choose" operation is required 209 | # TODO assert n == 1 210 | 211 | return tensor.switch(tensor.eq(x, 1.), tensor.log(p), tensor.log(1. - p)) 212 | 213 | @rng_register 214 | def binomial_params(node): 215 | rstate, shape, n, p = node.inputs 216 | return [n, p] 217 | 218 | 219 | # --------- 220 | # Lognormal 221 | # --------- 222 | 223 | 224 | def lognormal_get_mu(v): 225 | # look in uniform_sampler to see the positions of these things 226 | if rv_dist_name(v) == 'lognormal': 227 | return v.owner.inputs[2] 228 | raise ValueError('v is not a lognormal draw', v) 229 | 230 | def lognormal_get_sigma(v): 231 | # look in uniform_sampler to see the positions of these things 232 | if rv_dist_name(v) == 'lognormal': 233 | return v.owner.inputs[3] 234 | raise ValueError('v is not a lognormal draw', v) 235 | 236 | @rng_register 237 | def lognormal_sampler(rstream, mu=0.0, sigma=1.0, draw_shape=None, ndim=None, dtype=theano.config.floatX): 238 | """ 239 | Sample from a log-normal distribution centered (in the log domain) on avg 240 | with the specified standard deviation (std). 241 | 242 | If the size argument is ambiguous on the number of dimensions, ndim 243 | may be a plain integer to supplement the missing information. 244 | 245 | If size is None, the output shape will be determined by the shapes 246 | of avg and std. 247 | 248 | If dtype is not specified, it will be inferred from the dtype of 249 | avg and std, but will be at least as precise as floatX. 
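Note: if an integer dtype is requested, the draw is delegated to
quantized_lognormal_sampler with step=1 (see the first lines of the
function body).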
250 | """ 251 | 252 | if 'int' in str(dtype): 253 | return quantized_lognormal_sampler(rstream, mu, sigma, 1, 254 | draw_shape, ndim, dtype=theano.config.floatX) 255 | 256 | mu = tensor.as_tensor_variable(mu) 257 | sigma = tensor.as_tensor_variable(sigma) 258 | 259 | if dtype == None: 260 | dtype = tensor.scal.upcast( 261 | theano.config.floatX, mu.dtype, sigma.dtype) 262 | rstate = rstream.new_shared_rstate() 263 | ndim, draw_shape, bcast = tensor.raw_random._infer_ndim_bcast( 264 | ndim, draw_shape, mu, sigma) 265 | op = tensor.raw_random.RandomFunction('lognormal', 266 | tensor.TensorType(dtype=dtype, broadcastable=bcast)) 267 | new_rstate, out = op(rstate, draw_shape, mu, sigma) 268 | rstream.add_default_update(out, rstate, new_rstate) 269 | return out 270 | 271 | @rng_register 272 | def lognormal_lpdf(node, x, kw): 273 | r, shape, mu, sigma = node.inputs 274 | return lognormal_lpdf_math(x, mu, sigma) 275 | 276 | def lognormal_cdf_math(x, mu, sigma, eps=1e-12): 277 | # wikipedia claims cdf is 278 | # .5 + .5 erf( log(x) - mu / sqrt(2 sigma^2)) 279 | # 280 | # the maximum is used to move negative values and 0 up to a point 281 | # where they do not cause nan or inf, but also don't contribute much 282 | # to the cdf. 283 | return .5 + .5 * tensor.erf( 284 | (tensor.log(tensor.maximum(x, eps)) - mu) 285 | / tensor.sqrt(2 * sigma**2)) 286 | 287 | def lognormal_lpdf_math(x, mu, sigma, step=1): 288 | # formula copied from wikipedia 289 | # http://en.wikipedia.org/wiki/Log-normal_distribution 290 | Z = sigma * x * numpy.sqrt(2 * numpy.pi) 291 | E = 0.5 * ((tensor.log(x) - mu) / sigma)**2 292 | return -E - tensor.log(Z) 293 | 294 | 295 | # ------------------- 296 | # Quantized Lognormal 297 | # ------------------- 298 | 299 | def quantized_lognormal_get_mu(v): 300 | # look in uniform_sampler to see the positions of these things 301 | if rv_dist_name(v) == 'quantized_lognormal': 302 | return v.owner.inputs[2] 303 | raise ValueError('v is not a quantized_lognormal draw', v) 304 | 305 | def quantized_lognormal_get_sigma(v): 306 | # look in uniform_sampler to see the positions of these things 307 | if rv_dist_name(v) == 'quantized_lognormal': 308 | return v.owner.inputs[3] 309 | raise ValueError('v is not a quantized_lognormal draw', v) 310 | 311 | def quantized_lognormal_get_round(v): 312 | # look in uniform_sampler to see the positions of these things 313 | if rv_dist_name(v) == 'quantized_lognormal': 314 | return v.owner.inputs[4] 315 | raise ValueError('v is not a quantized_lognormal draw', v) 316 | 317 | class QuantizedLognormal(theano.Op): 318 | dist_name = 'quantized_lognormal' 319 | 320 | def __init__(self, otype, destructive=False): 321 | self.destructive = destructive 322 | self.otype = otype 323 | if destructive: 324 | self.destroy_map = {0:[0]} 325 | else: 326 | self.destroy_map = {} 327 | 328 | def __eq__(self, other): 329 | return (type(self) == type(other) 330 | and self.destructive == other.destructive 331 | and self.otype == other.otype) 332 | 333 | def __hash__(self): 334 | return hash((type(self), self.destructive, self.otype)) 335 | 336 | def make_node(self, s_rstate, draw_shape, mu, sigma, step): 337 | draw_shape = tensor.as_tensor_variable(draw_shape) 338 | mu = tensor.as_tensor_variable(mu) 339 | sigma = tensor.as_tensor_variable(sigma) 340 | step = tensor.as_tensor_variable(step) 341 | return theano.gof.Apply(self, 342 | [s_rstate, draw_shape, mu, sigma, step], 343 | [s_rstate.type(), self.otype()]) 344 | 345 | def perform(self, node, inputs, outstor): 346 | rng, shp, mu, 
sigma, step = inputs 347 | if not self.destructive: 348 | rng = copy.deepcopy(rng) 349 | shp = tuple(shp) 350 | sample = rng.lognormal(mean=mu, sigma=sigma, size=shp) 351 | sample = numpy.ceil(sample / step) * step 352 | assert sample.shape == shp 353 | if sample.size: assert sample.min() > 0 354 | sample = self.otype.filter(sample, allow_downcast=True) 355 | if sample.size: assert sample.min() > 0 356 | outstor[0][0] = rng 357 | outstor[1][0] = sample 358 | 359 | def infer_shape(self, node, ishapes): 360 | return [None, [node.inputs[1][i] for i in range(self.otype.ndim)]] 361 | 362 | @rng_register 363 | def quantized_lognormal_sampler(rstream, mu=0.0, sigma=1.0, step=1, draw_shape=None, ndim=None, 364 | dtype=theano.config.floatX): 365 | """ 366 | Sample from a quantized log-normal distribution centered on avg with 367 | the specified standard deviation (std). 368 | 369 | If the size argument is ambiguous on the number of dimensions, ndim 370 | may be a plain integer to supplement the missing information. 371 | 372 | If size is None, the output shape will be determined by the shapes 373 | of avg and std. 374 | 375 | If dtype is not specified, it will be inferred from the dtype of 376 | avg and std, but will be at least as precise as floatX. 377 | """ 378 | 379 | mu = tensor.as_tensor_variable(mu) 380 | sigma = tensor.as_tensor_variable(sigma) 381 | step = tensor.as_tensor_variable(step) 382 | 383 | if dtype == None: 384 | dtype = tensor.scal.upcast( 385 | theano.config.floatX, 386 | mu.dtype, sigma.dtype, step.dtype) 387 | rstate = rstream.new_shared_rstate() 388 | ndim, draw_shape, bcast = tensor.raw_random._infer_ndim_bcast( 389 | ndim, draw_shape, mu, sigma) 390 | op = QuantizedLognormal( 391 | otype=tensor.TensorType(dtype=dtype, broadcastable=bcast)) 392 | new_rstate, out = op(rstate, draw_shape, mu, sigma, step) 393 | rstream.add_default_update(out, rstate, new_rstate) 394 | return out 395 | 396 | @rng_register 397 | def quantized_lognormal_lpdf(node, x, kw): 398 | r, shape, mu, sigma, step = node.inputs 399 | 400 | # casting rounds up to nearest step multiple. 401 | # so lpdf is log of integral from x-step to x+1 of P(x) 402 | 403 | # XXX: subtracting two numbers potentially very close together. 
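# For samples far in the upper tail, both CDF values below approach 1 and
# their subtraction cancels catastrophically. A sketch of a more stable
# variant (not what this file does) phrases the same difference via the
# complementary error function, whose values stay small in the tail;
# using erf(z) = 1 - erfc(z):
#
#   sq2 = tensor.sqrt(2 * sigma ** 2)
#   upper = .5 * tensor.erfc((tensor.log(tensor.maximum(x - step, 1e-12)) - mu) / sq2)
#   lower = .5 * tensor.erfc((tensor.log(x) - mu) / sq2)
#   stable_lpdf = tensor.log(upper - lower)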
404 | return tensor.log( 405 | lognormal_cdf_math(x, mu, sigma) 406 | - lognormal_cdf_math(x - step, mu, sigma)) 407 | 408 | 409 | # ----------- 410 | # Categorical 411 | # ----------- 412 | 413 | 414 | class Categorical(theano.Op): 415 | dist_name = 'categorical' 416 | def __init__(self, destructive, otype): 417 | self.destructive = destructive 418 | self.otype = otype 419 | if destructive: 420 | self.destroy_map = {0:[0]} 421 | else: 422 | self.destroy_map = {} 423 | 424 | def __eq__(self, other): 425 | return (type(self) == type(other) 426 | and self.destructive == other.destructive 427 | and self.otype == other.otype) 428 | 429 | def __hash__(self): 430 | return hash((type(self), self.destructive, self.otype)) 431 | 432 | def make_node(self, s_rstate, p, draw_shape): 433 | p = tensor.as_tensor_variable(p) 434 | draw_shape = tensor.as_tensor_variable(draw_shape) 435 | return theano.gof.Apply(self, 436 | [s_rstate, p, draw_shape], 437 | [s_rstate.type(), self.otype()]) 438 | 439 | def perform(self, node, inputs, outstor): 440 | rng, p, shp = inputs 441 | if not self.destructive: 442 | rng = copy.deepcopy(rng) 443 | n_draws = numpy.prod(shp) 444 | sample = rng.multinomial(n=1, pvals=p, size=tuple(shp)) 445 | assert sample.shape == tuple(shp) + (len(p),) 446 | if tuple(shp): 447 | rval = numpy.sum(sample * numpy.arange(len(p)), axis=len(shp)) 448 | else: 449 | rval = [numpy.where(rng.multinomial(pvals=p, n=1))[0][0] 450 | for i in xrange(n_draws)] 451 | rval = numpy.asarray(rval, dtype=self.otype.dtype) 452 | assert (rval.shape == shp).all() 453 | #print "categorical drawing samples", rval.shape, (rval==0).sum(), (rval==1).sum() 454 | outstor[0][0] = rng 455 | outstor[1][0] = self.otype.filter(rval, allow_downcast=True) 456 | 457 | def infer_shape(self, node, ishapes): 458 | return [None, [node.inputs[2][i] for i in range(self.otype.ndim)]] 459 | 460 | 461 | @rng_register 462 | def categorical_sampler(rstream, p, draw_shape, dtype='int32'): 463 | if not isinstance(p, theano.Variable): 464 | p = tensor._shared(numpy.asarray(p, dtype=theano.config.floatX)) 465 | if p.ndim != 1: 466 | raise NotImplementedError() 467 | if draw_shape.ndim != 1: 468 | raise TypeError() 469 | op = Categorical(False, 470 | tensor.TensorType( 471 | broadcastable=(False,)* tensor.get_vector_length(draw_shape), 472 | dtype=dtype)) 473 | rstate = rstream.new_shared_rstate() 474 | new_rstate, out = op(rstate, p, draw_shape) 475 | rstream.add_default_update(out, rstate, new_rstate) 476 | return out 477 | 478 | 479 | @rng_register 480 | def categorical_lpdf(node, sample, kw): 481 | """ 482 | Return a random integer from 0 .. N-1 inclusive according to the 483 | probabilities p[0] .. P[N-1]. 
484 | 485 | This is formally equivalent to numpy.where(multinomial(n=1, p)) 486 | """ 487 | # WARNING: I think the p[-1] is not used, but assumed to be p[:-1].sum() 488 | s_rstate, p, draw_shape = node.inputs 489 | return tensor.log(p[sample]) 490 | 491 | 492 | # --------- 493 | # LogGamma helper Op 494 | # --------- 495 | 496 | # class PolyGamma(theano.Op): 497 | # def __eq__(self, other): 498 | # return type(self) == type(other) 499 | # 500 | # def __hash__(self): 501 | # return hash(type(self)) 502 | # 503 | # def make_node(self, x): 504 | # x_ = tensor.as_tensor_variable(x).astype(theano.config.floatX) 505 | # return theano.Apply(self, 506 | # inputs=[x_], 507 | # outputs=[x_.type()]) 508 | # 509 | # def perform(self, node, inputs, output_storage): 510 | # x, = inputs 511 | # output_storage[0][0] = numpy.asarray(scipy.special.polygamma(0, x), dtype=node.outputs[0].dtype) 512 | # 513 | # polyGamma = PolyGamma() 514 | 515 | class LogGamma(theano.Op): 516 | def __eq__(self, other): 517 | return type(self) == type(other) 518 | 519 | def __hash__(self): 520 | return hash(type(self)) 521 | 522 | def make_node(self, x): 523 | x_ = tensor.as_tensor_variable(x).astype(theano.config.floatX) 524 | return theano.Apply(self, 525 | inputs=[x_], 526 | outputs=[x_.type()]) 527 | 528 | def perform(self, node, inputs, output_storage): 529 | x, = inputs 530 | output_storage[0][0] = numpy.asarray(scipy.special.gammaln(x), dtype=node.outputs[0].dtype) 531 | 532 | # TODO: is this correct ? 533 | # def grad(self, inp, grads): 534 | # s, = inp 535 | # dt, = grads 536 | # return [polyGamma(s)*dt] 537 | 538 | logGamma = LogGamma() 539 | 540 | # --------- 541 | # Dirichlet 542 | # --------- 543 | 544 | @rng_register 545 | def dirichlet_sampler(rstream, alpha, draw_shape=None, ndim=None, dtype=theano.config.floatX): 546 | alpha = tensor.as_tensor_variable(alpha) 547 | tmp = alpha.T[0].T 548 | 549 | alpha = tensor.as_tensor_variable(alpha).astype(theano.config.floatX) 550 | if dtype == None: 551 | dtype = tensor.scal.upcast(theano.config.floatX, alpha.dtype) 552 | 553 | ndim, draw_shape, bcast = tensor.raw_random._infer_ndim_bcast(ndim, draw_shape, tmp) 554 | bcast = bcast+(alpha.type.broadcastable[-1],) 555 | 556 | op = tensor.raw_random.RandomFunction('dirichlet', 557 | tensor.TensorType(dtype=dtype, broadcastable=bcast), ndim_added=1) 558 | 559 | rstate = rstream.new_shared_rstate() 560 | new_rstate, out = op(rstate, draw_shape, alpha) 561 | rstream.add_default_update(out, rstate, new_rstate) 562 | return out 563 | 564 | def logBeta(alpha): 565 | return tensor.sum(logGamma(alpha)) - logGamma(tensor.sum(alpha)) 566 | 567 | @rng_register 568 | def dirichlet_lpdf(node, sample, kw): 569 | r, shape, alpha = node.inputs 570 | 571 | # assert sum(sample) == 1 572 | 573 | stable = tensor.eq(0, (tensor.sum(alpha <= 0.)
+ tensor.sum(sample <= 0.))) 574 | ll = -logBeta(alpha) + tensor.sum(tensor.log(sample)*(alpha-1.), axis=0) 575 | return tensor.switch(stable, ll, tensor.as_tensor_variable(float('-inf'))) 576 | 577 | # --------- 578 | # Gamma 579 | # --------- 580 | 581 | @rng_register 582 | def gamma_sampler(rstream, k, theta, draw_shape=None, ndim=None, dtype=theano.config.floatX): 583 | k = tensor.as_tensor_variable(k) 584 | theta = tensor.as_tensor_variable(theta) 585 | if dtype == None: 586 | dtype = tensor.scal.upcast(theano.config.floatX, k.dtype, theta.dtype) 587 | 588 | ndim, draw_shape, bcast = tensor.raw_random._infer_ndim_bcast(ndim, draw_shape, k, theta) 589 | op = tensor.raw_random.RandomFunction('gamma', 590 | tensor.TensorType(dtype=dtype, broadcastable=bcast)) 591 | 592 | rstate = rstream.new_shared_rstate() 593 | new_rstate, out = op(rstate, draw_shape, k, theta) 594 | rstream.add_default_update(out, rstate, new_rstate) 595 | return out 596 | 597 | @rng_register 598 | def gamma_lpdf(node, x, kw): 599 | r, shape, k, theta = node.inputs 600 | 601 | return tensor.log(x)*(k-1.) - x/theta - tensor.log(theta)*k - logGamma(k) 602 | 603 | # --------- 604 | # Multinomial 605 | # --------- 606 | 607 | @rng_register 608 | def multinomial_sampler(rstream, n=1, p=[0.5, 0.5], draw_shape=None, ndim=None, dtype=theano.config.floatX): 609 | if not isinstance(n, theano.Variable): 610 | n = tensor.shared(numpy.asarray(n, dtype=int)) 611 | if not isinstance(p, theano.Variable): 612 | p = tensor.shared(numpy.asarray(p, dtype=theano.config.floatX)) 613 | rstate = rstream.new_shared_rstate() 614 | 615 | new_rstate, out = tensor.raw_random.multinomial(rstate, draw_shape, n, p, dtype=dtype) 616 | rstream.add_default_update(out, rstate, new_rstate) 617 | return out 618 | 619 | def logFactorial(x): 620 | return logGamma(x+1.) 621 | 622 | @rng_register 623 | def multinomial_lpdf(node, x, kw): 624 | r, shape, n, p = node.inputs 625 | 626 | # TODO: how do I check this ? 627 | # assert n == tensor.sum(x) 628 | 629 | x = tensor.as_tensor_variable(x).astype(theano.config.floatX) 630 | 631 | return logFactorial(n) - tensor.sum(logFactorial(x), axis=1) + tensor.sum(tensor.log(p)*x, axis=1) 632 | 633 | # some weirdness because raw_random uses a helper function 634 | # TODO: is there a clear way to fix this ? 635 | @rng_register 636 | def multinomial_helper_sampler(*args, **kwargs): 637 | return multinomial_sampler(*args, **kwargs) 638 | 639 | @rng_register 640 | def multinomial_helper_lpdf(*args, **kwargs): 641 | return multinomial_lpdf(*args, **kwargs) 642 | 643 | # -------------------------------------------------- 644 | # Dirichlet-Multinomial 645 | # 646 | # Only the LPDF is implemented, the sampler is bogus. 
Could be used as an optimization to collapse 647 | # a dirichlet-multinomial pair into a single op 648 | # --------- 649 | 650 | class DM(theano.Op): 651 | dist_name = 'DM' 652 | def __init__(self, otype): 653 | self.otype = otype 654 | 655 | def make_node(self, s_rstate, alpha): 656 | alpha = tensor.as_tensor_variable(alpha) 657 | return theano.gof.Apply(self, 658 | [s_rstate, alpha], 659 | [s_rstate.type(), self.otype()]) 660 | 661 | def perform(self, node, inputs, output_storage): 662 | raise NotImplementedError() 663 | 664 | @rng_register 665 | def DM_sampler(rstream, alpha, draw_shape=None, ndim=None, dtype=None): 666 | shape = infer_shape(rstream.dirichlet(alpha, draw_shape=draw_shape)) 667 | rstate = rstream.new_shared_rstate() 668 | op = DM(tensor.TensorType(broadcastable=(False,)* tensor.get_vector_length(shape), dtype=theano.config.floatX)) 669 | rs, out = op(rstate, alpha) 670 | rstream.add_default_update(out, rstate, rs) 671 | return out 672 | 673 | @rng_register 674 | def DM_lpdf(node, sample, kw): 675 | r, alpha = node.inputs 676 | return logBeta(sample + alpha) - logBeta(alpha) 677 | 678 | 679 | # -------------------------------- 680 | # Gaussian Mixture Model 1D (GMM1) 681 | # -------------------------------- 682 | 683 | class GMM1(theano.Op): 684 | """ 685 | 1-dimensional Gaussian Mixture - distributed random variable 686 | 687 | weights - vector (M,) of prior mixture component probabilities 688 | mus - vector (M, ) of component centers 689 | sigmas - vector (M,) of component standard deviations 690 | """ 691 | 692 | dist_name = 'GMM1' 693 | def __init__(self, otype): 694 | self.otype = otype 695 | 696 | def __hash__(self): 697 | return hash((type(self), self.otype)) 698 | 699 | def __eq__(self, other): 700 | return type(self) == type(other) and self.otype == other.otype 701 | 702 | def make_node(self, s_rstate, weights, mus, sigmas, draw_shape): 703 | weights = tensor.as_tensor_variable(weights) 704 | mus = tensor.as_tensor_variable(mus) 705 | sigmas = tensor.as_tensor_variable(sigmas) 706 | if weights.ndim != 1: 707 | raise TypeError('weights', weights) 708 | if mus.ndim != 1: 709 | raise TypeError('mus', mus) 710 | if sigmas.ndim != 1: 711 | raise TypeError('sigmas', sigmas) 712 | return theano.gof.Apply(self, 713 | [s_rstate, weights, mus, sigmas, draw_shape], 714 | [s_rstate.type(), self.otype()]) 715 | 716 | def perform(self, node, inputs, output_storage): 717 | rstate, weights, mus, sigmas, draw_shape = inputs 718 | 719 | n_samples = numpy.prod(draw_shape) 720 | rstate = copy.copy(rstate) 721 | 722 | active = numpy.argmax( 723 | rstate.multinomial(1, weights, (n_samples,)), 724 | axis=1) 725 | assert len(active) == n_samples 726 | samples = rstate.normal(loc=mus[active], scale=sigmas[active]) 727 | samples = numpy.asarray( 728 | numpy.reshape(samples, draw_shape), 729 | dtype=self.otype.dtype) 730 | output_storage[0][0] = rstate 731 | output_storage[1][0] = samples 732 | 733 | def infer_shape(self, node, ishapes): 734 | rstate, weights, mus, sigmas, draw_shape = node.inputs 735 | return [None, [draw_shape[i] for i in range(self.otype.ndim)]] 736 | 737 | @rng_register 738 | def GMM1_sampler(rstream, weights, mus, sigmas, 739 | draw_shape=None, ndim=None, dtype=None): 740 | rstate = rstream.new_shared_rstate() 741 | 742 | # shape prep 743 | if draw_shape is None: 744 | raise NotImplementedError() 745 | elif draw_shape is tensor.as_tensor_variable(draw_shape): 746 | shape = draw_shape 747 | if ndim is None: 748 | ndim = tensor.get_vector_length(shape) 749 | else: 750 | shape =
tensor.hstack(*draw_shape) 751 | if ndim is None: 752 | ndim = len(draw_shape) 753 | assert tensor.get_vector_length(shape) == ndim 754 | 755 | # XXX: be smarter about inferring broadcastable 756 | op = GMM1( 757 | tensor.TensorType( 758 | broadcastable=(False,) * ndim, 759 | dtype=theano.config.floatX if dtype is None else dtype)) 760 | rs, out = op(rstate, weights, mus, sigmas, shape) 761 | rstream.add_default_update(out, rstate, rs) 762 | return out 763 | 764 | @rng_register 765 | def GMM1_lpdf(node, sample, kw): 766 | r, weights, mus, sigmas, draw_shape = node.inputs 767 | assert weights.ndim == 1 768 | assert mus.ndim == 1 769 | assert sigmas.ndim == 1 770 | _sample = sample 771 | if sample.ndim != 1: 772 | sample = sample.flatten() 773 | 774 | dist = (sample.dimshuffle(0, 'x') - mus) 775 | mahal = ((dist ** 2) / (sigmas ** 2)) 776 | # POSTCONDITION: mahal.shape == (n_samples, n_components) 777 | 778 | Z = tensor.sqrt(2 * numpy.pi * sigmas**2) 779 | rval = tensor.log(tensor.sum( 780 | tensor.exp(-.5 * mahal) * weights / Z, 781 | axis=1)) 782 | if not sample is _sample: 783 | rval = rval.reshape(_sample.shape) 784 | assert rval.ndim != 1 785 | return rval 786 | 787 | 788 | # ----------------------------------------- 789 | # Bounded Gaussian Mixture Model 1D (BGMM1) 790 | # ----------------------------------------- 791 | 792 | class BGMM1(theano.Op): 793 | """ 794 | Bounded 1-dimensional Gaussian Mixture - distributed random variable 795 | 796 | weights - vector (M,) of prior mixture component probabilities 797 | mus - vector (M, ) of component centers 798 | sigmas - vector (M,) of component standard deviations 799 | low - scalar 800 | high - scalar 801 | 802 | This density is a Gaussian Mixture model truncated both below (`low`) and 803 | above (`high`). 804 | """ 805 | 806 | dist_name = 'BGMM1' 807 | def __init__(self, otype): 808 | self.otype = otype 809 | 810 | def __hash__(self): 811 | return hash((type(self), self.otype)) 812 | 813 | def __eq__(self, other): 814 | return type(self) == type(other) and self.otype == other.otype 815 | 816 | def make_node(self, s_rstate, weights, mus, sigmas, low, high, draw_shape): 817 | weights = tensor.as_tensor_variable(weights) 818 | mus = tensor.as_tensor_variable(mus) 819 | sigmas = tensor.as_tensor_variable(sigmas) 820 | low = tensor.as_tensor_variable(low) 821 | high = tensor.as_tensor_variable(high) 822 | if weights.ndim != 1: 823 | raise TypeError('weights', weights) 824 | if mus.ndim != 1: 825 | raise TypeError('mus', mus) 826 | if sigmas.ndim != 1: 827 | raise TypeError('sigmas', sigmas) 828 | if low.ndim != 0: 829 | raise TypeError('low', low) 830 | if high.ndim != 0: 831 | raise TypeError('high', high) 832 | return theano.gof.Apply(self, 833 | [s_rstate, weights, mus, sigmas, low, high, draw_shape], 834 | [s_rstate.type(), self.otype()]) 835 | 836 | def perform(self, node, inputs, output_storage): 837 | rstate, weights, mus, sigmas, low, high, draw_shape = inputs 838 | 839 | n_samples = numpy.prod(draw_shape) 840 | n_components = len(weights) 841 | rstate = copy.copy(rstate) 842 | 843 | # rejection sampling, one sample at a time...
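# The expected number of proposals per accepted draw is 1/Z, where Z is the
# mixture mass that falls inside [low, high] (the quantity that
# effective_weights.sum() computes in BGMM1_lpdf below), so a narrow or
# far-off truncation interval makes this loop arbitrarily slow.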
844 | samples = [] 845 | while len(samples) < n_samples: 846 | active = numpy.argmax(rstate.multinomial(1, weights)) 847 | draw = rstate.normal(loc=mus[active], scale=sigmas[active]) 848 | if low < draw < high: 849 | samples.append(draw) 850 | samples = numpy.asarray( 851 | numpy.reshape(samples, draw_shape), 852 | dtype=self.otype.dtype) 853 | #print "BGMM drawing samples", samples.shape, samples.flatten()[:4] 854 | output_storage[0][0] = rstate 855 | output_storage[1][0] = samples 856 | 857 | def infer_shape(self, node, ishapes): 858 | rstate, weights, mus, sigmas, low, high, draw_shape = node.inputs 859 | return [None, [draw_shape[i] for i in range(self.otype.ndim)]] 860 | 861 | @rng_register 862 | def BGMM1_sampler(rstream, weights, mus, sigmas, low, high, 863 | draw_shape=None, ndim=None, dtype=None): 864 | rstate = rstream.new_shared_rstate() 865 | 866 | # shape prep 867 | if draw_shape is None: 868 | raise NotImplementedError() 869 | elif draw_shape is tensor.as_tensor_variable(draw_shape): 870 | shape = draw_shape 871 | if ndim is None: 872 | ndim = tensor.get_vector_length(shape) 873 | else: 874 | shape = tensor.hstack(*draw_shape) 875 | if ndim is None: 876 | ndim = len(draw_shape) 877 | assert tensor.get_vector_length(shape) == ndim 878 | 879 | # XXX: be smarter about inferring broadcastable 880 | op = BGMM1( 881 | tensor.TensorType( 882 | broadcastable=(False,) * ndim, 883 | dtype=theano.config.floatX if dtype is None else dtype)) 884 | rs, out = op(rstate, weights, mus, sigmas, low, high, shape) 885 | rstream.add_default_update(out, rstate, rs) 886 | return out 887 | 888 | @rng_register 889 | def BGMM1_lpdf(node, sample, kw): 890 | r, weights, mus, sigmas, low, high, draw_shape = node.inputs 891 | assert weights.ndim == 1 892 | assert mus.ndim == 1 893 | assert sigmas.ndim == 1 894 | _sample = sample 895 | if sample.ndim != 1: 896 | sample = sample.flatten() 897 | 898 | erf = theano.tensor.erf 899 | 900 | effective_weights = 0.5 * weights * ( 901 | erf((high - mus) / (sigmas * numpy.sqrt(2))) - erf((low - mus) / (sigmas * numpy.sqrt(2)))) # mass of each component inside [low, high] 902 | 903 | dist = (sample.dimshuffle(0, 'x') - mus) 904 | mahal = ((dist ** 2) / (sigmas ** 2)) 905 | # POSTCONDITION: mahal.shape == (n_samples, n_components) 906 | 907 | Z = tensor.sqrt(2 * numpy.pi * sigmas**2) 908 | rval = tensor.log( 909 | tensor.sum( 910 | tensor.true_div( 911 | tensor.exp(-.5 * mahal) * weights, 912 | Z * effective_weights.sum()), 913 | axis=1)) 914 | if not sample is _sample: 915 | rval = rval.reshape(_sample.shape) 916 | assert rval.ndim != 1 917 | return rval 918 | 919 | 920 | # ------------------------- 921 | # Mixture of Lognormal (1D) 922 | # ------------------------- 923 | 924 | class LognormalMixture(theano.Op): 925 | """ 926 | 1-dimensional lognormal mixture - distributed random variable 927 | 928 | weights - vector (M,) of prior mixture component probabilities 929 | mus - vector (M,) of component centers (in the log domain) 930 | sigmas - vector (M,) of component standard deviations (in the log domain) 931 | """ 932 | 933 | dist_name = 'lognormal_mixture' 934 | def __init__(self, otype): 935 | self.otype = otype 936 | 937 | def __hash__(self): 938 | return hash((type(self), self.otype)) 939 | 940 | def __eq__(self, other): 941 | return type(self) == type(other) and self.otype == other.otype 942 | 943 | def make_node(self, s_rstate, weights, mus, sigmas, draw_shape): 944 | weights = tensor.as_tensor_variable(weights) 945 | mus = tensor.as_tensor_variable(mus) 946 | sigmas = tensor.as_tensor_variable(sigmas) 947 | if weights.ndim != 1: 948 | raise TypeError('weights', weights)
949 | if mus.ndim != 1: 950 | raise TypeError('mus', mus) 951 | if sigmas.ndim != 1: 952 | raise TypeError('sigmas', sigmas) 953 | return theano.gof.Apply(self, 954 | [s_rstate, weights, mus, sigmas, draw_shape], 955 | [s_rstate.type(), self.otype()]) 956 | 957 | def perform(self, node, inputs, output_storage): 958 | rstate, weights, mus, sigmas, draw_shape = inputs 959 | 960 | n_samples = numpy.prod(draw_shape) 961 | n_components = len(weights) 962 | rstate = copy.copy(rstate) 963 | 964 | active = numpy.argmax( 965 | rstate.multinomial(1, weights, (n_samples,)), 966 | axis=1) 967 | assert len(active) == n_samples 968 | samples = numpy.exp( 969 | rstate.normal( 970 | loc=mus[active], 971 | scale=sigmas[active])) 972 | if not numpy.all(numpy.isfinite(samples)): 973 | logger.warning('overflow in LognormalMixture') 974 | logger.warning(' mu = %s' % str(mus[active])) 975 | logger.warning(' sigma = %s' % str(sigmas[active])) 976 | logger.warning(' samples = %s' % str(samples)) 977 | samples = numpy.asarray( 978 | numpy.reshape(samples, draw_shape), 979 | dtype=self.otype.dtype) 980 | if not numpy.all(numpy.isfinite(samples)): 981 | logger.warning('overflow in LognormalMixture after astype') 982 | logger.warning(' mu = %s' % str(mus[active])) 983 | logger.warning(' sigma = %s' % str(sigmas[active])) 984 | logger.warning(' samples = %s' % str(samples)) 985 | output_storage[0][0] = rstate 986 | output_storage[1][0] = samples 987 | 988 | def infer_shape(self, node, ishapes): 989 | rstate, weights, mus, sigmas, draw_shape = node.inputs 990 | return [None, [draw_shape[i] for i in range(self.otype.ndim)]] 991 | 992 | @rng_register 993 | def lognormal_mixture_sampler(rstream, weights, mus, sigmas, 994 | draw_shape=None, ndim=None, dtype=None): 995 | rstate = rstream.new_shared_rstate() 996 | # shape prep 997 | if draw_shape is None: 998 | raise NotImplementedError() 999 | elif draw_shape is tensor.as_tensor_variable(draw_shape): 1000 | shape = draw_shape 1001 | if ndim is None: 1002 | ndim = tensor.get_vector_length(shape) 1003 | else: 1004 | shape = tensor.hstack(*draw_shape) 1005 | if ndim is None: 1006 | ndim = len(draw_shape) 1007 | assert tensor.get_vector_length(shape) == ndim 1008 | 1009 | # XXX: be smarter about inferring broadcastable 1010 | op = LognormalMixture( 1011 | tensor.TensorType( 1012 | broadcastable=(False,) * ndim, 1013 | dtype=theano.config.floatX if dtype is None else dtype)) 1014 | rs, out = op(rstate, weights, mus, sigmas, shape) 1015 | rstream.add_default_update(out, rstate, rs) 1016 | return out 1017 | 1018 | @rng_register 1019 | def lognormal_mixture_lpdf(node, sample, kw): 1020 | r, weights, mus, sigmas, draw_shape = node.inputs 1021 | assert weights.ndim == 1 1022 | assert mus.ndim == 1 1023 | assert sigmas.ndim == 1 1024 | _sample = sample 1025 | if sample.ndim != 1: 1026 | sample = sample.flatten() 1027 | 1028 | # compute the lpdf of each sample under each component 1029 | lpdfs = lognormal_lpdf_math(sample.dimshuffle(0, 'x'), mus, sigmas) 1030 | assert lpdfs.ndim == 2 1031 | 1032 | # XXX: Make sure this is done in a numerically good way 1033 | rval = tensor.log( 1034 | tensor.sum( 1035 | tensor.exp(lpdfs) * weights, 1036 | axis=1)) 1037 | 1038 | if not sample is _sample: 1039 | rval = rval.reshape(_sample.shape) 1040 | assert rval.ndim != 1 1041 | return rval 1042 | 1043 | 1044 | # ------------------------- 1045 | # Mixture of Lognormal (1D) 1046 | # ------------------------- 1047 | 1048 | class QuantizedLognormalMixture(theano.Op): 1049 | """ 1050 | 1-dimensional 
# -----------------------------------
# Mixture of Quantized Lognormal (1D)
# -----------------------------------

class QuantizedLognormalMixture(theano.Op):
    """
    1-dimensional quantized-lognormal-mixture-distributed random variable

    weights - vector (M,) of prior mixture component probabilities
    mus - vector (M,) of component centers (means of the underlying normals)
    sigmas - vector (M,) of component standard deviations (not variances)
    step - scalar quantization step; draws are rounded up to multiples of it
    """

    dist_name = 'quantized_lognormal_mixture'
    def __init__(self, otype):
        self.otype = otype

    def __hash__(self):
        return hash((type(self), self.otype))

    def __eq__(self, other):
        return type(self) == type(other) and self.otype == other.otype

    def make_node(self, s_rstate, draw_shape, weights, mus, sigmas, step):
        weights = tensor.as_tensor_variable(weights)
        mus = tensor.as_tensor_variable(mus)
        sigmas = tensor.as_tensor_variable(sigmas)
        step = tensor.as_tensor_variable(step)
        if weights.ndim != 1:
            raise TypeError('weights', weights)
        if mus.ndim != 1:
            raise TypeError('mus', mus)
        if sigmas.ndim != 1:
            raise TypeError('sigmas', sigmas)
        if step.ndim != 0:
            raise TypeError('step', step)
        return theano.gof.Apply(self,
                [s_rstate, draw_shape, weights, mus, sigmas, step],
                [s_rstate.type(), self.otype()])

    def perform(self, node, inputs, output_storage):
        rstate, draw_shape, weights, mus, sigmas, step = inputs

        if len(weights) != len(mus):
            raise ValueError('length mismatch between weights and mus',
                    (weights.shape, mus.shape))
        if len(weights) != len(sigmas):
            raise ValueError('length mismatch between weights and sigmas',
                    (weights.shape, sigmas.shape))
        if len(weights) == 0:
            raise ValueError('length of weights vector must be positive',
                    weights.shape)

        n_samples = numpy.prod(draw_shape)
        n_components = len(weights)
        # XXX: add destructive version
        rstate = copy.copy(rstate)

        if n_samples == 0:
            samples = numpy.empty((0,), dtype=node.outputs[1].dtype)
        elif n_samples == 1:
            active = numpy.argmax(rstate.multinomial(1, weights))
            samples = rstate.lognormal(
                    mean=mus[active],
                    sigma=sigmas[active])
            samples = numpy.asarray(numpy.ceil(samples / step) * step)
            assert samples.ndim == 0
            if len(draw_shape) == 0:
                samples.shape = ()
            else:
                samples.shape = (1,)
        else:
            active = numpy.argmax(
                    rstate.multinomial(1, weights, (n_samples,)),
                    axis=1)
            assert len(active) == n_samples
            samples = rstate.lognormal(
                    mean=mus[active],
                    sigma=sigmas[active])
            assert len(samples) == n_samples
            samples = numpy.ceil(samples / step) * step
            samples.shape = tuple(draw_shape)

        if not numpy.all(numpy.isfinite(samples)):
            logger.warning('overflow in QuantizedLognormalMixture')
            logger.warning(' mu = %s' % str(mus[active]))
            logger.warning(' sigma = %s' % str(sigmas[active]))
            logger.warning(' samples = %s' % str(samples))

        # ceil-quantization keeps every sample strictly positive;
        # re-check after otype.filter in case of a lossy downcast.
        if samples.size:
            assert samples.min() > 0
        samples = self.otype.filter(samples, allow_downcast=True)
        if samples.size:
            assert samples.min() > 0

        output_storage[0][0] = rstate
        output_storage[1][0] = samples

    def infer_shape(self, node, ishapes):
        rstate, draw_shape, weights, mus, sigmas, step = node.inputs
        return [None, [draw_shape[i] for i in range(self.otype.ndim)]]

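# Quantization note (sketch, arbitrary numbers): `perform` rounds every
# lognormal draw *up* to the next multiple of `step`, so samples are always
# strictly positive multiples of `step`.  This is also why the lpdf below
# integrates the CDF over the half-open interval (x - step, x].
#
#     import numpy as np
#     draws = np.asarray([0.1, 0.49, 0.51, 1.2])
#     np.ceil(draws / 0.5) * 0.5  # -> array([0.5, 0.5, 1.0, 1.5])
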
@rng_register
def quantized_lognormal_mixture_sampler(rstream, weights, mus, sigmas, step,
        draw_shape=None, ndim=None, dtype=None):
    rstate = rstream.new_shared_rstate()
    # shape prep
    if draw_shape is None:
        raise NotImplementedError()
    elif draw_shape is tensor.as_tensor_variable(draw_shape):
        # draw_shape is already a symbolic shape vector
        shape = draw_shape
        if ndim is None:
            ndim = tensor.get_vector_length(shape)
    elif tuple(draw_shape) == ():
        # scalar draw: empty shape vector
        ndim = 0
        shape = tensor.as_tensor_variable(
                numpy.asarray([], dtype='int'))
    else:
        # draw_shape is a sequence of scalars: stack them into a shape vector
        shape = tensor.stack(*draw_shape)
        if ndim is None:
            ndim = len(draw_shape)
        assert tensor.get_vector_length(shape) == ndim

    # XXX: be smarter about inferring broadcastable
    op = QuantizedLognormalMixture(
            tensor.TensorType(
                broadcastable=(False,) * ndim,
                dtype=theano.config.floatX if dtype is None else dtype))
    rs, out = op(rstate, shape, weights, mus, sigmas, step)
    rstream.add_default_update(out, rstate, rs)
    return out


@rng_register
def quantized_lognormal_mixture_lpdf(node, sample, kw):
    r, draw_shape, weights, mus, sigmas, step = node.inputs
    assert weights.ndim == 1
    assert mus.ndim == 1
    assert sigmas.ndim == 1
    assert step.ndim == 0
    _sample = sample
    if sample.ndim != 1:
        sample = sample.flatten()

    # log probability *mass* of each sample under each component: the
    # lognormal CDF integrated over one quantization step (x - step, x].
    # The 1e-7 guards against log(0) in the far tails.
    lpdfs = tensor.log(
            lognormal_cdf_math(
                sample.dimshuffle(0, 'x'),
                mus,
                sigmas)
            - lognormal_cdf_math(
                sample.dimshuffle(0, 'x') - step,
                mus,
                sigmas)
            + 1.0e-7)
    assert lpdfs.ndim == 2

    # Mixture lpdf via log-sum-exp, so that very negative component
    # lpdfs do not underflow to exp(...) == 0.
    max_lpdf = tensor.max(lpdfs, axis=1)
    rval = max_lpdf + tensor.log(
            tensor.sum(
                tensor.exp(lpdfs - max_lpdf.dimshuffle(0, 'x')) * weights,
                axis=1))

    if sample is not _sample:
        rval = rval.reshape(_sample.shape)
        assert rval.ndim != 1
    return rval
--------------------------------------------------------------------------------