├── .gitignore
├── requirements.txt
├── montetheano
│   ├── .gitignore
│   ├── __init__.py
│   ├── README.txt
│   ├── examples
│   │   ├── gaussian_mixture_model.py
│   │   ├── bayes_occams_razor.py
│   │   ├── max_likelihood_logistic_regression.py
│   │   ├── bayesian_linear_regression.py
│   │   ├── bayesian_logistic_regression.py
│   │   ├── hierarchical_dirichlet.py
│   │   └── latent_dirichlet_allocation.py
│   ├── max_lik.py
│   ├── utils.py
│   ├── test_for_theano.py
│   ├── test_rv.py
│   ├── rv.py
│   ├── rstreams.py
│   ├── sample.py
│   ├── for_theano.py
│   ├── test_distributions.py
│   └── distributions.py
├── setup.py
└── README.rst

/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | theano
2 |
--------------------------------------------------------------------------------
/montetheano/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | *.swp
3 |
--------------------------------------------------------------------------------
/montetheano/__init__.py:
--------------------------------------------------------------------------------
1 | from rstreams import RandomStreams
2 | import distributions  # populates registry
3 | from rv import energy
4 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | from distutils.core import setup
4 |
5 | setup(name='montetheano',
6 |       packages=['montetheano'],
7 |       )
8 |
--------------------------------------------------------------------------------
/montetheano/README.txt:
--------------------------------------------------------------------------------
1 | Files in here:
2 |
3 | __init__.py - package entry point; imports RandomStreams, the distributions registry, and energy
4 | for_theano.py - things that could maybe migrate upstream.
5 | sample.py - algorithms for drawing samples by MCMC
6 |
7 | rstreams.py - RandomStreams and associated registries
8 | distributions.py - distribution-specific code (normal, bernoulli, etc.)
9 | rv.py - functions for working with random variables.
10 | 11 | -------------------------------------------------------------------------------- /montetheano/examples/gaussian_mixture_model.py: -------------------------------------------------------------------------------- 1 | import numpy, pylab 2 | import theano 3 | from theano import tensor 4 | from rstreams import RandomStreams 5 | import distributions 6 | from sample import mh2_sample 7 | from rv import full_log_likelihood 8 | 9 | s_rng = RandomStreams(3424) 10 | 11 | p = s_rng.dirichlet(numpy.asarray([1, 1]))[0] 12 | m1 = s_rng.uniform(low=-5, high=5) 13 | m2 = s_rng.uniform(low=-5, high=5) 14 | v = s_rng.uniform(low=0, high=1) 15 | 16 | C = s_rng.binomial(1, p, draw_shape=(4,)) 17 | m = tensor.switch(C, m1, m2) 18 | D = s_rng.normal(m, v, draw_shape=(4,)) 19 | 20 | D_data = numpy.asarray([1, 1.2, 3, 3.4], dtype=theano.config.floatX) 21 | 22 | givens = dict([(D, D_data)]) 23 | sampler = mh2_sample(s_rng, [p, m1, m2, v], givens) 24 | 25 | samples = sampler(200, 1000, 100) 26 | print samples[0].mean(), samples[1].mean(), samples[2].mean(), samples[3].mean() 27 | -------------------------------------------------------------------------------- /montetheano/examples/bayes_occams_razor.py: -------------------------------------------------------------------------------- 1 | import numpy, pylab 2 | import theano 3 | from theano import tensor 4 | from rstreams import RandomStreams 5 | import distributions 6 | from sample import mh2_sample 7 | from for_theano import evaluate 8 | from rv import full_log_likelihood 9 | 10 | s_rng = RandomStreams(23424) 11 | 12 | fair_prior = 0.999 13 | 14 | coin_weight = tensor.switch(s_rng.binomial(1, fair_prior) > 0.5, 0.5, s_rng.dirichlet([1, 1])[0]) 15 | 16 | make_coin = lambda p, size: s_rng.binomial(1, p, draw_shape=(size,)) 17 | coin = lambda size: make_coin(coin_weight, size) 18 | 19 | for size in [1, 3, 6, 10, 20, 30, 50, 70, 100]: 20 | data = evaluate(make_coin(0.9, size)) 21 | 22 | sampler = mh2_sample(s_rng, [coin_weight], {coin(size) : data}) 23 | 24 | print "nr of examples", size, ", estimated probability", sampler(nr_samples=400, burnin=20000, lag=10)[0].mean() 25 | -------------------------------------------------------------------------------- /montetheano/max_lik.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import theano 3 | from theano import tensor 4 | from for_theano import ancestors, infer_shape, evaluate_with_assignments, evaluate 5 | from rv import is_raw_rv, full_log_likelihood, lpdf 6 | 7 | def likelihood_gradient(observations = {}, learning_rate = 0.1): 8 | all_vars = ancestors(list(observations.keys())) 9 | 10 | for o in observations: 11 | assert o in all_vars 12 | if not is_raw_rv(o): 13 | raise TypeError(o) 14 | 15 | RVs = [v for v in all_vars if is_raw_rv(v)] 16 | free_RVs = [v for v in RVs if v not in observations] 17 | 18 | # Instantiate actual values for the different random variables: 19 | params = dict() 20 | for v in free_RVs: 21 | f = theano.function([], v, mode=theano.Mode(linker='py', optimizer=None)) 22 | params[v] = theano.shared(f()) 23 | 24 | # Compute the full log likelihood: 25 | full_observations = dict(observations) 26 | full_observations.update(params) 27 | log_likelihood = full_log_likelihood(full_observations) 28 | 29 | # Construct the update equations for learning: 30 | updates = dict() 31 | for frvs in params.values(): 32 | updates[frvs] = frvs + learning_rate * tensor.grad(log_likelihood, frvs) 33 | 34 | return params, updates, log_likelihood 35 | 36 
| 37 |
--------------------------------------------------------------------------------
/montetheano/examples/max_likelihood_logistic_regression.py:
--------------------------------------------------------------------------------
1 | import numpy
2 | import theano
3 | from theano import tensor
4 | from rstreams import RandomStreams
5 | import distributions
6 | from sample import hybridmc_sample
7 | from rv import full_log_likelihood
8 |
9 | from max_lik import likelihood_gradient
10 |
11 | s_rng = RandomStreams(3424)
12 |
13 | # Weight prior:
14 | w = s_rng.normal(0, 2, draw_shape=(3,))
15 |
16 | # Linear model:
17 | x = tensor.matrix('x')
18 | y = tensor.nnet.sigmoid(tensor.dot(x, w))
19 |
20 | # Bernoulli observation model:
21 | t = s_rng.binomial(p=y, draw_shape=(4,))
22 |
23 | # Some data:
24 | X_data = numpy.asarray([[-1.5, -0.4, 1.3, 2.2], [-1.1, -2.2, 1.3, 0], [1., 1., 1., 1.]], dtype=theano.config.floatX).T
25 | Y_data = numpy.asarray([1., 1., 0., 0.], dtype=theano.config.floatX)
26 |
27 | # Compute gradient updates:
28 | observations = dict([(t, Y_data)])
29 | params, updates, log_likelihood = likelihood_gradient(observations)
30 |
31 | # Compile training function and assign input data as givens:
32 | givens = dict([(x, X_data)])
33 | train = theano.function([], [log_likelihood], givens=givens, updates=updates)
34 |
35 | # Run 100 epochs of training:
36 | for i in range(100):
37 |     print "epoch", i, ", log likelihood:", train()[0]
38 |
39 |
40 | # Generate testing function:
41 | givens = dict([(x, X_data)])
42 | givens.update(params)
43 | test = theano.function([], [y], givens=givens)
44 |
45 | print test(), Y_data
--------------------------------------------------------------------------------
/montetheano/utils.py:
--------------------------------------------------------------------------------
1 | """
2 | Misc utils
3 | """
4 | import __builtin__
5 |
6 | class ClobberContext(object):
7 |     """
8 |     Makes an object usable with 'with' statements.
9 |
10 |     with obj as _:
11 |         ...  # obj.method is accessible as method()
12 |
13 |     Danger - the illusion is not perfect! It works by inserting things into
14 |     __builtin__ namespace, so if there are local variables in enclosing scopes,
15 |     they will actually trump the object's own methods.
16 |     """
17 |     def __enter__(self):
18 |         assert not hasattr(self, '_clobbered_symbols')
19 |         self._clobbered_symbols = {}
20 |         for name in self.clobber_symbols:
21 |             if hasattr(__builtin__, name):
22 |                 self._clobbered_symbols[name] = getattr(__builtin__, name)
23 |             if hasattr(self, name):
24 |                 setattr(__builtin__, name, getattr(self, name))
25 |         return self
26 |
27 |     def __exit__(self, e_type, e_val, e_traceback):
28 |         for name in self.clobber_symbols:
29 |             if name in self._clobbered_symbols:
30 |                 setattr(__builtin__, name, self._clobbered_symbols[name])
31 |             elif hasattr(__builtin__, name):
32 |                 delattr(__builtin__, name)
33 |         del self._clobbered_symbols
34 |
35 |
36 | class Updates(dict):
37 |     """
38 |     Updates is a dictionary for which the '+' operator does an update.
39 |
40 |     Not a normal update though, because a KeyError is raised if a symbol is
41 |     present in both dictionaries with conflicting values.
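    For example (a sketch of the intended semantics; `u` and `v` stand for
    any Theano shared variables):

        a = Updates({u: u + 1})
        b = Updates({v: v * 2})
        c = a + b                  # ok: disjoint keys, c contains both updates
        a + Updates({u: u - 1})    # raises KeyError: conflicting values for u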
42 |     """
43 |     def __add__(self, other):
44 |         rval = Updates(self)
45 |         rval += other  # see: __iadd__
46 |         return rval
47 |     def __iadd__(self, other):
48 |         d = dict(other)
49 |         for k,v in d.items():
50 |             if k in self and v != self[k]:
51 |                 raise KeyError()
52 |             self[k] = v
53 |         return self
54 |
55 |
--------------------------------------------------------------------------------
/montetheano/examples/bayesian_linear_regression.py:
--------------------------------------------------------------------------------
1 | import numpy, pylab
2 | import theano
3 | from theano import tensor
4 | from rstreams import RandomStreams
5 | import distributions
6 | from sample import mh2_sample
7 | from rv import full_log_likelihood
8 | from for_theano import evaluate
9 |
10 | s_rng = RandomStreams(3424)
11 |
12 | def poly_expansion(x, order):
13 |     x = x.T
14 |     result, updates = theano.scan(fn=lambda prior_result, x: prior_result * x,
15 |                                   outputs_info=tensor.ones_like(x),
16 |                                   non_sequences=x,
17 |                                   n_steps=order)
18 |
19 |     return tensor.concatenate([tensor.ones([x.shape[1],1]), tensor.reshape(result.T, (x.shape[1], x.shape[0]*order))], axis=1)
20 |
21 | # Define priors to be inverse gamma distributions
22 | alpha = 1/s_rng.gamma(1., 2.)
23 | beta = 1/s_rng.gamma(1., .1)
24 |
25 | # Order of the model
26 | # TODO: this currently has to be fixed, would be nice if this could also be a RV!
27 | m = 7 #s_rng.random_integers(1, 10)
28 | w = s_rng.normal(0, beta, draw_shape=(m+1,))
29 |
30 | # Input variable used for training
31 | x = tensor.matrix('x')
32 | # Input variable used for testing
33 | xn = tensor.matrix('xn')
34 |
35 | # Actual linear model
36 | y = lambda x_in: tensor.dot(poly_expansion(x_in, m), w)
37 |
38 | # Observation model
39 | t = s_rng.normal(y(x), alpha, draw_shape=(10,))
40 |
41 | # Generate some noisy training data (sine + noise)
42 | X_data = numpy.arange(-1,1,0.3)
43 | Y_data = numpy.sin(numpy.pi*X_data) + 0.1*numpy.random.randn(*X_data.shape)
44 | X_data.shape = (X_data.shape[0],1)
45 |
46 | X_new = numpy.arange(-1,1,0.05)
47 | X_new.shape = (X_new.shape[0],1)
48 |
49 | pylab.plot(X_data, Y_data, 'x', markersize=10)
50 |
51 | # Generate samples from the model
52 | sampler = mh2_sample(s_rng, [y(xn)], observations={t: Y_data}, givens={x: X_data, xn: X_new})
53 | samples = sampler(50, 1000, 200)
54 | pylab.errorbar(X_new, numpy.mean(samples[0].T, axis=1), numpy.std(samples[0].T, axis=1))
55 | pylab.show()
56 | pylab.plot(X_new, samples[0].T)
57 | pylab.show()
58 |
--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
1 | ============
2 | Monte Theano
3 | ============
4 |
5 | **This is an inactive project** - for a Theano-based inference engine for graphical models, check out the
6 | `PyMC3 <https://github.com/pymc-devs/pymc/tree/pymc3>`_ branch of `PyMC <http://pymc-devs.github.io/pymc/>`_.
7 |
8 | Monte Carlo inference algorithms for stochastic Theano programs.
9 |
10 | 1. Directed models: Use Theano (with RandomStreams) to build a directed graphical model, then
11 |
12 |    - Estimate likelihood of a full assignment.
13 |
14 |    - Condition on observations, draw samples from posterior over latent internal variables.
15 |
16 |    - Estimate marginal likelihood analytically or by MCMC.
17 |
18 |    - Learn by inferring MAP or ML estimates of latent variables.
19 |
20 | 2. Undirected models: still thinking about if/how to do this. And what about
21 |    factor graphs?
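For a flavour of the intended API, here is a minimal sketch distilled from
the scripts in ``montetheano/examples`` (like those scripts, it assumes it is
run from inside the ``montetheano`` package directory)::

    import numpy
    from rstreams import RandomStreams
    import distributions  # populates the sampler/pdf registry
    from sample import mh2_sample

    s_rng = RandomStreams(3424)

    mu = s_rng.uniform(low=-5, high=5)            # latent variable
    x = s_rng.normal(mu, 1.0, draw_shape=(4,))    # observation model

    data = numpy.asarray([1.0, 1.2, 3.0, 3.4])
    sampler = mh2_sample(s_rng, [mu], {x: data})  # condition on x = data
    samples = sampler(200, 1000, 100)             # nr_samples, burnin, lag
    print samples[0].mean()                       # posterior mean estimate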
22 | 23 | 24 | 25 | How does it work 26 | ---------------- 27 | 28 | Not totally clear yet! Ingredients will be: 29 | 30 | - symbolic representations of likelihood functions 31 | 32 | - automatically factorizing directed models 33 | 34 | - generic Metropolis Hastings samplers 35 | 36 | - Hamiltonian Monte Carlo 37 | 38 | - Importance sampling? 39 | 40 | - Rejection sampling? 41 | 42 | - slice sampling? 43 | 44 | - Tempered sampling? 45 | 46 | It seems like it should be possible to automatically recognize opportunities for 47 | blocked Gibbs sampling, in which for example we recognize blocks of continuous 48 | variables for an HMC sampler. Not sure if this is a useful thing to do. 49 | 50 | 51 | Similar Packages 52 | ---------------- 53 | 54 | - MIT-Church (probabilistic scheme) 55 | 56 | - IBAL (probabilistic OCAML) 57 | 58 | - PyMC (MCMC inference in Python) 59 | 60 | - Infer.net (Csoft) 61 | 62 | - Factorie 63 | 64 | - PMTK 65 | 66 | - Dyna 67 | 68 | This package differs from the ones above in building on top of Theano, which already has a) a 69 | natural graph data structure for expressing directed graphical models, b) a 70 | performance-oriented backend with GPU support, and c) automatic symbolic differentiation which 71 | makes HMC and optimization routines much easier to implement. 72 | -------------------------------------------------------------------------------- /montetheano/test_for_theano.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import theano 3 | from theano import tensor 4 | from for_theano import infer_shape 5 | import rstreams 6 | import distributions 7 | import for_theano 8 | 9 | def test_infer_shape_const(): 10 | shp = infer_shape(tensor.alloc(0, 5, 6, 7)) 11 | print shp 12 | assert shp == (5, 6, 7) 13 | 14 | def test_infer_shape_shared_var(): 15 | sv = theano.shared(numpy.asarray([2,3,5])) 16 | assert infer_shape(sv) == (3,) 17 | assert infer_shape(sv * 2 + 75) == (3,) 18 | 19 | def test_shape_infer_shape(): 20 | sv = theano.shared(numpy.asarray([2,3,5])) 21 | assert infer_shape(sv.shape) == (1,) 22 | 23 | def test_shape_rv(): 24 | R = tensor.shared_randomstreams.RandomStreams(234) 25 | n = R.normal(avg=0, std=1.0) 26 | assert infer_shape(n) == () 27 | 28 | def test_shape_scalar_rv_w_size(): 29 | R = tensor.shared_randomstreams.RandomStreams(234) 30 | n = R.normal(avg=0, std=1.0, size=(40,20)) 31 | assert infer_shape(n) == (40, 20) 32 | 33 | def test_shape_scalar_rv_w_size_rstreams(): 34 | R = rstreams.RandomStreams(234) 35 | n = R.normal(mu=0, sigma=1.0, draw_shape=(40,20)) 36 | 37 | assert infer_shape(n) == (40, 20) 38 | 39 | def test_shape_vector_rv_rstreams(): 40 | R = rstreams.RandomStreams(234) 41 | n = R.normal(mu=numpy.zeros(10,), sigma=numpy.ones(10,), draw_shape=(10,)) 42 | assert infer_shape(n) == (10,) 43 | 44 | def test_shape_vector_rv_dirichlet_rstreams(): 45 | R = rstreams.RandomStreams(234) 46 | n = R.dirichlet(alpha=numpy.ones(10,), draw_shape=(10,)) 47 | assert infer_shape(n) == (10,), infer_shape(n) 48 | 49 | def test_find(): 50 | 51 | query = tensor.ivector() 52 | keepset = tensor.ivector() 53 | r = for_theano.find(query, keepset) 54 | 55 | assert r.ndim == 1 56 | assert 'int' in r.dtype 57 | 58 | f = theano.function([query, keepset], r) 59 | 60 | assert numpy.all(f([], []) == []) 61 | assert numpy.all(f([2, 1, 0, 4, 3], [5, 5, 5, 5]) == []) 62 | assert numpy.all(f([2, 1, 0, 4, 3], [4]) == [3]) 63 | assert numpy.all(f([2, 1, 0, 4, 3], [4, 1]) == [1, 3]) 64 | assert numpy.all(f([2, 1, 0, 
4, 3], [1, 4]) == [1, 3]) 65 | assert numpy.all(f([], [1, 4]) == []) 66 | -------------------------------------------------------------------------------- /montetheano/examples/bayesian_logistic_regression.py: -------------------------------------------------------------------------------- 1 | import numpy, pylab 2 | import theano 3 | from theano import tensor 4 | from rstreams import RandomStreams 5 | import distributions 6 | from sample import hybridmc_sample 7 | from rv import full_log_likelihood 8 | 9 | s_rng = RandomStreams(3424) 10 | 11 | # Define model 12 | w = s_rng.normal(0, 4, draw_shape=(2,)) 13 | 14 | x = tensor.matrix('x') 15 | y = tensor.nnet.sigmoid(tensor.dot(x, w)) 16 | 17 | t = s_rng.binomial(p=y, draw_shape=(4,)) 18 | 19 | # Define data 20 | X_data = numpy.asarray([[-1.5, -0.4, 1.3, 2.2],[-1.1, -2.2, 1.3, 0]], dtype=theano.config.floatX).T 21 | Y_data = numpy.asarray([1., 1., 0., 0.], dtype=theano.config.floatX) 22 | 23 | # Plot full likelihood function 24 | RVs = dict([(t, Y_data)]) 25 | lik = full_log_likelihood(RVs) 26 | 27 | givens = dict([(x, X_data)]) 28 | lik_func = theano.function([w], lik, givens=givens, allow_input_downcast=True) 29 | 30 | delta = .1 31 | x_range = numpy.arange(-10.0, 10.0, delta) 32 | y_range = numpy.arange(-10.0, 10.0, delta) 33 | X, Y = numpy.meshgrid(x_range, y_range) 34 | 35 | response = [] 36 | for xl, yl in zip(X.flatten(), Y.flatten()): 37 | response.append(lik_func([xl, yl])) 38 | 39 | pylab.figure(1) 40 | pylab.contour(X, Y, numpy.exp(numpy.asarray(response)).reshape(X.shape), 20) 41 | pylab.draw() 42 | 43 | # Generate samples from the model 44 | sample, ll, updates = hybridmc_sample(s_rng, [w], observations={t: Y_data}) 45 | 46 | sampler = theano.function([], sample + [ll] , updates=updates, givens={x: X_data}, allow_input_downcast=True) 47 | out = theano.function([w, x], y, allow_input_downcast=True) 48 | 49 | delta = 0.1 50 | x_range = numpy.arange(-3, 3, delta) 51 | y_range = numpy.arange(-3, 3, delta) 52 | X, Y = numpy.meshgrid(x_range, y_range) 53 | 54 | b = numpy.zeros(X.shape) 55 | for i in range(1000): 56 | w, ll = sampler() 57 | 58 | if i % 50 == 0: 59 | pylab.figure(1) 60 | pylab.plot(w[0], w[1], 'x') 61 | pylab.draw() 62 | 63 | response = out(w, numpy.vstack((X.flatten(), Y.flatten())).T) 64 | response = response.reshape(X.shape) 65 | b += response 66 | 67 | pylab.figure(2) 68 | pylab.contour(X, Y, response) 69 | pylab.plot(X_data[:2,1], X_data[:2,0], 'kx') 70 | pylab.plot(X_data[2:,1], X_data[2:,0], 'bo') 71 | pylab.draw() 72 | pylab.clf() 73 | 74 | # Plot averaged model 75 | pylab.figure(1) 76 | pylab.clf() 77 | pylab.contour(X, Y, b) 78 | pylab.plot(X_data[:2,0], X_data[:2,1], 'kx') 79 | pylab.plot(X_data[2:,0], X_data[2:,1], 'bo') 80 | pylab.show() 81 | -------------------------------------------------------------------------------- /montetheano/examples/hierarchical_dirichlet.py: -------------------------------------------------------------------------------- 1 | import numpy, pylab 2 | import theano 3 | from rstreams import RandomStreams 4 | import distributions 5 | from sample import mh2_sample 6 | from for_theano import memoized 7 | 8 | s_rng = RandomStreams(23424) 9 | 10 | # Define data 11 | marbles_bag_1 = numpy.asarray([[1,1,1,1,1,1], 12 | [0,0,0,0,0,0], 13 | [0,0,0,0,0,0], 14 | [0,0,0,0,0,0], 15 | [0,0,0,0,0,0]], dtype=theano.config.floatX).T 16 | marbles_bag_2 = numpy.asarray([[0,0,0,0,0,0], 17 | [1,1,1,1,1,1], 18 | [0,0,0,0,0,0], 19 | [0,0,0,0,0,0], 20 | [0,0,0,0,0,0]], dtype=theano.config.floatX).T 21 | 
marbles_bag_3 = numpy.asarray([[0,0,0,0,0,0], 22 | [0,0,0,0,0,0], 23 | [0,0,0,0,0,0], 24 | [1,1,1,1,1,1], 25 | [0,0,0,0,0,0]], dtype=theano.config.floatX).T 26 | marbles_bag_4 = numpy.asarray([[0],[0],[0],[0],[1]], dtype=theano.config.floatX).T 27 | 28 | 29 | 30 | # Define flat model 31 | bag_prototype = memoized(lambda bag: s_rng.dirichlet(numpy.asarray([1, 1, 1, 1, 1])*5)) 32 | draw_marbles = lambda bag, nr: s_rng.multinomial(1, bag_prototype(bag), draw_shape=(nr,)) 33 | 34 | # Generate samples from the model 35 | givens = {draw_marbles(1,6): marbles_bag_1, 36 | draw_marbles(2,6): marbles_bag_2, 37 | draw_marbles(3,6): marbles_bag_3, 38 | draw_marbles(4,1): marbles_bag_4} 39 | 40 | sampler = mh2_sample(s_rng, [draw_marbles(4,1)], givens) 41 | 42 | samples = sampler(200, 100, 100) 43 | data = samples[0] 44 | 45 | # Show histogram 46 | pylab.subplot(211) 47 | pylab.bar(range(5), data.sum(axis=0)) 48 | pylab.title("Flat model") 49 | 50 | 51 | 52 | # Define hierarchical model 53 | phi = s_rng.dirichlet(numpy.asarray([1, 1, 1, 1, 1])) 54 | alpha = s_rng.gamma(2., 2.) 55 | prototype = phi*alpha 56 | 57 | bag_prototype = memoized(lambda bag: s_rng.dirichlet(prototype)) 58 | draw_marbles = lambda bag, nr: s_rng.multinomial(1, bag_prototype(bag), draw_shape=(nr,)) 59 | 60 | # Generate samples from the model 61 | givens = {draw_marbles(1,6): marbles_bag_1, 62 | draw_marbles(2,6): marbles_bag_2, 63 | draw_marbles(3,6): marbles_bag_3, 64 | draw_marbles(4,1): marbles_bag_4} 65 | 66 | sampler = mh2_sample(s_rng, [draw_marbles(4,1)], givens) 67 | 68 | samples = sampler(200, 100, 100) 69 | data = samples[0] 70 | 71 | # Show histogram 72 | pylab.subplot(212) 73 | pylab.bar(range(5), data.sum(axis=0)) 74 | pylab.title("Hierarchical model") 75 | pylab.show() 76 | -------------------------------------------------------------------------------- /montetheano/examples/latent_dirichlet_allocation.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import theano 3 | from theano import tensor 4 | from rstreams import RandomStreams 5 | import distributions 6 | from sample import mh2_sample, mh_sample 7 | from for_theano import memoized, evaluate 8 | 9 | s_rng = RandomStreams(123) 10 | 11 | nr_words = 4 12 | nr_topics = 2 13 | alpha = 0.8 14 | beta = 1. 15 | 16 | # Topic distribution per document 17 | doc_mixture = memoized(lambda doc_id: s_rng.dirichlet([alpha/nr_topics]*nr_topics)) 18 | 19 | # Word distribution per topic 20 | topic_mixture = memoized(lambda top_id: s_rng.dirichlet([beta/nr_words]*nr_words)) 21 | 22 | # For each word in the document, draw a topic according to multinomial with document specific prior 23 | # TODO, see comment below: topics = memoized(lambda doc_id, nr: s_rng.multinomial(1, doc_mixture[doc_id], draw_shape=(nr,))) 24 | topics = memoized(lambda doc_id, nr: s_rng.binomial(1, doc_mixture(doc_id)[0], draw_shape=(nr,))) 25 | 26 | # Draw words for a specific topic 27 | word_topic = lambda top_id: s_rng.multinomial(1, topic_mixture(top_id)) 28 | 29 | # TODO: memoized only works on the pre-compiled graph. This makes it fail in the case where we have to map 30 | # a vector of topics to individual multinomials with as priors the different topics. 
In the case of two topics
31 | # we can hack around this by using a binomial topic distribution and using a switch statement here:
32 | word_topic_mapper = lambda top_id: tensor.switch(top_id, word_topic(0), word_topic(1))
33 |
34 | # Maps topics to words
35 | # TODO, see comment above: get_words = memoized(lambda doc_id, nr: theano.map(word_topic, topics(doc_id, nr))[0])
36 | get_words = memoized(lambda doc_id, nr: theano.map(word_topic_mapper, topics(doc_id, nr))[0])
37 |
38 | # Define training 'documents'
39 | document_1 = numpy.asarray([[1,0,0,0],
40 |                             [1,0,0,0],
41 |                             [0,1,0,0],
42 |                             [1,0,0,0],
43 |                             [0,1,0,0],
44 |                             [0,1,0,0],
45 |                             [1,0,0,0],
46 |                             [0,1,0,0],
47 |                             [1,0,0,0],
48 |                             [0,1,0,0]], dtype=theano.config.floatX)
49 |
50 | document_2 = numpy.asarray([[0,0,1,0],
51 |                             [0,0,0,1],
52 |                             [0,0,0,1],
53 |                             [0,0,0,1],
54 |                             [0,0,1,0],
55 |                             [0,0,1,0],
56 |                             [0,0,0,1],
57 |                             [0,0,1,0],
58 |                             [0,0,1,0],
59 |                             [0,0,1,0]], dtype=theano.config.floatX)
60 |
61 | document_3 = numpy.asarray([[1,0,0,0],
62 |                             [0,0,0,1],
63 |                             [0,1,0,0],
64 |                             [0,1,0,0],
65 |                             [0,0,1,0],
66 |                             [0,1,0,0],
67 |                             [0,0,0,1],
68 |                             [1,0,0,0],
69 |                             [0,0,1,0],
70 |                             [0,0,1,0]], dtype=theano.config.floatX)
71 |
72 | # Map documents to RVs
73 | givens = {get_words(1, 10): document_1,
74 |           get_words(2, 10): document_2,
75 |           get_words(3, 10): document_3}
76 |
77 | # Build sampler
78 | sample, ll, updates = mh_sample(s_rng, [doc_mixture(1), doc_mixture(2), doc_mixture(3), topic_mixture(0), topic_mixture(1)])
79 | sampler = theano.function([], sample, updates=updates, givens=givens, allow_input_downcast=True)
80 |
81 | # Run sampling
82 | for i in range(10000):
83 |     d = sampler()
84 |
85 |     if i % 1000 == 0:
86 |         print d
87 |
--------------------------------------------------------------------------------
/montetheano/test_rv.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import numpy
3 | import theano
4 | from theano import tensor
5 | from rstreams import RandomStreams
6 | import distributions  # triggers registry
7 | import rv
8 | from for_theano import where
9 |
10 | def test_dag_condition_top():
11 |     """
12 |     Easy test of conditioning
13 |     """
14 |     with RandomStreams(234) as _:
15 |         mu = normal(10, .1)
16 |         x = normal(mu, sigma=1)
17 |
18 |     post_x = rv.condition([x], {mu: -7})
19 |     theano.printing.debugprint(post_x)
20 |
21 |     f = theano.function([], post_x)
22 |     r = [f() for i in range(10)]
23 |     assert numpy.allclose(numpy.mean(r), -7.4722755432)
24 |
25 |
26 | def test_dag_condition_bottom():
27 |     """
28 |     Test of conditioning an upper node on a lower one
29 |     """
30 |     with RandomStreams(234) as _:
31 |         mu = normal(10, .1)
32 |         x = normal(mu, sigma=1)
33 |
34 |     post_mu = rv.condition([mu], {x: -7})
35 |     theano.printing.debugprint(post_mu)
36 |
37 |     f = theano.function([], post_mu)
38 |     f()
39 |
40 |
41 | def test_normal_simple():
42 |     s_rng = RandomStreams(23)
43 |     n = s_rng.normal()
44 |
45 |     p0 = rv.lpdf(n, 0)
46 |     p1 = rv.lpdf(n, 1)
47 |     pn1 = rv.lpdf(n, -1)
48 |
49 |     f = theano.function([], [p0, p1, pn1])
50 |
51 |     pvals = f()
52 |     targets = numpy.asarray([
53 |         numpy.log(1.0 / numpy.sqrt(2*numpy.pi)),
54 |         numpy.log(numpy.exp(-0.5) / numpy.sqrt(2*numpy.pi)),
55 |         numpy.log(numpy.exp(-0.5) / numpy.sqrt(2*numpy.pi)),
56 |         ])
57 |
58 |     assert numpy.allclose(pvals,targets), (pvals, targets)
59 |
60 |
61 | def test_normal_w_params():
62 |     s_rng = RandomStreams(23)
63 |     n = s_rng.normal(mu=2, sigma=3)
64 |
65 |     p0 = rv.lpdf(n, 0)
66 |     p1 = rv.lpdf(n, 2)
67 |     pn1 = rv.lpdf(n,
-1) 68 | 69 | f = theano.function([], [p0, p1, pn1]) 70 | 71 | pvals = f() 72 | targets = numpy.asarray([ 73 | numpy.log(numpy.exp(-0.5 * ((2.0/3.0)**2)) / 74 | numpy.sqrt(2*numpy.pi*9.0)), 75 | numpy.log(numpy.exp(0) / numpy.sqrt(2*numpy.pi*9)), 76 | numpy.log(numpy.exp(-0.5 * ((3.0/3.0)**2)) / 77 | numpy.sqrt(2*numpy.pi*9.0)), 78 | ]) 79 | 80 | assert numpy.allclose(pvals,targets), (pvals, targets) 81 | 82 | 83 | def test_normal_nonscalar(): 84 | s_rng = RandomStreams(234) 85 | n = s_rng.normal() 86 | 87 | data = numpy.asarray([1, 2, 3, 4, 5]) 88 | p_data = rv.lpdf(n, data) 89 | 90 | f = theano.function([], [p_data]) 91 | 92 | pvals = f() 93 | targets = numpy.log(numpy.exp(-0.5 * (data**2)) / numpy.sqrt(2*numpy.pi)) 94 | 95 | assert numpy.allclose(pvals,targets), (pvals, targets) 96 | 97 | 98 | def test_normal_w_broadcasting(): 99 | raise NotImplementedError() 100 | 101 | 102 | def test_uniform_simple(): 103 | s_rng = RandomStreams(234) 104 | u = s_rng.uniform() 105 | 106 | p0 = rv.lpdf(u, 0) 107 | p1 = rv.lpdf(u, 1) 108 | p05 = rv.lpdf(u, 0.5) 109 | pn1 = rv.lpdf(u, -1) 110 | 111 | f = theano.function([], [p0, p1, p05, pn1]) 112 | 113 | pvals = f() 114 | targets = numpy.log(numpy.asarray([1.0, 1.0, 1.0, 0.0])) 115 | 116 | assert numpy.allclose(pvals,targets), (pvals, targets) 117 | 118 | 119 | def test_uniform_w_params(): 120 | s_rng = RandomStreams(234) 121 | u = s_rng.uniform(low=-0.999, high=9.001) 122 | 123 | p0 = rv.lpdf(u, 0) 124 | p1 = rv.lpdf(u, 2) 125 | p05 = rv.lpdf(u, -1.5) 126 | pn1 = rv.lpdf(u, 10) 127 | 128 | f = theano.function([], [p0, p1, p05, pn1]) 129 | 130 | pvals = f() 131 | targets = numpy.log(numpy.asarray([.1, .1, 0, 0])) 132 | assert numpy.allclose(pvals,targets), (pvals, targets) 133 | 134 | 135 | def test_uniform_nonscalar(): 136 | raise NotImplementedError() 137 | 138 | 139 | def test_uniform_w_broadcasting(): 140 | raise NotImplementedError() 141 | 142 | 143 | if 0: 144 | def test_likelihood_visually(): 145 | class A(object):pass 146 | self = A() 147 | 148 | s_rng = tensor.shared_randomstreams.RandomStreams(23424) 149 | 150 | err_thresh = self.err_thresh = tensor.scalar() 151 | data_err = self.data_err = tensor.vector() 152 | data_llr = self.data_llr = tensor.vector() 153 | 154 | rv_err = self.rv_err = s_rng.uniform() 155 | rv_err_good = rv_err < err_thresh 156 | 157 | data_llr_good = data_llr[where(data_err < err_thresh)] 158 | data_llr_bad = data_llr[where(data_err >= err_thresh)] 159 | 160 | # design decisions 161 | mu_llr_good, sigma_llr_good = AdaptiveParzen()(data_llr_good, low=-5, 162 | high=-1.5, minsigma=0.01) 163 | mu_llr_bad, sigma_llr_bad = AdaptiveParzen()(data_llr_bad, low=-5, 164 | high=-1.5, minsigma=0.01) 165 | 166 | 167 | rv_llr_good = gauss_mixture(s_rng, mu=mu_llr_good, sigma=sigma_llr_good) 168 | self.sample_llr = s_rng.normal(avg=-4, std=2, size=(5,)) 169 | 170 | self.sample_llr_logprob = log_density( 171 | self.sample_llr, rv_llr_good) 172 | 173 | if os.env.get("SHOW_PLOTS", False): 174 | 175 | # test that rv_llr really is a random variable 176 | 177 | f = theano.function( 178 | [self.err_thresh, self.data_err, self.data_llr], 179 | [self.rv_err, self.rv_llr], 180 | allow_input_downcast=True) 181 | 182 | data_err = [.0, .0, .0, .7, .7, .7] 183 | data_llr = [-4.5, -4, -3.5, -2, -1.5, -1.0] 184 | 185 | r = numpy.asarray([f(.5, data_err, data_llr) for i in xrange(100)]) 186 | import matplotlib.pyplot as plt 187 | plt.scatter(r[:, 0], r[:, 1]) 188 | plt.show() 189 | 190 | test_normal_nonscalar() 191 | 
--------------------------------------------------------------------------------
/montetheano/rv.py:
--------------------------------------------------------------------------------
1 | """
2 | Functions for operating on random variables.
3 | """
4 | import theano
5 | from theano import tensor
6 | from for_theano import ancestors, as_variable, clone_keep_replacements
7 | import rstreams
8 |
9 |
10 | def is_randomstate(var):
11 |     """Return True iff `var` is a (shared) random-state variable.
12 |     """
13 |     return isinstance(var.type, rstreams.randomstate_types)
14 |
15 |
16 | def is_rv(var, blockers=None):
17 |     """
18 |     Return True iff var is a random variable.
19 |
20 |     A random variable is a variable with a randomstate object in its ancestors.
21 |     """
22 |     #TODO: could optimize by stopping the recursion as soon as a randomstate is
23 |     #      found
24 |     return any(is_randomstate(v) for v in ancestors([var], blockers=blockers))
25 |
26 |
27 | def is_raw_rv(var):
28 |     """
29 |     Return True iff `var` is the result of calling s_rng.something()
30 |     """
31 |     return var.owner and is_randomstate(var.owner.inputs[0])
32 |
33 |
34 | def all_raw_rvs(outputs):
35 |     """
36 |     Return a list of all random variables required to compute `outputs`.
37 |     """
38 |     all_vars = ancestors(outputs)
39 |     assert outputs[0] in all_vars
40 |     rval = [v for v in all_vars if is_raw_rv(v)]
41 |     return rval
42 |
43 |
44 | def typed_items(dct):
45 |     return dict([
46 |         (rv, as_variable(sample, type=rv.type))
47 |         for rv, sample in dct.items()])
48 |
49 |
50 | def condition(rvs, observations):
51 |     if len(rvs) > 1:
52 |         raise NotImplementedError()
53 |     observations = typed_items(observations)
54 |     # if none of the rvs show up in the ancestors of any observations
55 |     # then this is easy conditioning
56 |     obs_ancestors = ancestors(observations.keys(), blockers=rvs)
57 |     if any(rv in obs_ancestors for rv in rvs):
58 |         # not-so-easy conditioning
59 |         # we need to produce a sampler-driven model
60 |         raise NotImplementedError()
61 |     else:
62 |         # easy conditioning
63 |         rvs_anc = ancestors(rvs, blockers=observations.keys())
64 |         frontier = [r for r in rvs_anc
65 |                 if r.owner is None or r in observations.keys()]
66 |         cloned_inputs, cloned_outputs = clone_keep_replacements(frontier, rvs,
67 |                 replacements=observations)
68 |         return cloned_outputs
69 |
70 | # TODO: does this function belong here or in rstreams
71 | def lpdf(rv, sample, **kwargs):
72 |     """
73 |     Return the probability (density) that random variable `rv`, returned by
74 |     a call to one of the sampling routines, takes the value `sample`.
75 |     """
76 |     if not is_rv(rv):
77 |         raise TypeError('rv not recognized as a random variable', rv)
78 |
79 |     if is_raw_rv(rv):
80 |         dist_name = rstreams.rv_dist_name(rv)
81 |         pdf = rstreams.pdfs[dist_name]
82 |         return pdf(rv.owner, sample, kwargs)
83 |     else:
84 |         #TODO: infer from the ancestors of v what distribution it
85 |         #      has.
86 |         raise NotImplementedError(rv)
87 |
88 | def conditional_log_likelihood(assignment, givens):
89 |     """
90 |     Return log(P(rv0=sample | given))
91 |
92 |     assignment: rv0=val0, rv1=val1, ...
93 |     givens: var0=v0, var1=v1, ...
94 |
95 |     Each of val0, val1, ... v0, v1, ... is supposed to represent an identical
96 |     number of draws from a distribution. This function returns the real-valued
97 |     density for each one of those draws.
98 |
99 |     The output from this function may be a random variable, if not all sources
100 |     of randomness are removed by the assignment and the givens.
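    For example (a sketch; `mu` and `x` are hypothetical raw random variables
    and `x_val`, `mu_val` are numeric values of matching shape):

        mu = s_rng.normal(0, 1)
        x = s_rng.normal(mu, 1)
        ll = conditional_log_likelihood({x: x_val}, {mu: mu_val})
        # ll is a Theano expression for log P(x = x_val | mu = mu_val)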
101 |     """
102 |
103 |     for rv in assignment.keys():
104 |         if not is_rv(rv):
105 |             raise ValueError('non-random var in assignment key', rv)
106 |
107 |     # Cast assignment elements to the right kind of thing
108 |     assignment = typed_items(assignment)
109 |
110 |     rvs = assignment.keys()
111 |     #TODO: this is not ok for undirected models
112 |     #      we need to be able to let condition introduce joint
113 |     #      dependencies somehow.
114 |     #      The trouble is that lpdf wants to get the pdfs one variable at a
115 |     #      time. That makes sense for directed models, but not for
116 |     #      undirected ones.
117 |     new_rvs = condition(rvs, givens)
118 |     return full_log_likelihood(
119 |             dict([(new_rv, assignment[rv])
120 |                 for (new_rv, rv) in zip(new_rvs, rvs)]))
121 |
122 |
123 | def full_log_likelihood(assignment):
124 |     """
125 |     Return log(P(rv0=sample))
126 |
127 |     assignment: rv0=val0, rv1=val1, ...
128 |
129 |     Each of val0, val1, ... v0, v1, ... is supposed to represent an identical
130 |     number of draws from a distribution. This function returns the real-valued
131 |     density for each one of those draws.
132 |
133 |     The output from this function may be a random variable, if not all sources
134 |     of randomness are removed by the assignment.
135 |     """
136 |
137 |     for rv in assignment.keys():
138 |         if not is_rv(rv):
139 |             raise ValueError('non-random var in assignment key', rv)
140 |
141 |     # All random variables that are not assigned should stay as the same object so they can later be replaced
142 |     # If this is not done this way, they get cloned
143 |     RVs = [v for v in ancestors(assignment.keys()) if is_raw_rv(v)]
144 |     for rv in RVs:
145 |         if rv not in assignment:
146 |             assignment[rv] = rv
147 |
148 |     # Cast assignment elements to the right kind of thing
149 |     assignment = typed_items(assignment)
150 |
151 |     pdfs = [lpdf(rv, sample) for rv, sample in assignment.items()]
152 |     lik = tensor.add(*[tensor.sum(p) for p in pdfs])
153 |
154 |     dfs_variables = ancestors([lik], blockers=assignment.keys())
155 |     frontier = [r for r in dfs_variables
156 |             if r.owner is None or r in assignment.keys()]
157 |     cloned_inputs, cloned_outputs = clone_keep_replacements(frontier, [lik],
158 |             replacements=assignment)
159 |     cloned_lik, = cloned_outputs
160 |     return cloned_lik
161 |
162 |
163 | def energy(assignment, given):
164 |     """
165 |     Return -log(P(rv0=sample | given)) +- const
166 |
167 |     assignment: rv0=val0, rv1=val1, ...
168 |     given: var0=v0, var1=v1, ...
169 |
170 |     Each of val0, val1, ... v0, v1, ... is supposed to represent an identical
171 |     number of draws from a distribution. This function returns the real-valued
172 |     density for each one of those draws.
173 |
174 |     The output from this function may be a random variable, if not all sources
175 |     of randomness are removed by the assignment and the given.
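    (In sampler terms this is a potential-energy function: lower energy means
    higher probability, the sign convention the samplers in sample.py rely on.)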
176 | """ 177 | try: 178 | return -conditional_log_likelihood(assignment, given) 179 | except: 180 | # get the log_density up to an additive constant 181 | raise NotImplementedError() 182 | -------------------------------------------------------------------------------- /montetheano/rstreams.py: -------------------------------------------------------------------------------- 1 | """ 2 | Registry and definition for new-and-improved RandomStreams 3 | """ 4 | 5 | import copy 6 | import numpy 7 | import theano 8 | from theano import tensor 9 | from for_theano import elemwise_cond 10 | from for_theano import ancestors 11 | from utils import ClobberContext 12 | 13 | samplers = {} 14 | pdfs = {} 15 | ml_handlers = {} 16 | params_handlers = {} 17 | local_proposals = {} 18 | randomstate_types = (tensor.raw_random.RandomStateType,) 19 | 20 | 21 | def rv_dist_name(rv): 22 | try: 23 | return rv.owner.op.dist_name 24 | except AttributeError: 25 | try: 26 | return rv.owner.op.fn.__name__ 27 | except AttributeError: 28 | raise TypeError('rv not recognized as output of RandomFunction', rv) 29 | 30 | class RandomStreams(ClobberContext): 31 | clobber_symbols = ['pdf'] 32 | 33 | def __init__(self, seed, draw_shape=()): 34 | self.state_updates = [] 35 | self.default_instance_seed = seed 36 | self.seed_generator = numpy.random.RandomState(seed) 37 | self.default_updates = {} 38 | if draw_shape == (): 39 | self.draw_shape = tensor.as_tensor_variable( 40 | numpy.empty((0,), dtype='int64')) 41 | else: 42 | self.draw_shape = tensor.as_tensor_variable(draw_shape) 43 | if self.draw_shape.ndim != 1: 44 | raise ValueError(draw_shape) 45 | 46 | def shared(self, val, **kwargs): 47 | rval = theano.shared(val, **kwargs) 48 | return rval 49 | 50 | def sharedX(self, val, **kwargs): 51 | rval = theano.shared( 52 | numpy.asarray(val, dtype=theano.config.floatX), 53 | **kwargs) 54 | return rval 55 | 56 | def new_shared_rstate(self): 57 | seed = int(self.seed_generator.randint(2**30)) 58 | rval = theano.shared(numpy.random.RandomState(seed)) 59 | return rval 60 | 61 | def add_default_update(self, used, recip, new_expr): 62 | if used not in self.default_updates: 63 | self.default_updates[used] = {} 64 | self.default_updates[used][recip] = new_expr 65 | used.update = (recip, new_expr) # necessary? 66 | recip.default_update = new_expr 67 | self.state_updates.append((recip, new_expr)) 68 | 69 | def sample(self, dist_name, *args, **kwargs): 70 | handler = samplers[dist_name] 71 | if 'draw_shape' in kwargs: 72 | draw_shape = kwargs['draw_shape'] 73 | if isinstance(draw_shape, (list, tuple)) and draw_shape: 74 | draw_shape = tensor.stack(*draw_shape) 75 | else: 76 | draw_shape = self.draw_shape 77 | 78 | kwargs['draw_shape'] = draw_shape 79 | out = handler(self, *args, **kwargs) 80 | return out 81 | 82 | def seed(self, seed=None): 83 | """Re-initialize each random stream 84 | 85 | :param seed: each random stream will be assigned a unique state that depends 86 | deterministically on this value. 
87 |
88 |         :type seed: None or integer in range 0 to 2**30
89 |
90 |         :rtype: None
91 |         """
92 |         if seed is None:
93 |             seed = self.default_instance_seed
94 |
95 |         seedgen = numpy.random.RandomState(seed)
96 |         for old_r, new_r in self.state_updates:
97 |             old_r_seed = seedgen.randint(2**30)
98 |             old_r.set_value(numpy.random.RandomState(int(old_r_seed)),
99 |                     borrow=True)
100 |
101 |     def pdf(self, rv, sample, **kwargs):
102 |         """
103 |         Return the probability (density) that random variable `rv`, returned by
104 |         a call to one of the sampling routines of this class, takes the value `sample`.
105 |         """
106 |         if rv.owner:
107 |             dist_name = rv_dist_name(rv)
108 |             pdf = pdfs[dist_name]
109 |             return pdf(rv.owner, sample, kwargs)
110 |         else:
111 |             raise TypeError('rv not recognized as output of RandomFunction')
112 |
113 |     def ml(self, rv, sample, weights=None):
114 |         """
115 |         Return an Updates object mapping distribution parameters to expressions
116 |         of their maximum likelihood values.
117 |         """
118 |         if rv.owner:
119 |             dist_name = rv_dist_name(rv)
120 |             pdf = ml_handlers[dist_name]
121 |             return pdf(rv.owner, sample, weights=weights)
122 |         else:
123 |             raise TypeError('rv not recognized as output of RandomFunction')
124 |
125 |     def params(self, rv):
126 |         """
127 |         Return the parameter variables of the distribution from which
128 |         random variable `rv` was drawn.
129 |         """
130 |         if rv.owner:
131 |             return params_handlers[rv_dist_name(rv)](rv.owner)
132 |         else:
133 |             raise TypeError('rv not recognized as output of RandomFunction')
134 |
135 |     def local_proposal(self, rv, sample, **kwargs):
136 |         """
137 |         Return a proposal random variable for Metropolis-Hastings moves of
138 |         random variable `rv` away from its current value `sample`.
139 |         """
140 |         if rv.owner:
141 |             dist_name = rv_dist_name(rv)
142 |             if dist_name in local_proposals:
143 |                 # If a proposal distribution is provided, use it
144 |                 local_proposal = local_proposals[dist_name]
145 |                 return local_proposal(self, rv.owner, sample, kwargs)
146 |             else:
147 |                 # Otherwise fall back to drawing samples from the distribution itself
148 |                 return rv.owner.outputs[1]
149 |         else:
150 |             raise TypeError('rv not recognized as output of RandomFunction')
151 |
152 | #
153 | # N.B. OTHER METHODS (samplers) ARE INSTALLED HERE BY
154 | # - register_sampler
155 | # - rng_register
156 | #
157 | def register_sampler(dist_name, f):
158 |     """
159 |     Inject a sampling function into RandomStreams for the distribution with
160 |     name `dist_name`.
161 |     """
162 |     # install an instancemethod on the RandomStreams class
163 |     # that is a shortcut for something like
164 |     # self.sample('uniform', *args, **kwargs)
165 |
166 |     def sampler(self, *args, **kwargs):
167 |         return self.sample(dist_name, *args, **kwargs)
168 |     setattr(RandomStreams, dist_name, sampler)
169 |     RandomStreams.clobber_symbols.append(dist_name)
170 |
171 |     if dist_name in samplers:
172 |         # TODO: allow for multiple handlers?
173 |         raise KeyError(dist_name)
174 |     samplers[dist_name] = f
175 |     return f
176 |
177 |
178 | def register_lpdf(dist_name, f):
179 |     if dist_name in pdfs:
180 |         # TODO: allow for multiple handlers?
181 |         raise KeyError(dist_name, pdfs[dist_name])
182 |     pdfs[dist_name] = f
183 |     return f
184 |
185 |
186 | #TODO: think about what this function is supposed to do??
187 | def register_ml(dist_name, f):
188 |     if dist_name in ml_handlers:
189 |         # TODO: allow for multiple handlers?
190 | raise KeyError(dist_name, ml_handlers[dist_name]) 191 | ml_handlers[dist_name] = f 192 | return f 193 | 194 | 195 | #TODO: think about what this function is supposed to do?? 196 | def register_params(dist_name, f): 197 | if dist_name in params_handlers: 198 | # TODO: allow for multiple handlers? 199 | raise KeyError(dist_name, params_handlers[dist_name]) 200 | params_handlers[dist_name] = f 201 | return f 202 | 203 | def register_local_proposal(dist_name, f): 204 | if dist_name in local_proposals: 205 | # TODO: allow for multiple handlers? 206 | raise KeyError(dist_name, local_proposals[dist_name]) 207 | local_proposals[dist_name] = f 208 | return f 209 | 210 | 211 | def rng_register(f): 212 | if f.__name__.endswith('_sampler'): 213 | dist_name = f.__name__[:-len('_sampler')] 214 | return register_sampler(dist_name, f) 215 | 216 | elif f.__name__.endswith('_lpdf'): 217 | dist_name = f.__name__[:-len('_lpdf')] 218 | return register_lpdf(dist_name, f) 219 | 220 | elif f.__name__.endswith('_ml'): 221 | dist_name = f.__name__[:-len('_ml')] 222 | return register_ml(dist_name, f) 223 | 224 | elif f.__name__.endswith('_params'): 225 | dist_name = f.__name__[:-len('_params')] 226 | return register_params(dist_name, f) 227 | 228 | elif f.__name__.endswith('_proposal'): 229 | dist_name = f.__name__[:-len('_proposal')] 230 | return register_local_proposal(dist_name, f) 231 | 232 | else: 233 | raise ValueError("function name suffix not recognized", f.__name__) 234 | 235 | -------------------------------------------------------------------------------- /montetheano/sample.py: -------------------------------------------------------------------------------- 1 | """ 2 | Algorithms for drawing samples by MCMC 3 | 4 | """ 5 | import numpy 6 | import theano 7 | from theano import tensor 8 | from for_theano import ancestors, infer_shape, evaluate_with_assignments, evaluate 9 | from rv import is_raw_rv, full_log_likelihood, lpdf 10 | 11 | 12 | # Major TODOs: 13 | # - RVs should have a non-symbolic shape so the MC states can be allocated 14 | # - We need to initialize the chains in draw from the independent prior distributions 15 | # - We need proposal distributions for all RVs from which to draw samples 16 | # - An additional loop around mh_sample is required 17 | # - An efficient parallel MC sampler is possible, which might be less decorrelated (or more book-keeping is required) 18 | # - The HMC sampler needs an outside loop and an additional inner loop for the leap-frog steps 19 | 20 | 21 | 22 | # Sample the generative model and return "outputs" for cases where "condition" is met. 23 | # If no condition is given, it just samples from the model 24 | # The outputs can be a single TheanoVariable or a list of TheanoVariables. 25 | # The function returns a single sample or a list of samples, depending on "outputs"; and an updates dictionary. 26 | def rejection_sample(outputs, condition = None): 27 | if isinstance(outputs, tensor.TensorVariable): 28 | init = [0] 29 | else: 30 | init = [0]*len(outputs) 31 | if condition is None: 32 | # TODO: I am just calling scan to get updates, can't I create this myself? 
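        # (For reference: in the else-branch below, scan re-draws every RV in
        # `outputs` on each iteration and theano.scan_module.until(condition)
        # stops the loop at the first draw satisfying `condition`, so taking
        # samples[-1] implements ordinary rejection sampling.)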
33 | # output desired RVs when condition is met 34 | def rejection(): 35 | return outputs 36 | 37 | samples, updates = theano.scan(rejection, outputs_info = init, n_steps = 1) 38 | else: 39 | # output desired RVs when condition is met 40 | def rejection(): 41 | return outputs, {}, theano.scan_module.until(condition) 42 | samples, updates = theano.scan(rejection, outputs_info = init, n_steps = 1000) 43 | if isinstance(samples, tensor.TensorVariable): 44 | sample = samples[-1] 45 | else: 46 | sample = [s[-1] for s in samples] 47 | return sample, updates 48 | 49 | def mh_sample(s_rng, outputs, observations = {}): 50 | all_vars = ancestors(list(outputs) + list(observations.keys())) 51 | 52 | for o in observations: 53 | assert o in all_vars 54 | if not is_raw_rv(o): 55 | raise TypeError(o) 56 | 57 | RVs = [v for v in all_vars if is_raw_rv(v)] 58 | free_RVs = [v for v in RVs if v not in observations] 59 | 60 | # Draw sample from the proposal 61 | free_RVs_state = [] 62 | for v in free_RVs: 63 | f = theano.function([], v, 64 | mode=theano.Mode(linker='py', optimizer=None)) 65 | free_RVs_state.append(theano.shared(f())) 66 | 67 | log_likelihood = theano.shared(numpy.array(float('-inf'))) 68 | 69 | U = s_rng.uniform(low=0.0, high=1.0) 70 | 71 | def mcmc(ll, *frvs): 72 | proposals = [s_rng.local_proposal(v, rvs) for v, rvs in zip(free_RVs, frvs)] 73 | proposals_rev = [s_rng.local_proposal(v, rvs) for v, rvs in zip(free_RVs, proposals)] 74 | 75 | full_observations = dict(observations) 76 | full_observations.update(dict([(rv, s) for rv, s in zip(free_RVs, proposals)])) 77 | new_log_likelihood = full_log_likelihood(full_observations) 78 | 79 | logratio = new_log_likelihood - ll \ 80 | + tensor.add(*[tensor.sum(lpdf(p, r)) for p, r in zip(proposals_rev, frvs)]) \ 81 | - tensor.add(*[tensor.sum(lpdf(p, r)) for p, r in zip(proposals, proposals)]) 82 | 83 | accept = tensor.gt(logratio, tensor.log(U)) 84 | 85 | return [tensor.switch(accept, new_log_likelihood, ll)] + \ 86 | [tensor.switch(accept, p, f) for p, f in zip(proposals, frvs)], \ 87 | {}, theano.scan_module.until(accept) 88 | 89 | samples, updates = theano.scan(mcmc, outputs_info = [log_likelihood] + free_RVs_state, n_steps = 100) 90 | updates[log_likelihood] = samples[0][-1] 91 | updates.update(dict([(f, s[-1]) for f, s in zip(free_RVs_state, samples[1:])])) 92 | 93 | return [free_RVs_state[free_RVs.index(out)] for out in outputs], log_likelihood, updates 94 | 95 | def hybridmc_sample(s_rng, outputs, observations = {}): 96 | # TODO: should there be a size variable here? 97 | # TODO: implement lag and burn-in 98 | # TODO: implement size 99 | """ 100 | Return a dictionary mapping random variables to their sample values. 
101 | """ 102 | 103 | all_vars = ancestors(list(outputs) + list(observations.keys())) 104 | 105 | for o in observations: 106 | assert o in all_vars 107 | if not is_raw_rv(o): 108 | raise TypeError(o) 109 | 110 | RVs = [v for v in all_vars if is_raw_rv(v)] 111 | 112 | free_RVs = [v for v in RVs if v not in observations] 113 | 114 | free_RVs_state = [theano.shared(numpy.ones(shape=infer_shape(v)), broadcastable=tuple(numpy.asarray(infer_shape(v))==1)) for v in free_RVs] 115 | free_RVs_prop = [s_rng.normal(0, 1, draw_shape=infer_shape(v)) for v in free_RVs] 116 | 117 | log_likelihood = theano.shared(numpy.array(float('-inf'))) 118 | 119 | U = s_rng.uniform(low=0, high=1.0) 120 | 121 | epsilon = numpy.sqrt(2*0.03) 122 | def mcmc(ll, *frvs): 123 | full_observations = dict(observations) 124 | full_observations.update(dict([(rv, s) for rv, s in zip(free_RVs, frvs)])) 125 | 126 | loglik = -full_log_likelihood(full_observations) 127 | 128 | proposals = free_RVs_prop 129 | H = tensor.add(*[tensor.sum(tensor.sqr(p)) for p in proposals])/2. + loglik 130 | 131 | # -- this should be an inner loop 132 | g = [] 133 | g.append(tensor.grad(loglik, frvs)) 134 | 135 | proposals = [(p - epsilon*gg[0]/2.) for p, gg in zip(proposals, g)] 136 | 137 | rvsp = [(rvs + epsilon*rvp) for rvs,rvp in zip(frvs, proposals)] 138 | 139 | full_observations = dict(observations) 140 | full_observations.update(dict([(rv, s) for rv, s in zip(free_RVs, rvsp)])) 141 | new_loglik = -full_log_likelihood(full_observations) 142 | 143 | gnew = [] 144 | gnew.append(tensor.grad(new_loglik, rvsp)) 145 | proposals = [(p - epsilon*gn[0]/2.) for p, gn in zip(proposals, gnew)] 146 | # -- 147 | 148 | Hnew = tensor.add(*[tensor.sum(tensor.sqr(p)) for p in proposals])/2. + new_loglik 149 | 150 | dH = Hnew - H 151 | accept = tensor.or_(dH < 0., U < tensor.exp(-dH)) 152 | 153 | return [tensor.switch(accept, -new_loglik, ll)] + \ 154 | [tensor.switch(accept, p, f) for p, f in zip(rvsp, frvs)], \ 155 | {}, theano.scan_module.until(accept) 156 | 157 | samples, updates = theano.scan(mcmc, outputs_info = [log_likelihood] + free_RVs_state, n_steps = 10000000) 158 | 159 | updates[log_likelihood] = samples[0][-1] 160 | updates.update(dict([(f, s[-1]) for f, s in zip(free_RVs_state, samples[1:])])) 161 | 162 | return [free_RVs_state[free_RVs.index(out)] for out in outputs], log_likelihood, updates 163 | 164 | def mh2_sample(s_rng, outputs, observations = {}, givens = {}): 165 | all_vars = ancestors(list(observations.keys()) + list(outputs)) 166 | 167 | for o in observations: 168 | assert o in all_vars 169 | if not is_raw_rv(o): 170 | raise TypeError(o) 171 | 172 | RVs = [v for v in all_vars if is_raw_rv(v)] 173 | free_RVs = [v for v in RVs if v not in observations] 174 | 175 | free_RVs_state = [] 176 | for v in free_RVs: 177 | f = theano.function([], v, 178 | mode=theano.Mode(linker='py', optimizer=None)) 179 | free_RVs_state.append(theano.shared(f())) 180 | 181 | U = s_rng.uniform(low=0.0, high=1.0) 182 | 183 | rr = [] 184 | for index in range(len(free_RVs)): 185 | # TODO: why does the compiler crash when we try to expose the likelihood ? 
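        # (For reference: each pass of this loop compiles one single-site
        # Metropolis-Hastings step for free_RVs[index].  For current state x
        # and proposal x' ~ q(.|x), acceptance happens with log-probability
        #     log r = log p(x') - log p(x) + log q(x|x') - log q(x'|x),
        # which is what `lr` computes below via bw (backward) and fw (forward).)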
186 |         full_observations = dict(observations)
187 |         full_observations.update(dict([(rv, s) for rv, s in zip(free_RVs, free_RVs_state)]))
188 |         log_likelihood = full_log_likelihood(full_observations)
189 |
190 |         proposal = s_rng.local_proposal(free_RVs[index], free_RVs_state[index])
191 |         proposal_rev = s_rng.local_proposal(free_RVs[index], proposal)
192 |
193 |         full_observations = dict(observations)
194 |         full_observations.update(dict([(rv, s) for rv, s in zip(free_RVs, free_RVs_state)]))
195 |         full_observations.update(dict([(free_RVs[index], proposal)]))
196 |         new_log_likelihood = full_log_likelihood(full_observations)
197 |
198 |         bw = tensor.sum(lpdf(proposal_rev, free_RVs_state[index]))
199 |         fw = tensor.sum(lpdf(proposal, proposal))
200 |
201 |         lr = new_log_likelihood-log_likelihood+bw-fw
202 |
203 |         accept = tensor.gt(lr, tensor.log(U))
204 |
205 |         updates = {free_RVs_state[index] : tensor.switch(accept, proposal, free_RVs_state[index])}
206 |         rr.append(theano.function([], [accept], updates=updates, givens=givens))
207 |
208 |     # TODO: the exact number of samples returned is still wrong
209 |     def sampler(nr_samples, burnin = 100, lag = 100):
210 |         data = [[] for o in outputs]
211 |         for i in range(nr_samples*lag+burnin):
212 |             accept = False
213 |             while not accept:
214 |                 index = numpy.random.randint(len(free_RVs))
215 |
216 |                 accept = rr[index]()
217 |             if accept and i > burnin and (i-burnin) % lag == 0:
218 |                 for d, o in zip(data, outputs):
219 |                     # TODO: this can be optimized
220 |                     if is_raw_rv(o):
221 |                         d.append(free_RVs_state[free_RVs.index(o)].get_value())
222 |                     else:
223 |                         full_observations = dict(observations)
224 |                         full_observations.update(dict([(rv, s) for rv, s in zip(free_RVs, free_RVs_state)]))
225 |                         d.append(evaluate(evaluate_with_assignments(o, full_observations), givens=givens))
226 |         data = [numpy.asarray(d).squeeze() for d in data]
227 |
228 |         return data
229 |
230 |     return sampler
--------------------------------------------------------------------------------
/montetheano/for_theano.py:
--------------------------------------------------------------------------------
1 | import copy
2 | import numpy
3 | import theano
4 | from theano import tensor
5 | from theano.gof import graph
6 |
7 | def evaluate(var, givens=None):
8 |     f = theano.function([], var, mode=theano.Mode(linker='py', optimizer=None), givens=givens)
9 |     return f()
10 |
11 | class memoized(object):
12 |     def __init__(self, func):
13 |         self.func = func
14 |         self.cache = {}
15 |     def __call__(self, *args):
16 |         try:
17 |             return self.cache[args]
18 |         except KeyError:
19 |             value = self.func(*args)
20 |             self.cache[args] = value
21 |             return value
22 |
23 | def as_variable(thing, type=None):
24 |     if isinstance(thing, theano.Variable):
25 |         if type is None or thing.type == type:
26 |             return thing
27 |         else:
28 |             return thing
29 |             # raise TypeError(thing)
30 |     if hasattr(thing, 'type'):
31 |         if type is None or thing.type == type:
32 |             return thing
33 |         else:
34 |             raise TypeError(thing)
35 |     if type is None:
36 |         #TODO: why there is no theano.constant??
37 |         return theano.shared(thing)
38 |     else:
39 |         return type.Constant(
40 |                 type,
41 |                 type.filter(thing, allow_downcast=True))
42 |
43 | class Bincount(theano.Op):
44 |     """
45 |     Count occurrences of each integer value in a vector, like numpy.bincount,
46 |     with optional weights and zero-padding of the result up to `minlength`.
47 |     """
48 |     #TODO: check to see if numpy.bincount supports minlength argument
49 |     def __eq__(self, other):
50 |         return type(self) == type(other)
51 |
52 |     def __hash__(self):
53 |         return hash(type(self))
54 |
55 |     def make_node(self, x, weights=1, minlength=0):
56 |         x = tensor.as_tensor_variable(x)
57 |         weights = tensor.as_tensor_variable(weights)
58 |         minlength = tensor.as_tensor_variable(minlength)
59 |         if x.ndim != 1:
60 |             raise NotImplementedError( x)
61 |         if 'int' not in str(x.dtype):
62 |             raise TypeError('bincount requires integer argument x', x)
63 |         # TODO: check that weights and minlength are ok
64 |         return theano.gof.Apply(self,
65 |                 [x, weights, minlength],
66 |                 [tensor.lvector()])
67 |
68 |     def perform(self, node, inputs, outstorage):
69 |         x, weights, minlength = inputs
70 |         if weights == 1:
71 |             rval = numpy.bincount(x)#, minlength=minlength)
72 |         else:
73 |             rval = numpy.bincount(*inputs)
74 |         if len(rval) < minlength:
75 |             tmp = numpy.zeros((minlength,), dtype=rval.dtype)
76 |             tmp[:len(rval)] = rval
77 |             rval = tmp
78 |         outstorage[0][0] = rval
79 |     #XXX: infer_shape
80 |
81 | bincount = Bincount()
82 |
83 |
84 | class Where(theano.Op):
85 |     """
86 |     Map a vector to an integer vector containing the sorted positions of
87 |     non-zeros in the argument.
88 |     """
89 |     def __eq__(self, other):
90 |         return type(self) == type(other)
91 |
92 |     def __hash__(self):
93 |         return hash(type(self))
94 |
95 |     def make_node(self, x):
96 |         if x.ndim != 1:
97 |             raise NotImplementedError()
98 |         return theano.gof.Apply(self,
99 |                 [x],
100 |                 [tensor.lvector()])
101 |
102 |     def perform(self, node, inputs, outstorage):
103 |         # Fixed by GWT: ensure output from numpy matches expected output dtype
104 |         # Addresses hyperopt issue #58
105 |         outstorage[0][0] = theano._asarray(
106 |                 numpy.where(inputs[0])[0], dtype=node.outputs[0].type.dtype)
107 |
108 | #XXX: infer_shape
109 | where = Where()
110 |
111 |
112 | class BoolTake(theano.Op):
113 |     """
114 |     Return the equivalent of
115 |         [x[i] for i, j in enumerate(tf) if j]
116 |
117 |     """
118 |
119 |     def __hash__(self):
120 |         return hash(type(self))
121 |
122 |     def __eq__(self, other):
123 |         return type(self) == type(other)
124 |
125 |     def make_node(self, x, tf):
126 |         x = tensor.as_tensor_variable(x)
127 |         tf = tensor.as_tensor_variable(tf)
128 |         if x.ndim < 1: raise TypeError()
129 |         if x.ndim != 1: raise TypeError()
130 |         if 'int' not in tf.dtype: raise TypeError()
131 |         return theano.gof.Apply(self,
132 |                 [x, tf],
133 |                 [x.type()])
134 |
135 |     def perform(self, node, inputs, output_storage):
136 |         x, tf = inputs
137 |         xx = x[:len(tf)]
138 |         rval = x[:len(tf)][tf > 0]
139 |         output_storage[0][0] = rval
140 | bool_take = BoolTake()
141 |
142 |
143 | class Find(theano.Op):
144 |     """
145 |     Returns positions in `query` where elements of `keepset` occur.
146 | 147 | Return the equivalent of 148 | [i for (i, q) in enumerate(query) if q in keepset] 149 | 150 | """ 151 | 152 | def __hash__(self): 153 | return hash(type(self)) 154 | 155 | def __eq__(self, other): 156 | return type(self) == type(other) 157 | 158 | def make_node(self, query, keepset): 159 | query = tensor.as_tensor_variable(query) 160 | keepset = tensor.as_tensor_variable(keepset) 161 | if query.ndim != 1: raise TypeError() 162 | if keepset.ndim != 1: raise TypeError() 163 | if 'int' not in query.dtype: raise TypeError() 164 | if 'int' not in keepset.dtype: raise TypeError() 165 | return theano.gof.Apply(self, 166 | [query, keepset], 167 | [keepset.type()]) 168 | 169 | def perform(self, node, inputs, output_storage): 170 | query, keepset = inputs 171 | keepset = set(keepset) 172 | rval = numpy.asarray( 173 | [i for i, e in enumerate(query) if e in keepset], 174 | dtype=inputs[1].dtype) 175 | output_storage[0][0] = rval 176 | #XXX: infer_shape 177 | find = Find() 178 | 179 | class Argsort(theano.Op): 180 | """ 181 | Return the equivalent of numpy.argsort(x) 182 | """ 183 | 184 | def __hash__(self): 185 | return hash(type(self)) 186 | 187 | def __eq__(self, other): 188 | return type(self) == type(other) 189 | 190 | def make_node(self, x): 191 | x = tensor.as_tensor_variable(x) 192 | if x.ndim != 1: raise TypeError() 193 | if 'complex' in str(x.dtype): raise TypeError() 194 | return theano.gof.Apply(self, [x], [tensor.lvector()]) 195 | 196 | def perform(self, node, inputs, output_storage): 197 | # Fixed by GWT: ensure output from numpy matches expected output dtype 198 | # Addresses hyperopt issue #58 199 | output_storage[0][0] = theano._asarray(numpy.argsort(inputs[0]), 200 | dtype=node.outputs[0].type.dtype) 201 | 202 | #XXX: infer_shape 203 | argsort = Argsort() 204 | 205 | def elemwise_cond(*args): 206 | """Build a nested elemwise if elif ... statement. 207 | 208 | elemwise_cond( 209 | a, cond_a, 210 | b, cond_b, 211 | c) 212 | 213 | Translates roughly to an elementwise version of this... 214 | 215 | if cond_a: 216 | a 217 | elif cond_b: 218 | b 219 | else: 220 | c 221 | """ 222 | assert len(args) % 2, 'need an odd number of args' 223 | if len(args) == 1: 224 | return args[0] 225 | else: 226 | return tensor.switch( 227 | args[1], 228 | args[0], 229 | elemwise_cond(*args[2:])) 230 | 231 | 232 | class LazySwitch(theano.gof.op.PureOp): 233 | """ 234 | lazy_switch(which_case, case0, case1, case2, case3, ...)
235 | 236 | """ 237 | 238 | def __init__(self): 239 | pass 240 | 241 | def __eq__(self, other): 242 | return type(self) == type(other) 243 | 244 | def __hash__(self): 245 | return hash(type(self)) 246 | 247 | def make_node(self, c, arg0, *args): 248 | for a in args: 249 | if a.type != arg0.type: 250 | raise TypeError( 251 | 'Switch requires same type for all cases', 252 | (a.type, arg0.type)) 253 | return theano.gof.Apply(self, 254 | [c, arg0] + list(args), 255 | [arg0.type()]) 256 | 257 | def make_thunk(self, node, storage_map, compute_map, no_recycling): 258 | outtype = node.outputs[0].type 259 | c = node.inputs[0] 260 | s_output = node.outputs[0] 261 | ocontainer = storage_map[s_output] 262 | def thunk(): 263 | if not compute_map[c][0]: 264 | return [0] # ask to compute c 265 | else: 266 | casenum = storage_map[c][0] 267 | argvar = node.inputs[casenum+1] 268 | if compute_map[argvar][0]: 269 | argval = storage_map[argvar][0] 270 | ocontainer[0] = outtype.filter( 271 | copy.deepcopy(argval)) 272 | return [] # computations are done 273 | else: 274 | # ask to compute the input element we need 275 | return [casenum+1] 276 | thunk.lazy = True 277 | thunk.inputs = [storage_map[v] for v in node.inputs] 278 | thunk.outputs = [storage_map[v] for v in node.outputs] 279 | return thunk 280 | 281 | lazy_switch = LazySwitch() 282 | 283 | 284 | def ancestors(variable_list, blockers = None): 285 | """Return the variables that contribute to those in variable_list (inclusive). 286 | 287 | :type variable_list: list of `Variable` instances 288 | :param variable_list: 289 | output `Variable` instances from which to search backward through owners 290 | :rtype: list of `Variable` instances 291 | :returns: 292 | all input nodes, in the order found by a left-recursive depth-first search 293 | started at the nodes in `variable_list`. 294 | 295 | """ 296 | def expand(r): 297 | if r.owner and (not blockers or r not in blockers): 298 | l = list(r.owner.inputs) 299 | l.reverse() 300 | return l 301 | dfs_variables = graph.stack_search(graph.deque(variable_list), expand, 'dfs') 302 | return dfs_variables 303 | 304 | 305 | def clone_keep_replacements(i, o, replacements=None): 306 | """Duplicate nodes from i -> o inclusive. 307 | 308 | i - sequence of variables 309 | o - sequence of variables 310 | replacements - dictionary mapping each old node to its new one. 311 | (this is modified in-place as described in `clone_get_equiv`) 312 | 313 | By default new inputs are actually the same as old inputs, but 314 | when a replacements dictionary is provided this will not generally be the 315 | case. 316 | """ 317 | equiv = clone_get_equiv(i, o, replacements) 318 | return [equiv[input] for input in i], [equiv[output] for output in o] 319 | 320 | 321 | def clone_get_equiv(i, o, replacements=None): 322 | """Duplicate nodes from `i` to `o` inclusive. 323 | 324 | Returns replacements dictionary, mapping each old node to its new one. 325 | 326 | i - sequence of variables 327 | o - sequence of variables 328 | replacements - initial value for return value, modified in place. 329 | 330 | """ 331 | if replacements is None: 332 | d = {} 333 | else: 334 | d = replacements 335 | 336 | # for old, new in replacements.items(): 337 | # if new in replacements: 338 | # # I think we want to do something recursive here, but 339 | # # it feels like it might get tricky?
This reminds me of the 340 | # # 'sorted_givens' branch on github/jaberg/Theano 341 | # raise NotImplementedError('think before implementing') 342 | # replacements[new] = new 343 | 344 | for input in i: 345 | if input not in d: 346 | d[input] = input 347 | 348 | for apply in graph.io_toposort(i, o): 349 | for input in apply.inputs: 350 | if input not in d: 351 | d[input] = input 352 | 353 | new_apply = apply.clone_with_new_inputs([d[i] for i in apply.inputs]) 354 | if apply not in d: 355 | d[apply] = new_apply 356 | 357 | for output, new_output in zip(apply.outputs, new_apply.outputs): 358 | if output not in d: 359 | d[output] = new_output 360 | 361 | for output in o: 362 | if output not in d: 363 | d[output] = output.clone() 364 | 365 | return d 366 | 367 | 368 | #XXX: rename -> clone_with_assignment 369 | def evaluate_with_assignments(f, assignment): 370 | dfs_variables = ancestors([f], blockers=assignment.keys()) 371 | frontier = [r for r in dfs_variables 372 | if r.owner is None or r in assignment.keys()] 373 | cloned_inputs, cloned_outputs = clone_keep_replacements(frontier, [f], 374 | replacements=assignment) 375 | out, = cloned_outputs 376 | return out 377 | 378 | 379 | # 380 | # SHAPE INFERENCE 381 | # 382 | 383 | # Shape.infer_shape 384 | if not hasattr(theano.tensor.basic.Shape, 'infer_shape'): 385 | def shape_infer_shape(self, node, ishapes): 386 | return [(node.inputs[0].ndim,)] 387 | theano.tensor.basic.Shape.infer_shape = shape_infer_shape 388 | 389 | # MakeVector.infer_shape 390 | if not hasattr(theano.tensor.opt.MakeVector, 'infer_shape'): 391 | def makevector_infer_shape(self, node, ishapes): 392 | return [(len(node.inputs),)] 393 | theano.tensor.opt.MakeVector.infer_shape = makevector_infer_shape 394 | 395 | def infer_shape_helper(v, assume_shared_size_fixed): 396 | if not isinstance(v.type, tensor.TensorType): 397 | return None 398 | 399 | if v.owner: 400 | if len(v.owner.outputs) > 1: 401 | output_pos = v.owner.outputs.index(v) 402 | else: 403 | output_pos = 0 404 | ishapes = [infer_shape_helper(i, assume_shared_size_fixed) 405 | for i in v.owner.inputs] 406 | return v.owner.op.infer_shape(v.owner, ishapes)[output_pos] 407 | 408 | 409 | if isinstance(v, theano.Constant): 410 | return v.data.shape 411 | 412 | if isinstance(v, theano.compile.SharedVariable): 413 | if assume_shared_size_fixed: 414 | return v.get_value(borrow=True).shape 415 | else: 416 | raise ValueError('shared var') 417 | 418 | def infer_shape(v, assume_shared_size_fixed=True): 419 | rval = infer_shape_helper(v, assume_shared_size_fixed) 420 | if None is rval: 421 | raise TypeError('some ancestor was not a TensorType var') 422 | def as_int(o): 423 | if hasattr(o, 'data'): 424 | return int(o.data) 425 | elif hasattr(o, 'type'): 426 | f = theano.function([], o, 427 | mode=theano.Mode(linker='py', optimizer=None)) 428 | return f() 429 | else: 430 | return int(o) 431 | return tuple([as_int(r) for r in rval]) 432 | 433 | 434 | -------------------------------------------------------------------------------- /montetheano/test_distributions.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy 3 | 4 | import theano 5 | from theano import tensor 6 | 7 | from rstreams import RandomStreams 8 | import distributions 9 | from sample import rejection_sample, mh_sample, hybridmc_sample, mh2_sample 10 | from rv import is_rv, is_raw_rv, full_log_likelihood, lpdf 11 | import for_theano 12 | from for_theano import evaluate, ancestors, infer_shape, memoized 
13 | 14 | import pylab 15 | 16 | def test_dirichlet(): 17 | R = RandomStreams(234) 18 | n = R.dirichlet(alpha=numpy.ones(10,), draw_shape=(5,)) 19 | 20 | f = theano.function([], n) 21 | 22 | assert f().shape == (5, 10) 23 | 24 | 25 | def test_multinomial(): 26 | R = RandomStreams(234) 27 | n = R.multinomial(5, numpy.ones(5,)/5, draw_shape=(2,)) 28 | 29 | f = theano.function([], n) 30 | 31 | assert f().shape == (2, 5) 32 | 33 | 34 | class TestBasicBinomial(unittest.TestCase): 35 | def setUp(self): 36 | s_rng = self.s_rng = RandomStreams(23424) 37 | p = 0.5 38 | self.A = s_rng.binomial(1, p) 39 | self.B = s_rng.binomial(1, p) 40 | self.C = s_rng.binomial(1, p) 41 | self.D = self.A+self.B+self.C 42 | self.condition = tensor.ge(self.D, 2) 43 | 44 | def test_rejection_sampler(self): 45 | sample, updates = rejection_sample([self.A, self.B, self.C], self.condition) 46 | 47 | # create a runnable function 48 | sampler = theano.function(inputs=[], outputs = sample, updates = updates) 49 | 50 | # generate some data 51 | data = [] 52 | for i in range(100): 53 | data.append(sampler()) 54 | 55 | # plot histogram 56 | pylab.hist(numpy.asarray(data)) 57 | pylab.show() 58 | 59 | def test_rejection_sampler_no_cond(self): 60 | sample, updates = rejection_sample([self.A, self.B, self.C]) 61 | 62 | # create a runnable function 63 | sampler = theano.function(inputs=[], outputs = sample, updates = updates) 64 | 65 | # generate some data 66 | data = [] 67 | for i in range(100): 68 | data.append(sampler()) 69 | 70 | # plot histogram 71 | pylab.hist(numpy.asarray(data)) 72 | pylab.show() 73 | 74 | 75 | class TestQuantizedLogNormalMixture(unittest.TestCase): 76 | def setUp(self): 77 | s_rng = self.s_rng = RandomStreams(23424) 78 | self.weights = tensor.dvector() 79 | self.mus = tensor.dvector() 80 | self.sigmas = tensor.dvector() 81 | 82 | def test_draw_1(self): 83 | q = self.s_rng.quantized_lognormal_mixture( 84 | self.weights, 85 | self.mus, 86 | self.sigmas, 87 | step=2) 88 | f = theano.function([self.weights, self.mus, self.sigmas], 89 | q) 90 | assert f([1.0], [0.0], [0.01]) == 2.0 91 | assert f([0.5, 0.5], [0.0, 0.0], [0.01, 0.001]) == 2.0 92 | 93 | def test_draw_0(self): 94 | q = self.s_rng.quantized_lognormal_mixture( 95 | self.weights, 96 | self.mus, 97 | self.sigmas, 98 | step=2, 99 | draw_shape = (0,)) 100 | f = theano.function([self.weights, self.mus, self.sigmas], 101 | q) 102 | assert list(f([1.0], [0.0], [0.01])) == [] 103 | assert list(f([0.5, 0.5], [0.0, 0.0], [0.01, 0.001])) == [] 104 | 105 | 106 | 107 | # first example: http://projects.csail.mit.edu/church/wiki/Learning_as_Conditional_Inference 108 | class TestCoin(unittest.TestCase): 109 | def setUp(self): 110 | s_rng = self.s_rng = RandomStreams(23424) 111 | 112 | self.fair_prior = 0.999 113 | self.fair_coin = s_rng.binomial(1, self.fair_prior) 114 | 115 | make_coin = lambda x: s_rng.binomial(1, x, draw_shape=(4,)) 116 | self.coin = make_coin(tensor.switch(self.fair_coin > 0.5, 0.5, 0.95)) 117 | 118 | self.data = tensor.as_tensor_variable([[1, 1, 1, 1]]) 119 | 120 | def test_tt(self): 121 | sample, updates = rejection_sample([self.fair_coin,], tensor.eq(tensor.sum(tensor.eq(self.coin, self.data)), 4)) # all four flips must match the data 122 | sampler = theano.function([], sample, updates=updates) 123 | 124 | # TODO: this is super-slow, how can bher do this fast?
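# Rough cost estimate for this rejection loop, assuming the condition above
# (all four observed flips come up 1):
#   P(accept) = 0.999 * 0.5**4 + 0.001 * 0.95**4 ~= 0.063
# so on the order of 16 prior draws are needed per accepted sample.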
125 | for i in range(100): 126 | print sampler() 127 | 128 | 129 | class TestCoin2(): #unittest.TestCase): 130 | def setUp(self): 131 | s_rng = self.s_rng = RandomStreams(23424) 132 | 133 | self.repetitions = 100 134 | self.coin_weight = s_rng.uniform(low=0, high=1) 135 | self.coin = s_rng.binomial(1, self.coin_weight, draw_shape=(self.repetitions,)) 136 | 137 | def test_tt(self): 138 | true_sampler = theano.function([self.coin_weight], self.coin) 139 | 140 | sample, ll, updates = mh_sample(self.s_rng, [self.coin_weight]) 141 | sampler = theano.function([self.coin], sample, updates=updates) 142 | 143 | for i in range(100): 144 | print sampler(true_sampler(0.9)) 145 | 146 | 147 | class TestGMM(unittest.TestCase): 148 | def setUp(self): 149 | s_rng = self.s_rng = RandomStreams(23424) 150 | 151 | self.p = tensor.scalar() 152 | self.m1 = tensor.scalar() 153 | self.m2 = tensor.scalar() 154 | self.v = tensor.scalar() 155 | 156 | self.C = s_rng.binomial(1, self.p) 157 | self.m = tensor.switch(self.C, self.m1, self.m2) 158 | self.D = s_rng.normal(self.m, self.v) 159 | 160 | self.D_data = tensor.as_tensor_variable([1, 1.2, 3, 3.4]) 161 | 162 | def test_tt(self): 163 | RVs = dict([(self.D, self.D_data)]) 164 | lik = full_log_likelihood(RVs) 165 | 166 | lf = theano.function([self.m1, self.m2, self.C], lik) 167 | 168 | print lf(1,3,0) 169 | print lf(1,3,1) 170 | 171 | # EM: 172 | # E-step: 173 | # C = expectation p(C | data, params) 174 | # M-step: 175 | # params = argmax p(params | C, data) 176 | # 177 | # MCMC (Gibbs): 178 | # p(params | data, C) 179 | # p(C | data, params) 180 | 181 | 182 | class TestHierarchicalNormal(): #unittest.TestCase): 183 | def setUp(self): 184 | s_rng = self.s_rng = RandomStreams(23424) 185 | a = 0.0 186 | b = 1.0 187 | c = 1.5 188 | d = 2.0 189 | 190 | self.M = s_rng.normal(a, b) 191 | self.V = s_rng.normal(c, d) 192 | self.V_ = abs(self.V) + .1 193 | self.X = s_rng.normal(self.M, self.V_, draw_shape=(4,)) 194 | 195 | self.X_data = tensor.as_tensor_variable([1, 2, 3, 2.4]) 196 | 197 | def test_sample_gets_all_rvs(self): 198 | outs, dct = sample(self.s_rng, [self.X], ()) 199 | assert outs == [self.X] 200 | assert len(dct) == 3 201 | 202 | def test_sample_can_be_generated(self): 203 | outs, dct = sample(self.s_rng, [self.X], ()) 204 | f = theano.function([], [dct[self.X], dct[self.M], 205 | dct[self.V.owner.inputs[0]]]) 206 | x0, m0, v0 = f() 207 | x1, m1, v1 = f() 208 | assert not numpy.any(x0 == x1) 209 | assert x0.shape == (4,) 210 | assert m0.shape == () 211 | assert v1.shape == () 212 | print x0, m0, v0 213 | 214 | def test_likelihood(self): 215 | outs, obs = sample(self.s_rng, [self.X], ()) 216 | 217 | lik = likelihood(obs) 218 | 219 | f = theano.function([], lik) 220 | 221 | print f() 222 | 223 | def test_mh_sample(self): 224 | sample, ll, updates = mh_sample(self.s_rng, [self.M, self.V], observations={self.X: self.X_data}, lag = 100) 225 | sampler = theano.function([], sample, updates=updates) 226 | 227 | data = [] 228 | for i in range(100): 229 | print i 230 | data.append(sampler()) 231 | 232 | pylab.subplot(211) 233 | pylab.hist(numpy.asarray(data)[:,0]) 234 | pylab.subplot(212) 235 | pylab.hist(numpy.asarray(data)[:,1]) 236 | pylab.show() 237 | 238 | 239 | class Fitting1D(unittest.TestCase): 240 | def setUp(self): 241 | self.obs = tensor.as_tensor_variable( 242 | numpy.asarray([0.0, 1.01, 0.7, 0.65, 0.3])) 243 | self.rstream = RandomStreams(234) 244 | self.n = self.rstream.normal() 245 | self.u = self.rstream.uniform() 246 | 247 | def test_normal_ml(self): 248 | up =
self.rstream.ml(self.n, self.obs) 249 | p = self.rstream.params(self.n) 250 | f = theano.function([], [up[p[0]], up[p[1]]]) 251 | m,v = f() 252 | assert numpy.allclose([m,v], [.532, 0.34856276335]) 253 | 254 | def test_uniform_ml(self): 255 | up = self.rstream.ml(self.u, self.obs) 256 | p = self.rstream.params(self.u) 257 | f = theano.function([], [up[p[0]], up[p[1]]]) 258 | l,h = f() 259 | assert numpy.allclose([l,h], [0.0, 1.01]) 260 | 261 | 262 | class TestHMM(): #unittest.TestCase): 263 | def setUp(self): 264 | s_rng = self.s_rng = RandomStreams(23424) 265 | 266 | self.nr_states = 5 267 | self.nr_obs = 3 268 | 269 | self.observation_model = memoized(lambda state: s_rng.dirichlet([1]*self.nr_obs)) 270 | self.transition_model = memoized(lambda state: s_rng.dirichlet([1]*self.nr_states)) 271 | 272 | self.transition = lambda state: s_rng.multinomial(1, self.transition_model(state)) 273 | self.observation = lambda state: s_rng.multinomial(1, self.observation_model(state)) 274 | 275 | def transition(obs, state): 276 | return [self.observation(state), self.transition(state)] ,{}, until(state == numpy.asarray([0,0,0,0,1])) 277 | 278 | [self.sampled_words, self.sampled_states], updates = scan([], [obs, state]) 279 | 280 | def test(self): 281 | print evaluate(self.sample_words([1,0,0,0,0])) 282 | 283 | 284 | class TestGMM1(unittest.TestCase): 285 | def setUp(self): 286 | R = RandomStreams(234) 287 | weights = tensor.dvector() 288 | mus = tensor.dvector() 289 | sigmas = tensor.dvector() 290 | draw_shape = tensor.ivector() 291 | xsca = R.GMM1(weights, mus, sigmas, draw_shape=draw_shape, ndim=0) 292 | xvec = R.GMM1(weights, mus, sigmas, draw_shape=draw_shape, ndim=1) 293 | xmat = R.GMM1(weights, mus, sigmas, draw_shape=draw_shape, ndim=2) 294 | 295 | self.__dict__.update(locals()) 296 | del self.self 297 | 298 | def test1(self): 299 | assert self.xsca.ndim == 0 300 | assert self.xvec.ndim == 1 301 | assert self.xmat.ndim == 2 302 | 303 | assert self.xsca.dtype == 'float64' 304 | assert self.xvec.dtype == 'float64' 305 | assert self.xmat.dtype == 'float64' 306 | 307 | def test_mu_is_used_correctly(self): 308 | f = theano.function( 309 | [self.weights, self.mus, self.sigmas, self.draw_shape], 310 | self.xsca) 311 | assert numpy.allclose(10, f([1], [10.0], [0.0000001], [])) 312 | 313 | def test_sigma_is_used_correctly(self): 314 | f = theano.function( 315 | [self.weights, self.mus, self.sigmas, self.draw_shape], 316 | self.xvec) 317 | samples = f([1], [0.0], [10.0], [1000]) 318 | assert 9 < numpy.std(samples) < 11 319 | 320 | def test_mus_make_variance(self): 321 | f = theano.function( 322 | [self.weights, self.mus, self.sigmas, self.draw_shape], 323 | self.xvec) 324 | 325 | samples = f([.5, .5], [0.0, 1.0], [0.000001, 0.000001], [1000]) 326 | print samples.shape 327 | #import matplotlib.pyplot as plt 328 | #plt.hist(samples) 329 | #plt.show() 330 | assert .45 < numpy.mean(samples) < .55, numpy.mean(samples) 331 | assert .2 < numpy.var(samples) < .3, numpy.var(samples) 332 | 333 | def test_weights(self): 334 | f = theano.function( 335 | [self.weights, self.mus, self.sigmas, self.draw_shape], 336 | self.xvec) 337 | 338 | samples = f([.9999, .0001], [0.0, 1.0], [0.000001, 0.000001], [1000]) 339 | assert samples.shape == (1000,) 340 | #import matplotlib.pyplot as plt 341 | #plt.hist(samples) 342 | #plt.show() 343 | assert -.001 < numpy.mean(samples) < .001, numpy.mean(samples) 344 | assert numpy.var(samples) < .0001, numpy.var(samples) 345 | 346 | def test_mat_output(self): 347 | f = theano.function(
348 | [self.weights, self.mus, self.sigmas, self.draw_shape], 349 | self.xmat) 350 | 351 | samples = f([.9999, .0001], [0.0, 1.0], [0.000001, 0.000001], [40, 20]) 352 | assert samples.shape == (40, 20) 353 | assert -.001 < numpy.mean(samples) < .001, numpy.mean(samples) 354 | assert numpy.var(samples) < .0001, numpy.var(samples) 355 | 356 | def test_lpdf_scalar_one_component(self): 357 | xval = tensor.dscalar() 358 | ll = lpdf(self.xsca, xval) 359 | assert ll.ndim == 0, ll.type 360 | f = theano.function( 361 | [xval, self.weights, self.mus, self.sigmas, self.draw_shape], 362 | ll) 363 | llval = f(1.0, # x 364 | [1.], # weights 365 | [1.0], # mu 366 | [2.0], # sigma 367 | [] # shape 368 | ) 369 | assert llval.shape == () 370 | assert numpy.allclose(llval, 371 | numpy.log(1.0 / numpy.sqrt(2 * numpy.pi * 2.0**2))) 372 | 373 | def test_lpdf_scalar_N_components(self): 374 | xval = tensor.dscalar() 375 | ll = lpdf(self.xsca, xval) 376 | assert ll.ndim == 0, ll.type 377 | f = theano.function( 378 | [xval, self.weights, self.mus, self.sigmas, self.draw_shape], 379 | ll) 380 | llval = f(1.0, # x 381 | [0.25, 0.25, .5], # weights 382 | [0.0, 1.0, 2.0], # mu 383 | [1.0, 2.0, 5.0], # sigma 384 | [] # shape 385 | ) 386 | 387 | a = (.25 / numpy.sqrt(2 * numpy.pi * 1.0 ** 2) 388 | * numpy.exp(-.5 * (1.0)**2)) 389 | a += (.25 / numpy.sqrt(2 * numpy.pi * 2.0 ** 2)) 390 | a += (.5 / numpy.sqrt(2 * numpy.pi * 5.0 ** 2) 391 | * numpy.exp(-.5 * (1.0 / 5.0) ** 2)) 392 | 393 | def test_lpdf_vector_N_components(self): 394 | xval = tensor.dvector() 395 | ll = lpdf(self.xvec, xval) 396 | assert ll.ndim == 1, ll.type 397 | f = theano.function( 398 | [xval, self.weights, self.mus, self.sigmas], 399 | ll) 400 | llval = f([1.0, 0.0], # x 401 | [0.25, 0.25, .5], # weights 402 | [0.0, 1.0, 2.0], # mu 403 | [1.0, 2.0, 5.0], # sigma 404 | ) 405 | 406 | # case x = 1.0 407 | a = (.25 / numpy.sqrt(2 * numpy.pi * 1.0 ** 2) 408 | * numpy.exp(-.5 * (1.0)**2)) 409 | a += (.25 / numpy.sqrt(2 * numpy.pi * 2.0 ** 2)) 410 | a += (.5 / numpy.sqrt(2 * numpy.pi * 5.0 ** 2) 411 | * numpy.exp(-.5 * (1.0 / 5.0) ** 2)) 412 | 413 | assert llval.shape == (2,) 414 | assert numpy.allclose(llval[0], numpy.log(a)) 415 | 416 | 417 | # case x = 0.0 418 | a = (.25 / numpy.sqrt(2 * numpy.pi * 1.0 ** 2)) 419 | a += (.25 / numpy.sqrt(2 * numpy.pi * 2.0 ** 2) 420 | * numpy.exp(-.5 * (1.0 / 2.0) ** 2)) 421 | a += (.5 / numpy.sqrt(2 * numpy.pi * 5.0 ** 2) 422 | * numpy.exp(-.5 * (2.0 / 5.0) ** 2)) 423 | assert numpy.allclose(llval[1], numpy.log(a)) 424 | 425 | def test_lpdf_matrix_N_components(self): 426 | xval = tensor.dmatrix() 427 | ll = lpdf(self.xmat, xval) 428 | assert ll.ndim == 2, ll.type 429 | f = theano.function( 430 | [xval, self.weights, self.mus, self.sigmas], 431 | ll) 432 | llval = f([[1.0, 0.0, 0.0], [0, 0, 1]], # x 433 | [0.25, 0.25, .5], # weights 434 | [0.0, 1.0, 2.0], # mu 435 | [1.0, 2.0, 5.0], # sigma 436 | ) 437 | 438 | a = (.25 / numpy.sqrt(2 * numpy.pi * 1.0 ** 2) 439 | * numpy.exp(-.5 * (1.0)**2)) 440 | a += (.25 / numpy.sqrt(2 * numpy.pi * 2.0 ** 2)) 441 | a += (.5 / numpy.sqrt(2 * numpy.pi * 5.0 ** 2) 442 | * numpy.exp(-.5 * (1.0 / 5.0) ** 2)) 443 | 444 | assert llval.shape == (2,3) 445 | assert numpy.allclose(llval[0,0], numpy.log(a)) 446 | assert numpy.allclose(llval[1,2], numpy.log(a)) 447 | 448 | 449 | a = (.25 / numpy.sqrt(2 * numpy.pi * 1.0 ** 2)) 450 | a += (.25 / numpy.sqrt(2 * numpy.pi * 2.0 ** 2) 451 | * numpy.exp(-.5 * (1.0 / 2.0)**2)) 452 | a += (.5 / numpy.sqrt(2 * numpy.pi * 5.0 ** 2) 453 | * numpy.exp(-.5 * (2.0 
/ 5.0) ** 2)) 454 | 455 | assert numpy.allclose(llval[0,1], numpy.log(a)) 456 | assert numpy.allclose(llval[0,2], numpy.log(a)) 457 | assert numpy.allclose(llval[1,0], numpy.log(a)) 458 | assert numpy.allclose(llval[1,1], numpy.log(a)) 459 | 460 | # XXX: make sure lpdf calculation includes logsum 461 | 462 | if 0: 463 | def test_illustrate(self): 464 | f = theano.function( 465 | [self.weights, self.mus, self.sigmas, self.draw_shape], 466 | self.xvec) 467 | 468 | samples = f(#numpy.arange(16)/numpy.arange(16).sum(), 469 | numpy.ones(16)/16, 470 | numpy.arange(16), 471 | #.02 * (numpy.arange(16)+1), 472 | .2 * numpy.ones(16), 473 | [10000]) 474 | import matplotlib.pyplot as plt 475 | plt.hist(samples, bins=100) 476 | plt.show() 477 | -------------------------------------------------------------------------------- /montetheano/distributions.py: -------------------------------------------------------------------------------- 1 | """ 2 | Math for various distributions. 3 | 4 | """ 5 | import __builtin__ 6 | import copy 7 | import logging 8 | 9 | logger = logging.getLogger(__file__) 10 | 11 | import numpy 12 | import theano 13 | import scipy 14 | import scipy.special 15 | from theano import tensor 16 | from for_theano import elemwise_cond, ancestors, infer_shape, evaluate 17 | from rstreams import rng_register, rv_dist_name 18 | 19 | 20 | # TODOs: 21 | # - Additional distributions of interest: 22 | # - Wishart 23 | # - Dirichlet process / CRP 24 | # - REFACTOR: GMM1, BGMM1, lognormal_mixture are largely cut-and-pasted 25 | 26 | # ------- 27 | # Random integer 28 | # ------- 29 | 30 | @rng_register 31 | def random_integers_sampler(rstream, low=0, high=1, ndim=None, draw_shape=None, dtype=numpy.dtype('int32')): 32 | # TODO: this should be only integer, nothing else: check dtype check boundaries 33 | 34 | low = tensor.as_tensor_variable(low) 35 | high = tensor.as_tensor_variable(high) 36 | 37 | ndim, draw_shape, bcast = tensor.raw_random._infer_ndim_bcast(ndim, draw_shape, low, high) 38 | op = tensor.raw_random.RandomFunction('random_integers', 39 | tensor.TensorType(dtype=dtype, broadcastable=bcast)) 40 | 41 | rstate = rstream.new_shared_rstate() 42 | new_rstate, out = op(rstate, draw_shape, low, high) 43 | rstream.add_default_update(out, rstate, new_rstate) 44 | return out 45 | 46 | @rng_register 47 | def random_integers_lpdf(node, sample, kw): 48 | rstate, shape, low, high = node.inputs 49 | 50 | # TODO: Check that sample is integer ! 51 | 52 | rval = elemwise_cond( 53 | numpy.array(float('-inf')), sample < low, 54 | -tensor.log(high-low+1.), sample <= high, 55 | numpy.array(float('-inf'))) 56 | return rval 57 | 58 | 59 | # ------- 60 | # Uniform 61 | # ------- 62 | 63 | @rng_register 64 | def uniform_sampler(rstream, low=0.0, high=1.0, ndim=None, draw_shape=None, 65 | dtype=theano.config.floatX): 66 | low = tensor.as_tensor_variable(low) 67 | high = tensor.as_tensor_variable(high) 68 | rstate = rstream.new_shared_rstate() 69 | 70 | # James: why is this required? 
fails if draw_shape is not provided 71 | # if isinstance(draw_shape, (list, tuple)): 72 | # draw_shape = tensor.stack(*draw_shape) 73 | 74 | new_rstate, out = tensor.raw_random.uniform( 75 | rstate, draw_shape, low, high, ndim, dtype) 76 | rstream.add_default_update(out, rstate, new_rstate) 77 | return out 78 | 79 | 80 | @rng_register 81 | def uniform_lpdf(node, sample, kw): 82 | rstate, shape, low, high = node.inputs 83 | rval = elemwise_cond( 84 | numpy.array(float('-inf')), sample < low, 85 | -tensor.log(high - low), sample <= high, 86 | numpy.array(float('-inf'))) 87 | return rval 88 | 89 | 90 | @rng_register 91 | def uniform_ml(node, sample, weights): 92 | rstate, shape, low, high = node.inputs 93 | return Updates({ 94 | low: sample.min(), 95 | high: sample.max()}) 96 | 97 | 98 | @rng_register 99 | def uniform_params(node): 100 | rstate, shape, low, high = node.inputs 101 | return [low, high] 102 | 103 | def uniform_get_low(v): 104 | # look in uniform_sampler to see the positions of these things 105 | if rv_dist_name(v) == 'uniform': 106 | return v.owner.inputs[2] 107 | raise ValueError('v is not a uniform draw', v) 108 | 109 | 110 | def uniform_get_high(v): 111 | # look in uniform_sampler to see the positions of these things 112 | if rv_dist_name(v) == 'uniform': 113 | return v.owner.inputs[3] 114 | raise ValueError('v is not a uniform draw', v) 115 | 116 | 117 | # ------ 118 | # Normal 119 | # ------ 120 | def normal_get_mu(v): 121 | # look in normal_sampler to see the positions of these things 122 | if rv_dist_name(v) == 'normal': 123 | return v.owner.inputs[2] 124 | raise ValueError('v is not a normal draw', v) 125 | 132 | def normal_get_sigma(v): 133 | # look in normal_sampler to see the positions of these things 134 | if rv_dist_name(v) == 'normal': 135 | return v.owner.inputs[3] 136 | raise ValueError('v is not a normal draw', v) 137 | 138 | @rng_register 139 | def normal_sampler(rstream, mu=0.0, sigma=1.0, draw_shape=None, ndim=None, 140 | dtype=None): 141 | mu = tensor.as_tensor_variable(mu) 142 | sigma = tensor.as_tensor_variable(sigma) 143 | rstate = rstream.new_shared_rstate() 144 | 145 | new_rstate, out = tensor.raw_random.normal( 146 | rstate, draw_shape, mu, sigma, dtype=dtype) 147 | rstream.add_default_update(out, rstate, new_rstate) 148 | return out 149 | 150 | @rng_register 151 | def normal_lpdf(node, sample, kw): 152 | # make sure that the division is done at least with float32 precision 153 | one = tensor.as_tensor_variable(numpy.asarray(1, dtype='float32')) 154 | rstate, shape, mu, sigma = node.inputs 155 | Z = tensor.sqrt(2 * numpy.pi * sigma**2) 156 | E = 0.5 * ((mu - sample)/(one*sigma))**2 157 | return - E - tensor.log(Z) 158 | 159 | @rng_register 160 | def normal_ml(node, sample, weights): 161 | rstate, shape, mu, sigma = node.inputs 162 | eps = 1e-8 163 | if weights is None: 164 | new_mu = tensor.mean(sample) 165 | new_sigma = tensor.std(sample) 166 | 167 | else: 168 | denom = tensor.maximum(tensor.sum(weights), eps) 169 | new_mu = tensor.sum(sample*weights) / denom 170 | new_sigma = tensor.sqrt( 171 | tensor.sum(weights * (sample - new_mu)**2) 172 | / denom) 173 | return Updates({ 174 | mu: new_mu, 175 | sigma: new_sigma}) 176 | 177 | @rng_register 178 | def normal_params(node): 179 | rstate, shape, mu, sigma = node.inputs 180 | return
[mu, sigma] 181 | 182 | @rng_register 183 | def normal_proposal(rstream, node, sample, kw): 184 | # TODO: how do we determine the variance? 185 | return rstream.normal(sample, 0.1, draw_shape = infer_shape(node.outputs[1])) 186 | 187 | 188 | # --------- 189 | # Binomial 190 | # --------- 191 | 192 | @rng_register 193 | def binomial_sampler(rstream, n=1, p=0.5, ndim=0, draw_shape=None, dtype=theano.config.floatX): 194 | if not isinstance(n, theano.Variable): 195 | n = tensor.shared(numpy.asarray(n, dtype=int)) 196 | if not isinstance(p, theano.Variable): 197 | p = tensor.shared(numpy.asarray(p, dtype=theano.config.floatX)) 198 | rstate = rstream.new_shared_rstate() 199 | 200 | new_rstate, out = tensor.raw_random.binomial(rstate, draw_shape, n, p, dtype=dtype) 201 | rstream.add_default_update(out, rstate, new_rstate) 202 | return out 203 | 204 | @rng_register 205 | def binomial_lpdf(node, x, kw): 206 | random_state, size, n, p = node.inputs 207 | 208 | # for the n > 1 the "choose" operation is required 209 | # TODO assert n == 1 210 | 211 | return tensor.switch(tensor.eq(x, 1.), tensor.log(p), tensor.log(1. - p)) 212 | 213 | @rng_register 214 | def binomial_params(node): 215 | rstate, shape, n, p = node.inputs 216 | return [n, p] 217 | 218 | 219 | # --------- 220 | # Lognormal 221 | # --------- 222 | 223 | 224 | def lognormal_get_mu(v): 225 | # look in uniform_sampler to see the positions of these things 226 | if rv_dist_name(v) == 'lognormal': 227 | return v.owner.inputs[2] 228 | raise ValueError('v is not a lognormal draw', v) 229 | 230 | def lognormal_get_sigma(v): 231 | # look in uniform_sampler to see the positions of these things 232 | if rv_dist_name(v) == 'lognormal': 233 | return v.owner.inputs[3] 234 | raise ValueError('v is not a lognormal draw', v) 235 | 236 | @rng_register 237 | def lognormal_sampler(rstream, mu=0.0, sigma=1.0, draw_shape=None, ndim=None, dtype=theano.config.floatX): 238 | """ 239 | Sample from a log-normal distribution centered (in the log domain) on avg 240 | with the specified standard deviation (std). 241 | 242 | If the size argument is ambiguous on the number of dimensions, ndim 243 | may be a plain integer to supplement the missing information. 244 | 245 | If size is None, the output shape will be determined by the shapes 246 | of avg and std. 247 | 248 | If dtype is not specified, it will be inferred from the dtype of 249 | avg and std, but will be at least as precise as floatX. 
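Note: if an integer dtype is requested, the draw is delegated to
quantized_lognormal_sampler with step=1 (see the first lines of the
function body).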
250 | """ 251 | 252 | if 'int' in str(dtype): 253 | return quantized_lognormal_sampler(rstream, mu, sigma, 1, 254 | draw_shape, ndim, dtype=theano.config.floatX) 255 | 256 | mu = tensor.as_tensor_variable(mu) 257 | sigma = tensor.as_tensor_variable(sigma) 258 | 259 | if dtype == None: 260 | dtype = tensor.scal.upcast( 261 | theano.config.floatX, mu.dtype, sigma.dtype) 262 | rstate = rstream.new_shared_rstate() 263 | ndim, draw_shape, bcast = tensor.raw_random._infer_ndim_bcast( 264 | ndim, draw_shape, mu, sigma) 265 | op = tensor.raw_random.RandomFunction('lognormal', 266 | tensor.TensorType(dtype=dtype, broadcastable=bcast)) 267 | new_rstate, out = op(rstate, draw_shape, mu, sigma) 268 | rstream.add_default_update(out, rstate, new_rstate) 269 | return out 270 | 271 | @rng_register 272 | def lognormal_lpdf(node, x, kw): 273 | r, shape, mu, sigma = node.inputs 274 | return lognormal_lpdf_math(x, mu, sigma) 275 | 276 | def lognormal_cdf_math(x, mu, sigma, eps=1e-12): 277 | # wikipedia claims cdf is 278 | # .5 + .5 erf( log(x) - mu / sqrt(2 sigma^2)) 279 | # 280 | # the maximum is used to move negative values and 0 up to a point 281 | # where they do not cause nan or inf, but also don't contribute much 282 | # to the cdf. 283 | return .5 + .5 * tensor.erf( 284 | (tensor.log(tensor.maximum(x, eps)) - mu) 285 | / tensor.sqrt(2 * sigma**2)) 286 | 287 | def lognormal_lpdf_math(x, mu, sigma, step=1): 288 | # formula copied from wikipedia 289 | # http://en.wikipedia.org/wiki/Log-normal_distribution 290 | Z = sigma * x * numpy.sqrt(2 * numpy.pi) 291 | E = 0.5 * ((tensor.log(x) - mu) / sigma)**2 292 | return -E - tensor.log(Z) 293 | 294 | 295 | # ------------------- 296 | # Quantized Lognormal 297 | # ------------------- 298 | 299 | def quantized_lognormal_get_mu(v): 300 | # look in uniform_sampler to see the positions of these things 301 | if rv_dist_name(v) == 'quantized_lognormal': 302 | return v.owner.inputs[2] 303 | raise ValueError('v is not a quantized_lognormal draw', v) 304 | 305 | def quantized_lognormal_get_sigma(v): 306 | # look in uniform_sampler to see the positions of these things 307 | if rv_dist_name(v) == 'quantized_lognormal': 308 | return v.owner.inputs[3] 309 | raise ValueError('v is not a quantized_lognormal draw', v) 310 | 311 | def quantized_lognormal_get_round(v): 312 | # look in uniform_sampler to see the positions of these things 313 | if rv_dist_name(v) == 'quantized_lognormal': 314 | return v.owner.inputs[4] 315 | raise ValueError('v is not a quantized_lognormal draw', v) 316 | 317 | class QuantizedLognormal(theano.Op): 318 | dist_name = 'quantized_lognormal' 319 | 320 | def __init__(self, otype, destructive=False): 321 | self.destructive = destructive 322 | self.otype = otype 323 | if destructive: 324 | self.destroy_map = {0:[0]} 325 | else: 326 | self.destroy_map = {} 327 | 328 | def __eq__(self, other): 329 | return (type(self) == type(other) 330 | and self.destructive == other.destructive 331 | and self.otype == other.otype) 332 | 333 | def __hash__(self): 334 | return hash((type(self), self.destructive, self.otype)) 335 | 336 | def make_node(self, s_rstate, draw_shape, mu, sigma, step): 337 | draw_shape = tensor.as_tensor_variable(draw_shape) 338 | mu = tensor.as_tensor_variable(mu) 339 | sigma = tensor.as_tensor_variable(sigma) 340 | step = tensor.as_tensor_variable(step) 341 | return theano.gof.Apply(self, 342 | [s_rstate, draw_shape, mu, sigma, step], 343 | [s_rstate.type(), self.otype()]) 344 | 345 | def perform(self, node, inputs, outstor): 346 | rng, shp, mu, 
sigma, step = inputs 347 | if not self.destructive: 348 | rng = copy.deepcopy(rng) 349 | shp = tuple(shp) 350 | sample = rng.lognormal(mean=mu, sigma=sigma, size=shp) 351 | sample = numpy.ceil(sample / step) * step 352 | assert sample.shape == shp 353 | if sample.size: assert sample.min() > 0 354 | sample = self.otype.filter(sample, allow_downcast=True) 355 | if sample.size: assert sample.min() > 0 356 | outstor[0][0] = rng 357 | outstor[1][0] = sample 358 | 359 | def infer_shape(self, node, ishapes): 360 | return [None, [node.inputs[1][i] for i in range(self.otype.ndim)]] 361 | 362 | @rng_register 363 | def quantized_lognormal_sampler(rstream, mu=0.0, sigma=1.0, step=1, draw_shape=None, ndim=None, 364 | dtype=theano.config.floatX): 365 | """ 366 | Sample from a quantized log-normal distribution centered on avg with 367 | the specified standard deviation (std). 368 | 369 | If the size argument is ambiguous on the number of dimensions, ndim 370 | may be a plain integer to supplement the missing information. 371 | 372 | If size is None, the output shape will be determined by the shapes 373 | of avg and std. 374 | 375 | If dtype is not specified, it will be inferred from the dtype of 376 | avg and std, but will be at least as precise as floatX. 377 | """ 378 | 379 | mu = tensor.as_tensor_variable(mu) 380 | sigma = tensor.as_tensor_variable(sigma) 381 | step = tensor.as_tensor_variable(step) 382 | 383 | if dtype == None: 384 | dtype = tensor.scal.upcast( 385 | theano.config.floatX, 386 | mu.dtype, sigma.dtype, step.dtype) 387 | rstate = rstream.new_shared_rstate() 388 | ndim, draw_shape, bcast = tensor.raw_random._infer_ndim_bcast( 389 | ndim, draw_shape, mu, sigma) 390 | op = QuantizedLognormal( 391 | otype=tensor.TensorType(dtype=dtype, broadcastable=bcast)) 392 | new_rstate, out = op(rstate, draw_shape, mu, sigma, step) 393 | rstream.add_default_update(out, rstate, new_rstate) 394 | return out 395 | 396 | @rng_register 397 | def quantized_lognormal_lpdf(node, x, kw): 398 | r, shape, mu, sigma, step = node.inputs 399 | 400 | # casting rounds up to nearest step multiple. 401 | # so lpdf is log of integral from x-step to x+1 of P(x) 402 | 403 | # XXX: subtracting two numbers potentially very close together. 
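# For samples far in the upper tail, both CDF values below approach 1 and
# their subtraction cancels catastrophically. A sketch of a more stable
# variant (not what this file does) phrases the same difference via the
# complementary error function, whose values stay small in the tail;
# using erf(z) = 1 - erfc(z):
#
#   sq2 = tensor.sqrt(2 * sigma ** 2)
#   upper = .5 * tensor.erfc((tensor.log(tensor.maximum(x - step, 1e-12)) - mu) / sq2)
#   lower = .5 * tensor.erfc((tensor.log(x) - mu) / sq2)
#   stable_lpdf = tensor.log(upper - lower)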
404 | return tensor.log( 405 | lognormal_cdf_math(x, mu, sigma) 406 | - lognormal_cdf_math(x - step, mu, sigma)) 407 | 408 | 409 | # ----------- 410 | # Categorical 411 | # ----------- 412 | 413 | 414 | class Categorical(theano.Op): 415 | dist_name = 'categorical' 416 | def __init__(self, destructive, otype): 417 | self.destructive = destructive 418 | self.otype = otype 419 | if destructive: 420 | self.destroy_map = {0:[0]} 421 | else: 422 | self.destroy_map = {} 423 | 424 | def __eq__(self, other): 425 | return (type(self) == type(other) 426 | and self.destructive == other.destructive 427 | and self.otype == other.otype) 428 | 429 | def __hash__(self): 430 | return hash((type(self), self.destructive, self.otype)) 431 | 432 | def make_node(self, s_rstate, p, draw_shape): 433 | p = tensor.as_tensor_variable(p) 434 | draw_shape = tensor.as_tensor_variable(draw_shape) 435 | return theano.gof.Apply(self, 436 | [s_rstate, p, draw_shape], 437 | [s_rstate.type(), self.otype()]) 438 | 439 | def perform(self, node, inputs, outstor): 440 | rng, p, shp = inputs 441 | if not self.destructive: 442 | rng = copy.deepcopy(rng) 443 | n_draws = numpy.prod(shp) 444 | sample = rng.multinomial(n=1, pvals=p, size=tuple(shp)) 445 | assert sample.shape == tuple(shp) + (len(p),) 446 | if tuple(shp): 447 | rval = numpy.sum(sample * numpy.arange(len(p)), axis=len(shp)) 448 | else: 449 | rval = [numpy.where(rng.multinomial(pvals=p, n=1))[0][0] 450 | for i in xrange(n_draws)] 451 | rval = numpy.asarray(rval, dtype=self.otype.dtype) 452 | assert (rval.shape == shp).all() 453 | #print "categorical drawing samples", rval.shape, (rval==0).sum(), (rval==1).sum() 454 | outstor[0][0] = rng 455 | outstor[1][0] = self.otype.filter(rval, allow_downcast=True) 456 | 457 | def infer_shape(self, node, ishapes): 458 | return [None, [node.inputs[2][i] for i in range(self.otype.ndim)]] 459 | 460 | 461 | @rng_register 462 | def categorical_sampler(rstream, p, draw_shape, dtype='int32'): 463 | if not isinstance(p, theano.Variable): 464 | p = tensor._shared(numpy.asarray(p, dtype=theano.config.floatX)) 465 | if p.ndim != 1: 466 | raise NotImplementedError() 467 | if draw_shape.ndim != 1: 468 | raise TypeError() 469 | op = Categorical(False, 470 | tensor.TensorType( 471 | broadcastable=(False,)* tensor.get_vector_length(draw_shape), 472 | dtype=dtype)) 473 | rstate = rstream.new_shared_rstate() 474 | new_rstate, out = op(rstate, p, draw_shape) 475 | rstream.add_default_update(out, rstate, new_rstate) 476 | return out 477 | 478 | 479 | @rng_register 480 | def categorical_lpdf(node, sample, kw): 481 | """ 482 | Return a random integer from 0 .. N-1 inclusive according to the 483 | probabilities p[0] .. P[N-1]. 
484 | 485 | This is formally equivalent to numpy.where(multinomial(n=1, p)) 486 | """ 487 | # WARNING: I think the p[-1] is not used, but assumed to be p[:-1].sum() 488 | s_rstate, p, draw_shape = node.inputs 489 | return tensor.log(p[sample]) 490 | 491 | 492 | # --------- 493 | # LogGamma helper Op 494 | # --------- 495 | 496 | # class PolyGamma(theano.Op): 497 | # def __eq__(self, other): 498 | # return type(self) == type(other) 499 | # 500 | # def __hash__(self): 501 | # return hash(type(self)) 502 | # 503 | # def make_node(self, x): 504 | # x_ = tensor.as_tensor_variable(x).astype(theano.config.floatX) 505 | # return theano.Apply(self, 506 | # inputs=[x_], 507 | # outputs=[x_.type()]) 508 | # 509 | # def perform(self, node, inputs, output_storage): 510 | # x, = inputs 511 | # output_storage[0][0] = numpy.asarray(scipy.special.polygamma(0, x), dtype=node.outputs[0].dtype) 512 | # 513 | # polyGamma = PolyGamma() 514 | 515 | class LogGamma(theano.Op): 516 | def __eq__(self, other): 517 | return type(self) == type(other) 518 | 519 | def __hash__(self): 520 | return hash(type(self)) 521 | 522 | def make_node(self, x): 523 | x_ = tensor.as_tensor_variable(x).astype(theano.config.floatX) 524 | return theano.Apply(self, 525 | inputs=[x_], 526 | outputs=[x_.type()]) 527 | 528 | def perform(self, node, inputs, output_storage): 529 | x, = inputs 530 | output_storage[0][0] = numpy.asarray(scipy.special.gammaln(x), dtype=node.outputs[0].dtype) 531 | 532 | # TODO: is this correct ? 533 | # def grad(self, inp, grads): 534 | # s, = inp 535 | # dt, = grads 536 | # return [polyGamma(s)*dt] 537 | 538 | logGamma = LogGamma() 539 | 540 | # --------- 541 | # Dirichlet 542 | # --------- 543 | 544 | @rng_register 545 | def dirichlet_sampler(rstream, alpha, draw_shape=None, ndim=None, dtype=theano.config.floatX): 546 | alpha = tensor.as_tensor_variable(alpha) 547 | tmp = alpha.T[0].T 548 | 549 | alpha = tensor.as_tensor_variable(alpha).astype(theano.config.floatX) 550 | if dtype == None: 551 | dtype = tensor.scal.upcast(theano.config.floatX, alpha.dtype) 552 | 553 | ndim, draw_shape, bcast = tensor.raw_random._infer_ndim_bcast(ndim, draw_shape, tmp) 554 | bcast = bcast+(alpha.type.broadcastable[-1],) 555 | 556 | op = tensor.raw_random.RandomFunction('dirichlet', 557 | tensor.TensorType(dtype=dtype, broadcastable=bcast), ndim_added=1) 558 | 559 | rstate = rstream.new_shared_rstate() 560 | new_rstate, out = op(rstate, draw_shape, alpha) 561 | rstream.add_default_update(out, rstate, new_rstate) 562 | return out 563 | 564 | def logBeta(alpha): 565 | return tensor.sum(logGamma(alpha)) - logGamma(tensor.sum(alpha)) 566 | 567 | @rng_register 568 | def dirichlet_lpdf(node, sample, kw): 569 | r, shape, alpha = node.inputs 570 | 571 | # assert sum(sample) == 1 572 | 573 | stable = tensor.eq(0, (tensor.sum(alpha <= 0.)
+ tensor.sum(sample <= 0.))) 574 | ll = -logBeta(alpha) + tensor.sum(tensor.log(sample)*(alpha-1.), axis=0) 575 | return tensor.switch(stable, ll, tensor.as_tensor_variable(float('-inf'))) 576 | 577 | # --------- 578 | # Gamma 579 | # --------- 580 | 581 | @rng_register 582 | def gamma_sampler(rstream, k, theta, draw_shape=None, ndim=None, dtype=theano.config.floatX): 583 | k = tensor.as_tensor_variable(k) 584 | theta = tensor.as_tensor_variable(theta) 585 | if dtype == None: 586 | dtype = tensor.scal.upcast(theano.config.floatX, k.dtype, theta.dtype) 587 | 588 | ndim, draw_shape, bcast = tensor.raw_random._infer_ndim_bcast(ndim, draw_shape, k, theta) 589 | op = tensor.raw_random.RandomFunction('gamma', 590 | tensor.TensorType(dtype=dtype, broadcastable=bcast)) 591 | 592 | rstate = rstream.new_shared_rstate() 593 | new_rstate, out = op(rstate, draw_shape, k, theta) 594 | rstream.add_default_update(out, rstate, new_rstate) 595 | return out 596 | 597 | @rng_register 598 | def gamma_lpdf(node, x, kw): 599 | r, shape, k, theta = node.inputs 600 | 601 | return tensor.log(x)*(k-1.) - x/theta - tensor.log(theta)*k - logGamma(k) 602 | 603 | # --------- 604 | # Multinomial 605 | # --------- 606 | 607 | @rng_register 608 | def multinomial_sampler(rstream, n=1, p=[0.5, 0.5], draw_shape=None, ndim=None, dtype=theano.config.floatX): 609 | if not isinstance(n, theano.Variable): 610 | n = tensor.shared(numpy.asarray(n, dtype=int)) 611 | if not isinstance(p, theano.Variable): 612 | p = tensor.shared(numpy.asarray(p, dtype=theano.config.floatX)) 613 | rstate = rstream.new_shared_rstate() 614 | 615 | new_rstate, out = tensor.raw_random.multinomial(rstate, draw_shape, n, p, dtype=dtype) 616 | rstream.add_default_update(out, rstate, new_rstate) 617 | return out 618 | 619 | def logFactorial(x): 620 | return logGamma(x+1.) 621 | 622 | @rng_register 623 | def multinomial_lpdf(node, x, kw): 624 | r, shape, n, p = node.inputs 625 | 626 | # TODO: how do I check this ? 627 | # assert n == tensor.sum(x) 628 | 629 | x = tensor.as_tensor_variable(x).astype(theano.config.floatX) 630 | 631 | return logFactorial(n) - tensor.sum(logFactorial(x), axis=1) + tensor.sum(tensor.log(p)*x, axis=1) 632 | 633 | # some weirdness because raw_random uses a helper function 634 | # TODO: is there a clear way to fix this ? 635 | @rng_register 636 | def multinomial_helper_sampler(*args, **kwargs): 637 | return multinomial_sampler(*args, **kwargs) 638 | 639 | @rng_register 640 | def multinomial_helper_lpdf(*args, **kwargs): 641 | return multinomial_lpdf(*args, **kwargs) 642 | 643 | # -------------------------------------------------- 644 | # Dirichlet-Multinomial 645 | # 646 | # Only the LPDF is implemented, the sampler is bogus. 
Could be used as an optimization to collapse 647 | # a dirichlet-multinomial pair into a single op 648 | # --------- 649 | 650 | class DM(theano.Op): 651 | dist_name = 'DM' 652 | def __init__(self, otype): 653 | self.otype = otype 654 | 655 | def make_node(self, s_rstate, alpha): 656 | alpha = tensor.as_tensor_variable(alpha) 657 | return theano.gof.Apply(self, 658 | [s_rstate, alpha], 659 | [s_rstate.type(), self.otype()]) 660 | 661 | def perform(self, node, inputs, output_storage): 662 | raise NotImplementedError() 663 | 664 | @rng_register 665 | def DM_sampler(rstream, alpha, draw_shape=None, ndim=None, dtype=None): 666 | shape = infer_shape(rstream.dirichlet(alpha, draw_shape=draw_shape)) 667 | rstate = rstream.new_shared_rstate() 668 | op = DM(tensor.TensorType(broadcastable=(False,)* tensor.get_vector_length(shape), dtype=theano.config.floatX)) 669 | rs, out = op(rstate, alpha) 670 | rstream.add_default_update(out, rstate, rs) 671 | return out 672 | 673 | @rng_register 674 | def DM_lpdf(node, sample, kw): 675 | r, alpha = node.inputs 676 | return logBeta(sample + alpha) - logBeta(alpha) 677 | 678 | 679 | # -------------------------------- 680 | # Gaussian Mixture Model 1D (GMM1) 681 | # -------------------------------- 682 | 683 | class GMM1(theano.Op): 684 | """ 685 | 1-dimensional Gaussian Mixture - distributed random variable 686 | 687 | weights - vector (M,) of prior mixture component probabilities 688 | mus - vector (M, ) of component centers 689 | sigmas - vector (M,) of component standard deviations 690 | """ 691 | 692 | dist_name = 'GMM1' 693 | def __init__(self, otype): 694 | self.otype = otype 695 | 696 | def __hash__(self): 697 | return hash((type(self), self.otype)) 698 | 699 | def __eq__(self, other): 700 | return type(self) == type(other) and self.otype == other.otype 701 | 702 | def make_node(self, s_rstate, weights, mus, sigmas, draw_shape): 703 | weights = tensor.as_tensor_variable(weights) 704 | mus = tensor.as_tensor_variable(mus) 705 | sigmas = tensor.as_tensor_variable(sigmas) 706 | if weights.ndim != 1: 707 | raise TypeError('weights', weights) 708 | if mus.ndim != 1: 709 | raise TypeError('mus', mus) 710 | if sigmas.ndim != 1: 711 | raise TypeError('sigmas', sigmas) 712 | return theano.gof.Apply(self, 713 | [s_rstate, weights, mus, sigmas, draw_shape], 714 | [s_rstate.type(), self.otype()]) 715 | 716 | def perform(self, node, inputs, output_storage): 717 | rstate, weights, mus, sigmas, draw_shape = inputs 718 | 719 | n_samples = numpy.prod(draw_shape) 720 | rstate = copy.copy(rstate) 721 | 722 | active = numpy.argmax( 723 | rstate.multinomial(1, weights, (n_samples,)), 724 | axis=1) 725 | assert len(active) == n_samples 726 | samples = rstate.normal(loc=mus[active], scale=sigmas[active]) 727 | samples = numpy.asarray( 728 | numpy.reshape(samples, draw_shape), 729 | dtype=self.otype.dtype) 730 | output_storage[0][0] = rstate 731 | output_storage[1][0] = samples 732 | 733 | def infer_shape(self, node, ishapes): 734 | rstate, weights, mus, sigmas, draw_shape = node.inputs 735 | return [None, [draw_shape[i] for i in range(self.otype.ndim)]] 736 | 737 | @rng_register 738 | def GMM1_sampler(rstream, weights, mus, sigmas, 739 | draw_shape=None, ndim=None, dtype=None): 740 | rstate = rstream.new_shared_rstate() 741 | 742 | # shape prep 743 | if draw_shape is None: 744 | raise NotImplementedError() 745 | elif draw_shape is tensor.as_tensor_variable(draw_shape): 746 | shape = draw_shape 747 | if ndim is None: 748 | ndim = tensor.get_vector_length(shape) 749 | else: 750 | shape =
tensor.hstack(*draw_shape) 751 | if ndim is None: 752 | ndim = len(draw_shape) 753 | assert tensor.get_vector_length(shape) == ndim 754 | 755 | # XXX: be smarter about inferring broadcastable 756 | op = GMM1( 757 | tensor.TensorType( 758 | broadcastable=(False,) * ndim, 759 | dtype=theano.config.floatX if dtype is None else dtype)) 760 | rs, out = op(rstate, weights, mus, sigmas, shape) 761 | rstream.add_default_update(out, rstate, rs) 762 | return out 763 | 764 | @rng_register 765 | def GMM1_lpdf(node, sample, kw): 766 | r, weights, mus, sigmas, draw_shape = node.inputs 767 | assert weights.ndim == 1 768 | assert mus.ndim == 1 769 | assert sigmas.ndim == 1 770 | _sample = sample 771 | if sample.ndim != 1: 772 | sample = sample.flatten() 773 | 774 | dist = (sample.dimshuffle(0, 'x') - mus) 775 | mahal = ((dist ** 2) / (sigmas ** 2)) 776 | # POSTCONDITION: mahal.shape == (n_samples, n_components) 777 | 778 | Z = tensor.sqrt(2 * numpy.pi * sigmas**2) 779 | rval = tensor.log(tensor.sum( 780 | tensor.exp(-.5 * mahal) * weights / Z, 781 | axis=1)) 782 | if not sample is _sample: 783 | rval = rval.reshape(_sample.shape) 784 | assert rval.ndim != 1 785 | return rval 786 | 787 | 788 | # ----------------------------------------- 789 | # Bounded Gaussian Mixture Model 1D (BGMM1) 790 | # ----------------------------------------- 791 | 792 | class BGMM1(theano.Op): 793 | """ 794 | Bounded 1-dimensional Gaussian Mixture - distributed random variable 795 | 796 | weights - vector (M,) of prior mixture component probabilities 797 | mus - vector (M, ) of component centers 798 | sigmas - vector (M,) of component standard deviations 799 | low - scalar 800 | high - scalar 801 | 802 | This density is a Gaussian Mixture model truncated both below (`low`) and 803 | above (`high`). 804 | """ 805 | 806 | dist_name = 'BGMM1' 807 | def __init__(self, otype): 808 | self.otype = otype 809 | 810 | def __hash__(self): 811 | return hash((type(self), self.otype)) 812 | 813 | def __eq__(self, other): 814 | return type(self) == type(other) and self.otype == other.otype 815 | 816 | def make_node(self, s_rstate, weights, mus, sigmas, low, high, draw_shape): 817 | weights = tensor.as_tensor_variable(weights) 818 | mus = tensor.as_tensor_variable(mus) 819 | sigmas = tensor.as_tensor_variable(sigmas) 820 | low = tensor.as_tensor_variable(low) 821 | high = tensor.as_tensor_variable(high) 822 | if weights.ndim != 1: 823 | raise TypeError('weights', weights) 824 | if mus.ndim != 1: 825 | raise TypeError('mus', mus) 826 | if sigmas.ndim != 1: 827 | raise TypeError('sigmas', sigmas) 828 | if low.ndim != 0: 829 | raise TypeError('low', low) 830 | if high.ndim != 0: 831 | raise TypeError('high', high) 832 | return theano.gof.Apply(self, 833 | [s_rstate, weights, mus, sigmas, low, high, draw_shape], 834 | [s_rstate.type(), self.otype()]) 835 | 836 | def perform(self, node, inputs, output_storage): 837 | rstate, weights, mus, sigmas, low, high, draw_shape = inputs 838 | 839 | n_samples = numpy.prod(draw_shape) 840 | n_components = len(weights) 841 | rstate = copy.copy(rstate) 842 | 843 | # rejection sampling, one sample at a time...
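# The expected number of proposals per accepted draw is 1/Z, where Z is the
# mixture mass that falls inside [low, high] (the quantity that
# effective_weights.sum() computes in BGMM1_lpdf below), so a narrow or
# far-off truncation interval makes this loop arbitrarily slow.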
844 | samples = [] 845 | while len(samples) < n_samples: 846 | active = numpy.argmax(rstate.multinomial(1, weights)) 847 | draw = rstate.normal(loc=mus[active], scale=sigmas[active]) 848 | if low < draw < high: 849 | samples.append(draw) 850 | samples = numpy.asarray( 851 | numpy.reshape(samples, draw_shape), 852 | dtype=self.otype.dtype) 853 | #print "BGMM drawing samples", samples.shape, samples.flatten()[:4] 854 | output_storage[0][0] = rstate 855 | output_storage[1][0] = samples 856 | 857 | def infer_shape(self, node, ishapes): 858 | rstate, weights, mus, sigmas, low, high, draw_shape = node.inputs 859 | return [None, [draw_shape[i] for i in range(self.otype.ndim)]] 860 | 861 | @rng_register 862 | def BGMM1_sampler(rstream, weights, mus, sigmas, low, high, 863 | draw_shape=None, ndim=None, dtype=None): 864 | rstate = rstream.new_shared_rstate() 865 | 866 | # shape prep 867 | if draw_shape is None: 868 | raise NotImplementedError() 869 | elif draw_shape is tensor.as_tensor_variable(draw_shape): 870 | shape = draw_shape 871 | if ndim is None: 872 | ndim = tensor.get_vector_length(shape) 873 | else: 874 | shape = tensor.hstack(*draw_shape) 875 | if ndim is None: 876 | ndim = len(draw_shape) 877 | assert tensor.get_vector_length(shape) == ndim 878 | 879 | # XXX: be smarter about inferring broadcastable 880 | op = BGMM1( 881 | tensor.TensorType( 882 | broadcastable=(False,) * ndim, 883 | dtype=theano.config.floatX if dtype is None else dtype)) 884 | rs, out = op(rstate, weights, mus, sigmas, low, high, shape) 885 | rstream.add_default_update(out, rstate, rs) 886 | return out 887 | 888 | @rng_register 889 | def BGMM1_lpdf(node, sample, kw): 890 | r, weights, mus, sigmas, low, high, draw_shape = node.inputs 891 | assert weights.ndim == 1 892 | assert mus.ndim == 1 893 | assert sigmas.ndim == 1 894 | _sample = sample 895 | if sample.ndim != 1: 896 | sample = sample.flatten() 897 | 898 | erf = theano.tensor.erf 899 | 900 | effective_weights = 0.5 * weights * ( 901 | erf((high - mus) / (sigmas * numpy.sqrt(2))) - erf((low - mus) / (sigmas * numpy.sqrt(2)))) # mass of each component inside [low, high] 902 | 903 | dist = (sample.dimshuffle(0, 'x') - mus) 904 | mahal = ((dist ** 2) / (sigmas ** 2)) 905 | # POSTCONDITION: mahal.shape == (n_samples, n_components) 906 | 907 | Z = tensor.sqrt(2 * numpy.pi * sigmas**2) 908 | rval = tensor.log( 909 | tensor.sum( 910 | tensor.true_div( 911 | tensor.exp(-.5 * mahal) * weights, 912 | Z * effective_weights.sum()), 913 | axis=1)) 914 | if not sample is _sample: 915 | rval = rval.reshape(_sample.shape) 916 | assert rval.ndim != 1 917 | return rval 918 | 919 | 920 | # ------------------------- 921 | # Mixture of Lognormal (1D) 922 | # ------------------------- 923 | 924 | class LognormalMixture(theano.Op): 925 | """ 926 | 1-dimensional lognormal mixture - distributed random variable 927 | 928 | weights - vector (M,) of prior mixture component probabilities 929 | mus - vector (M,) of component centers (in the log domain) 930 | sigmas - vector (M,) of component standard deviations (in the log domain) 931 | """ 932 | 933 | dist_name = 'lognormal_mixture' 934 | def __init__(self, otype): 935 | self.otype = otype 936 | 937 | def __hash__(self): 938 | return hash((type(self), self.otype)) 939 | 940 | def __eq__(self, other): 941 | return type(self) == type(other) and self.otype == other.otype 942 | 943 | def make_node(self, s_rstate, weights, mus, sigmas, draw_shape): 944 | weights = tensor.as_tensor_variable(weights) 945 | mus = tensor.as_tensor_variable(mus) 946 | sigmas = tensor.as_tensor_variable(sigmas) 947 | if weights.ndim != 1: 948 | raise TypeError('weights', weights)
949 | if mus.ndim != 1: 950 | raise TypeError('mus', mus) 951 | if sigmas.ndim != 1: 952 | raise TypeError('sigmas', sigmas) 953 | return theano.gof.Apply(self, 954 | [s_rstate, weights, mus, sigmas, draw_shape], 955 | [s_rstate.type(), self.otype()]) 956 | 957 | def perform(self, node, inputs, output_storage): 958 | rstate, weights, mus, sigmas, draw_shape = inputs 959 | 960 | n_samples = numpy.prod(draw_shape) 961 | n_components = len(weights) 962 | rstate = copy.copy(rstate) 963 | 964 | active = numpy.argmax( 965 | rstate.multinomial(1, weights, (n_samples,)), 966 | axis=1) 967 | assert len(active) == n_samples 968 | samples = numpy.exp( 969 | rstate.normal( 970 | loc=mus[active], 971 | scale=sigmas[active])) 972 | if not numpy.all(numpy.isfinite(samples)): 973 | logger.warning('overflow in LognormalMixture') 974 | logger.warning(' mu = %s' % str(mus[active])) 975 | logger.warning(' sigma = %s' % str(sigmas[active])) 976 | logger.warning(' samples = %s' % str(samples)) 977 | samples = numpy.asarray( 978 | numpy.reshape(samples, draw_shape), 979 | dtype=self.otype.dtype) 980 | if not numpy.all(numpy.isfinite(samples)): 981 | logger.warning('overflow in LognormalMixture after astype') 982 | logger.warning(' mu = %s' % str(mus[active])) 983 | logger.warning(' sigma = %s' % str(sigmas[active])) 984 | logger.warning(' samples = %s' % str(samples)) 985 | output_storage[0][0] = rstate 986 | output_storage[1][0] = samples 987 | 988 | def infer_shape(self, node, ishapes): 989 | rstate, weights, mus, sigmas, draw_shape = node.inputs 990 | return [None, [draw_shape[i] for i in range(self.otype.ndim)]] 991 | 992 | @rng_register 993 | def lognormal_mixture_sampler(rstream, weights, mus, sigmas, 994 | draw_shape=None, ndim=None, dtype=None): 995 | rstate = rstream.new_shared_rstate() 996 | # shape prep 997 | if draw_shape is None: 998 | raise NotImplementedError() 999 | elif draw_shape is tensor.as_tensor_variable(draw_shape): 1000 | shape = draw_shape 1001 | if ndim is None: 1002 | ndim = tensor.get_vector_length(shape) 1003 | else: 1004 | shape = tensor.hstack(*draw_shape) 1005 | if ndim is None: 1006 | ndim = len(draw_shape) 1007 | assert tensor.get_vector_length(shape) == ndim 1008 | 1009 | # XXX: be smarter about inferring broadcastable 1010 | op = LognormalMixture( 1011 | tensor.TensorType( 1012 | broadcastable=(False,) * ndim, 1013 | dtype=theano.config.floatX if dtype is None else dtype)) 1014 | rs, out = op(rstate, weights, mus, sigmas, shape) 1015 | rstream.add_default_update(out, rstate, rs) 1016 | return out 1017 | 1018 | @rng_register 1019 | def lognormal_mixture_lpdf(node, sample, kw): 1020 | r, weights, mus, sigmas, draw_shape = node.inputs 1021 | assert weights.ndim == 1 1022 | assert mus.ndim == 1 1023 | assert sigmas.ndim == 1 1024 | _sample = sample 1025 | if sample.ndim != 1: 1026 | sample = sample.flatten() 1027 | 1028 | # compute the lpdf of each sample under each component 1029 | lpdfs = lognormal_lpdf_math(sample.dimshuffle(0, 'x'), mus, sigmas) 1030 | assert lpdfs.ndim == 2 1031 | 1032 | # XXX: Make sure this is done in a numerically good way 1033 | rval = tensor.log( 1034 | tensor.sum( 1035 | tensor.exp(lpdfs) * weights, 1036 | axis=1)) 1037 | 1038 | if not sample is _sample: 1039 | rval = rval.reshape(_sample.shape) 1040 | assert rval.ndim != 1 1041 | return rval 1042 | 1043 | 1044 | # ------------------------- 1045 | # Mixture of Lognormal (1D) 1046 | # ------------------------- 1047 | 1048 | class QuantizedLognormalMixture(theano.Op): 1049 | """ 1050 | 1-dimensional 
# -----------------------------------
# Mixture of Quantized Lognormal (1D)
# -----------------------------------

class QuantizedLognormalMixture(theano.Op):
    """
    1-dimensional quantized-lognormal-mixture-distributed random variable

    weights - vector (M,) of prior mixture component probabilities
    mus - vector (M,) of component centers (means of the underlying normals)
    sigmas - vector (M,) of component standard deviations (not variances)
    step - scalar quantization step; draws are rounded up to multiples of it
    """

    dist_name = 'quantized_lognormal_mixture'
    def __init__(self, otype):
        self.otype = otype

    def __hash__(self):
        return hash((type(self), self.otype))

    def __eq__(self, other):
        return type(self) == type(other) and self.otype == other.otype

    def make_node(self, s_rstate, draw_shape, weights, mus, sigmas, step):
        weights = tensor.as_tensor_variable(weights)
        mus = tensor.as_tensor_variable(mus)
        sigmas = tensor.as_tensor_variable(sigmas)
        step = tensor.as_tensor_variable(step)
        if weights.ndim != 1:
            raise TypeError('weights', weights)
        if mus.ndim != 1:
            raise TypeError('mus', mus)
        if sigmas.ndim != 1:
            raise TypeError('sigmas', sigmas)
        if step.ndim != 0:
            raise TypeError('step', step)
        return theano.gof.Apply(self,
                [s_rstate, draw_shape, weights, mus, sigmas, step],
                [s_rstate.type(), self.otype()])

    def perform(self, node, inputs, output_storage):
        rstate, draw_shape, weights, mus, sigmas, step = inputs

        if len(weights) != len(mus):
            raise ValueError('length mismatch between weights and mus',
                    (weights.shape, mus.shape))
        if len(weights) != len(sigmas):
            raise ValueError('length mismatch between weights and sigmas',
                    (weights.shape, sigmas.shape))
        if len(weights) == 0:
            raise ValueError('length of weights vector must be positive',
                    weights.shape)

        n_samples = numpy.prod(draw_shape)
        n_components = len(weights)
        # XXX: add destructive version
        rstate = copy.copy(rstate)

        if n_samples == 0:
            samples = numpy.empty((0,), dtype=node.outputs[1].dtype)
        elif n_samples == 1:
            active = numpy.argmax(rstate.multinomial(1, weights))
            samples = rstate.lognormal(
                    mean=mus[active],
                    sigma=sigmas[active])
            samples = numpy.asarray(numpy.ceil(samples / step) * step)
            assert samples.ndim == 0
            if len(draw_shape) == 0:
                samples.shape = ()
            else:
                samples.shape = (1,)
        else:
            active = numpy.argmax(
                    rstate.multinomial(1, weights, (n_samples,)),
                    axis=1)
            assert len(active) == n_samples
            samples = rstate.lognormal(
                    mean=mus[active],
                    sigma=sigmas[active])
            assert len(samples) == n_samples
            samples = numpy.ceil(samples / step) * step
            samples.shape = tuple(draw_shape)

        if not numpy.all(numpy.isfinite(samples)):
            logger.warning('overflow in QuantizedLognormalMixture')
            logger.warning(' mu = %s' % str(mus[active]))
            logger.warning(' sigma = %s' % str(sigmas[active]))
            logger.warning(' samples = %s' % str(samples))

        # ceil-quantization keeps every sample strictly positive;
        # re-check after otype.filter in case of a lossy downcast.
        if samples.size:
            assert samples.min() > 0
        samples = self.otype.filter(samples, allow_downcast=True)
        if samples.size:
            assert samples.min() > 0

        output_storage[0][0] = rstate
        output_storage[1][0] = samples

    def infer_shape(self, node, ishapes):
        rstate, draw_shape, weights, mus, sigmas, step = node.inputs
        return [None, [draw_shape[i] for i in range(self.otype.ndim)]]

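# Quantization note (sketch, arbitrary numbers): `perform` rounds every
# lognormal draw *up* to the next multiple of `step`, so samples are always
# strictly positive multiples of `step`.  This is also why the lpdf below
# integrates the CDF over the half-open interval (x - step, x].
#
#     import numpy as np
#     draws = np.asarray([0.1, 0.49, 0.51, 1.2])
#     np.ceil(draws / 0.5) * 0.5  # -> array([0.5, 0.5, 1.0, 1.5])
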
@rng_register
def quantized_lognormal_mixture_sampler(rstream, weights, mus, sigmas, step,
        draw_shape=None, ndim=None, dtype=None):
    rstate = rstream.new_shared_rstate()
    # shape prep
    if draw_shape is None:
        raise NotImplementedError()
    elif draw_shape is tensor.as_tensor_variable(draw_shape):
        # draw_shape is already a symbolic shape vector
        shape = draw_shape
        if ndim is None:
            ndim = tensor.get_vector_length(shape)
    elif tuple(draw_shape) == ():
        # scalar draw: empty shape vector
        ndim = 0
        shape = tensor.as_tensor_variable(
                numpy.asarray([], dtype='int'))
    else:
        # draw_shape is a sequence of scalars: stack them into a shape vector
        shape = tensor.stack(*draw_shape)
        if ndim is None:
            ndim = len(draw_shape)
        assert tensor.get_vector_length(shape) == ndim

    # XXX: be smarter about inferring broadcastable
    op = QuantizedLognormalMixture(
            tensor.TensorType(
                broadcastable=(False,) * ndim,
                dtype=theano.config.floatX if dtype is None else dtype))
    rs, out = op(rstate, shape, weights, mus, sigmas, step)
    rstream.add_default_update(out, rstate, rs)
    return out


@rng_register
def quantized_lognormal_mixture_lpdf(node, sample, kw):
    r, draw_shape, weights, mus, sigmas, step = node.inputs
    assert weights.ndim == 1
    assert mus.ndim == 1
    assert sigmas.ndim == 1
    assert step.ndim == 0
    _sample = sample
    if sample.ndim != 1:
        sample = sample.flatten()

    # log probability *mass* of each sample under each component: the
    # lognormal CDF integrated over one quantization step (x - step, x].
    # The 1e-7 guards against log(0) in the far tails.
    lpdfs = tensor.log(
            lognormal_cdf_math(
                sample.dimshuffle(0, 'x'),
                mus,
                sigmas)
            - lognormal_cdf_math(
                sample.dimshuffle(0, 'x') - step,
                mus,
                sigmas)
            + 1.0e-7)
    assert lpdfs.ndim == 2

    # Mixture lpdf via log-sum-exp, so that very negative component
    # lpdfs do not underflow to exp(...) == 0.
    max_lpdf = tensor.max(lpdfs, axis=1)
    rval = max_lpdf + tensor.log(
            tensor.sum(
                tensor.exp(lpdfs - max_lpdf.dimshuffle(0, 'x')) * weights,
                axis=1))

    if sample is not _sample:
        rval = rval.reshape(_sample.shape)
        assert rval.ndim != 1
    return rval
--------------------------------------------------------------------------------