├── README
├── data
    └── faithful.txt
├── setup.py
└── src
    ├── __init__.py
    ├── gmm.py
    ├── normal.py
    ├── plot_gmm.py
    ├── plot_normal.py
    ├── randcov.py
    ├── test_func.py
    ├── test_gmm.py
    └── test_normal.py


/README:
--------------------------------------------------------------------------------
 1 | Gaussian Mixture Models in Python
 2 | 
 3 | Author: Jeremy Stober
 4 | Contact: stober@gmail.com
 5 | Version: 0.01
 6 | 
 7 | This is a standalone Pythonic implementation of Gaussian Mixture
 8 | Models. Various initialization strategies are included along with a
 9 | standard EM algorithm for determining the model parameters based on
10 | data.
11 | 
12 | Example code for the GMM and Normal classes can be found in the
13 | src/test_*.py files. The GMM and the underlying Normal class both
14 | support conditioning on data and marginalization for any subset of the
15 | variables. This makes this implementation ideal for experimenting with
16 | Gaussian Mixture Regression. For example, the following code learns
17 | the cosine function:
18 | 
19 | 
20 | import numpy as np
21 | from gmm import GMM
22 | from plot_gmm import draw2dgmm
23 | from test_func import noisy_cosine
24 | import pylab as pl
25 | 
26 | x,y = noisy_cosine()
27 | data = np.vstack([x,y]).transpose()
28 | pl.scatter(data[:,0],data[:,1])
29 | 
30 | gmm = GMM(dim = 2, ncomps = 2, data = data, method = "kmeans")
31 | draw2dgmm(gmm)
32 | 
33 | nx = np.arange(0,2 * np.pi, 0.1)
34 | ny = []
35 | for i in nx:
36 |     ngmm = gmm.condition([0],[i])
37 |     ny.append(ngmm.mean()) 
38 | 
39 | pl.plot(nx,ny,color='red')
40 | pl.show()
41 | 
42 | 
43 | 


--------------------------------------------------------------------------------
/data/faithful.txt:
--------------------------------------------------------------------------------
  1 | 3.600000 79.000000
  2 | 1.800000 54.000000
  3 | 3.333000 74.000000
  4 | 2.283000 62.000000
  5 | 4.533000 85.000000
  6 | 2.883000 55.000000
  7 | 4.700000 88.000000
  8 | 3.600000 85.000000
  9 | 1.950000 51.000000
 10 | 4.350000 85.000000
 11 | 1.833000 54.000000
 12 | 3.917000 84.000000
 13 | 4.200000 78.000000
 14 | 1.750000 47.000000
 15 | 4.700000 83.000000
 16 | 2.167000 52.000000
 17 | 1.750000 62.000000
 18 | 4.800000 84.000000
 19 | 1.600000 52.000000
 20 | 4.250000 79.000000
 21 | 1.800000 51.000000
 22 | 1.750000 47.000000
 23 | 3.450000 78.000000
 24 | 3.067000 69.000000
 25 | 4.533000 74.000000
 26 | 3.600000 83.000000
 27 | 1.967000 55.000000
 28 | 4.083000 76.000000
 29 | 3.850000 78.000000
 30 | 4.433000 79.000000
 31 | 4.300000 73.000000
 32 | 4.467000 77.000000
 33 | 3.367000 66.000000
 34 | 4.033000 80.000000
 35 | 3.833000 74.000000
 36 | 2.017000 52.000000
 37 | 1.867000 48.000000
 38 | 4.833000 80.000000
 39 | 1.833000 59.000000
 40 | 4.783000 90.000000
 41 | 4.350000 80.000000
 42 | 1.883000 58.000000
 43 | 4.567000 84.000000
 44 | 1.750000 58.000000
 45 | 4.533000 73.000000
 46 | 3.317000 83.000000
 47 | 3.833000 64.000000
 48 | 2.100000 53.000000
 49 | 4.633000 82.000000
 50 | 2.000000 59.000000
 51 | 4.800000 75.000000
 52 | 4.716000 90.000000
 53 | 1.833000 54.000000
 54 | 4.833000 80.000000
 55 | 1.733000 54.000000
 56 | 4.883000 83.000000
 57 | 3.717000 71.000000
 58 | 1.667000 64.000000
 59 | 4.567000 77.000000
 60 | 4.317000 81.000000
 61 | 2.233000 59.000000
 62 | 4.500000 84.000000
 63 | 1.750000 48.000000
 64 | 4.800000 82.000000
 65 | 1.817000 60.000000
 66 | 4.400000 92.000000
 67 | 4.167000 78.000000
 68 | 4.700000 78.000000
 69 | 2.067000 65.000000
 70 | 4.700000 73.000000
 71 | 4.033000 82.000000
 72 | 1.967000 56.000000
 73 | 4.500000 79.000000
 74 | 4.000000 71.000000
 75 | 1.983000 62.000000
 76 | 5.067000 76.000000
 77 | 2.017000 60.000000
 78 | 4.567000 78.000000
 79 | 3.883000 76.000000
 80 | 3.600000 83.000000
 81 | 4.133000 75.000000
 82 | 4.333000 82.000000
 83 | 4.100000 70.000000
 84 | 2.633000 65.000000
 85 | 4.067000 73.000000
 86 | 4.933000 88.000000
 87 | 3.950000 76.000000
 88 | 4.517000 80.000000
 89 | 2.167000 48.000000
 90 | 4.000000 86.000000
 91 | 2.200000 60.000000
 92 | 4.333000 90.000000
 93 | 1.867000 50.000000
 94 | 4.817000 78.000000
 95 | 1.833000 63.000000
 96 | 4.300000 72.000000
 97 | 4.667000 84.000000
 98 | 3.750000 75.000000
 99 | 1.867000 51.000000
100 | 4.900000 82.000000
101 | 2.483000 62.000000
102 | 4.367000 88.000000
103 | 2.100000 49.000000
104 | 4.500000 83.000000
105 | 4.050000 81.000000
106 | 1.867000 47.000000
107 | 4.700000 84.000000
108 | 1.783000 52.000000
109 | 4.850000 86.000000
110 | 3.683000 81.000000
111 | 4.733000 75.000000
112 | 2.300000 59.000000
113 | 4.900000 89.000000
114 | 4.417000 79.000000
115 | 1.700000 59.000000
116 | 4.633000 81.000000
117 | 2.317000 50.000000
118 | 4.600000 85.000000
119 | 1.817000 59.000000
120 | 4.417000 87.000000
121 | 2.617000 53.000000
122 | 4.067000 69.000000
123 | 4.250000 77.000000
124 | 1.967000 56.000000
125 | 4.600000 88.000000
126 | 3.767000 81.000000
127 | 1.917000 45.000000
128 | 4.500000 82.000000
129 | 2.267000 55.000000
130 | 4.650000 90.000000
131 | 1.867000 45.000000
132 | 4.167000 83.000000
133 | 2.800000 56.000000
134 | 4.333000 89.000000
135 | 1.833000 46.000000
136 | 4.383000 82.000000
137 | 1.883000 51.000000
138 | 4.933000 86.000000
139 | 2.033000 53.000000
140 | 3.733000 79.000000
141 | 4.233000 81.000000
142 | 2.233000 60.000000
143 | 4.533000 82.000000
144 | 4.817000 77.000000
145 | 4.333000 76.000000
146 | 1.983000 59.000000
147 | 4.633000 80.000000
148 | 2.017000 49.000000
149 | 5.100000 96.000000
150 | 1.800000 53.000000
151 | 5.033000 77.000000
152 | 4.000000 77.000000
153 | 2.400000 65.000000
154 | 4.600000 81.000000
155 | 3.567000 71.000000
156 | 4.000000 70.000000
157 | 4.500000 81.000000
158 | 4.083000 93.000000
159 | 1.800000 53.000000
160 | 3.967000 89.000000
161 | 2.200000 45.000000
162 | 4.150000 86.000000
163 | 2.000000 58.000000
164 | 3.833000 78.000000
165 | 3.500000 66.000000
166 | 4.583000 76.000000
167 | 2.367000 63.000000
168 | 5.000000 88.000000
169 | 1.933000 52.000000
170 | 4.617000 93.000000
171 | 1.917000 49.000000
172 | 2.083000 57.000000
173 | 4.583000 77.000000
174 | 3.333000 68.000000
175 | 4.167000 81.000000
176 | 4.333000 81.000000
177 | 4.500000 73.000000
178 | 2.417000 50.000000
179 | 4.000000 85.000000
180 | 4.167000 74.000000
181 | 1.883000 55.000000
182 | 4.583000 77.000000
183 | 4.250000 83.000000
184 | 3.767000 83.000000
185 | 2.033000 51.000000
186 | 4.433000 78.000000
187 | 4.083000 84.000000
188 | 1.833000 46.000000
189 | 4.417000 83.000000
190 | 2.183000 55.000000
191 | 4.800000 81.000000
192 | 1.833000 57.000000
193 | 4.800000 76.000000
194 | 4.100000 84.000000
195 | 3.966000 77.000000
196 | 4.233000 81.000000
197 | 3.500000 87.000000
198 | 4.366000 77.000000
199 | 2.250000 51.000000
200 | 4.667000 78.000000
201 | 2.100000 60.000000
202 | 4.350000 82.000000
203 | 4.133000 91.000000
204 | 1.867000 53.000000
205 | 4.600000 78.000000
206 | 1.783000 46.000000
207 | 4.367000 77.000000
208 | 3.850000 84.000000
209 | 1.933000 49.000000
210 | 4.500000 83.000000
211 | 2.383000 71.000000
212 | 4.700000 80.000000
213 | 1.867000 49.000000
214 | 3.833000 75.000000
215 | 3.417000 64.000000
216 | 4.233000 76.000000
217 | 2.400000 53.000000
218 | 4.800000 94.000000
219 | 2.000000 55.000000
220 | 4.150000 76.000000
221 | 1.867000 50.000000
222 | 4.267000 82.000000
223 | 1.750000 54.000000
224 | 4.483000 75.000000
225 | 4.000000 78.000000
226 | 4.117000 79.000000
227 | 4.083000 78.000000
228 | 4.267000 78.000000
229 | 3.917000 70.000000
230 | 4.550000 79.000000
231 | 4.083000 70.000000
232 | 2.417000 54.000000
233 | 4.183000 86.000000
234 | 2.217000 50.000000
235 | 4.450000 90.000000
236 | 1.883000 54.000000
237 | 1.850000 54.000000
238 | 4.283000 77.000000
239 | 3.950000 79.000000
240 | 2.333000 64.000000
241 | 4.150000 75.000000
242 | 2.350000 47.000000
243 | 4.933000 86.000000
244 | 2.900000 63.000000
245 | 4.583000 85.000000
246 | 3.833000 82.000000
247 | 2.083000 57.000000
248 | 4.367000 82.000000
249 | 2.133000 67.000000
250 | 4.350000 74.000000
251 | 2.200000 54.000000
252 | 4.450000 83.000000
253 | 3.567000 73.000000
254 | 4.500000 73.000000
255 | 4.150000 88.000000
256 | 3.817000 80.000000
257 | 3.917000 71.000000
258 | 4.450000 83.000000
259 | 2.000000 56.000000
260 | 4.283000 79.000000
261 | 4.767000 78.000000
262 | 4.533000 84.000000
263 | 1.850000 58.000000
264 | 4.250000 83.000000
265 | 1.983000 43.000000
266 | 2.250000 60.000000
267 | 4.750000 75.000000
268 | 4.117000 81.000000
269 | 2.150000 46.000000
270 | 4.417000 90.000000
271 | 1.817000 46.000000
272 | 4.467000 74.000000
273 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/env python
 2 | """
 3 | @author: stober
 4 | """
 5 | 
 6 | 
 7 | from distutils.core import setup
 8 | 
# Package metadata for the distutils installer.  The sources live in the
# top-level 'src' directory; package_dir maps that directory onto the
# importable package name 'gmm'.
# NOTE(review): the README states "Version: 0.01" while this call declares
# '0.1' -- confirm which value is intended.
setup(name='gmm',
      version='0.1',
      description='Gaussian Mixture Models',
      author='Jeremy Stober',
      author_email='stober@gmail.com',
      package_dir={'gmm':'src'},
      packages=['gmm'],
      )
17 | 
18 | 
19 | 
20 | 


--------------------------------------------------------------------------------
/src/__init__.py:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/env python
 2 | """
 3 | Author: Jeremy M. Stober
 4 | Program: __INIT__.PY
 5 | Date: Wednesday, May 23 2012
 6 | """
 7 | 
 8 | 
 9 | from normal import Normal
10 | from gmm import GMM, shownormal
11 | 
12 | 


--------------------------------------------------------------------------------
/src/gmm.py:
--------------------------------------------------------------------------------
  1 | #! /usr/bin/env python
  2 | """
  3 | Author: Jeremy M. Stober
  4 | Program: GMM.PY
  5 | Date: Friday, June 24 2011
  6 | Description: A python class for creating and manipulating GMMs.
  7 | """
  8 | 
  9 | import scipy.cluster.vq as vq
 10 | import numpy as np
 11 | import numpy.linalg as la
 12 | import numpy.random as npr
 13 | import random as pr
 14 | npa = np.array
 15 | 
 16 | import sys; sys.path.append('.')
 17 | import pdb
 18 | 
 19 | #import matplotlib
 20 | import pylab
 21 | from normal import Normal
 22 | 
 23 | class GMM(object):
 24 | 
 25 |     def __init__(self, dim = None, ncomps = None, data = None,  method = None, filename = None, params = None):
 26 | 
 27 |         if not filename is None:  # load from file
 28 |             self.load_model(filename)
 29 | 
 30 |         elif not params is None: # initialize with parameters directly
 31 |             self.comps = params['comps']
 32 |             self.ncomps = params['ncomps']
 33 |             self.dim = params['dim']
 34 |             self.priors = params['priors']
 35 | 
 36 |         elif not data is None: # initialize from data
 37 | 
 38 |             assert dim and ncomps, "Need to define dim and ncomps."
 39 | 
 40 |             self.dim = dim
 41 |             self.ncomps = ncomps
 42 |             self.comps = []
 43 | 
 44 |             if method is "uniform":
 45 |                 # uniformly assign data points to components then estimate the parameters
 46 |                 npr.shuffle(data)
 47 |                 n = len(data)
 48 |                 s = n / ncomps
 49 |                 for i in range(ncomps):
 50 |                     self.comps.append(Normal(dim, data = data[i * s: (i+1) * s]))
 51 | 
 52 |                 self.priors = np.ones(ncomps, dtype = "double") / ncomps
 53 | 
 54 |             elif method is "random":
 55 |                 # choose ncomp points from data randomly then estimate the parameters
 56 |                 mus = pr.sample(data,ncomps)
 57 |                 clusters = [[] for i in range(ncomps)]
 58 |                 for d in data:
 59 |                     i = np.argmin([la.norm(d - m) for m in mus])
 60 |                     clusters[i].append(d)
 61 | 
 62 |                 for i in range(ncomps):
 63 |                     print mus[i], clusters[i]
 64 |                     self.comps.append(Normal(dim, mu = mus[i], sigma = np.cov(clusters[i], rowvar=0)))
 65 | 
 66 |                 self.priors = np.ones(ncomps, dtype="double") / np.array([len(c) for c in clusters])
 67 | 
 68 |             elif method is "kmeans":
 69 |                 # use kmeans to initialize the parameters
 70 |                 (centroids, labels) = vq.kmeans2(data, ncomps, minit="points", iter=100)
 71 |                 clusters = [[] for i in range(ncomps)]
 72 |                 for (l,d) in zip(labels,data):
 73 |                     clusters[l].append(d)
 74 | 
 75 |                 # will end up recomputing the cluster centers
 76 |                 for cluster in clusters:
 77 |                     self.comps.append(Normal(dim, data = cluster))
 78 | 
 79 |                 self.priors = np.ones(ncomps, dtype="double") / np.array([len(c) for c in clusters])
 80 | 
 81 |             else:
 82 |                 raise ValueError, "Unknown method type!"
 83 | 
 84 |         else:
 85 | 
 86 |             # these need to be defined
 87 |             assert dim and ncomps, "Need to define dim and ncomps."
 88 | 
 89 |             self.dim = dim
 90 |             self.ncomps = ncomps
 91 | 
 92 |             self.comps = []
 93 | 
 94 |             for i in range(ncomps):
 95 |                 self.comps.append(Normal(dim))
 96 | 
 97 |             self.priors = np.ones(ncomps,dtype='double') / ncomps
 98 | 
 99 |     def __str__(self):
100 |         res = "%d" % self.dim
101 |         res += "\n%s" % str(self.priors)
102 |         for comp in self.comps:
103 |             res += "\n%s" % str(comp)
104 |         return res
105 | 
106 |     def save_model(self):
107 |         pass
108 | 
109 |     def load_model(self):
110 |         pass
111 | 
112 |     def mean(self):
113 |         return np.sum([self.priors[i] * self.comps[i].mean() for i in range(self.ncomps)], axis=0)
114 | 
115 |     def covariance(self): # computed using Dan's method
116 |         m = self.mean()
117 |         s = -np.outer(m,m)
118 | 
119 |         for i in range(self.ncomps):
120 |             cm = self.comps[i].mean()
121 |             cvar = self.comps[i].covariance()
122 |             s += self.priors[i] * (np.outer(cm,cm) + cvar)
123 | 
124 |         return s
125 | 
126 |     def pdf(self, x):
127 |         responses = [comp.pdf(x) for comp in self.comps]
128 |         return np.dot(self.priors, responses)
129 | 
130 |     def condition(self, indices, x):
131 |         """
132 |         Create a new GMM conditioned on data x at indices.
133 |         """
134 |         condition_comps = []
135 |         marginal_comps = []
136 | 
137 |         for comp in self.comps:
138 |             condition_comps.append(comp.condition(indices, x))
139 |             marginal_comps.append(comp.marginalize(indices))
140 | 
141 |         new_priors = []
142 |         for (i,prior) in enumerate(self.priors):
143 |             new_priors.append(prior * marginal_comps[i].pdf(x))
144 |         new_priors = npa(new_priors) / np.sum(new_priors)
145 | 
146 |         params = {'ncomps' : self.ncomps, 'comps' : condition_comps,
147 |                   'priors' : new_priors, 'dim' : marginal_comps[0].dim}
148 | 
149 |         return GMM(params = params)
150 | 
151 |     def em(self, data, nsteps = 100):
152 | 
153 |         k = self.ncomps
154 |         d = self.dim
155 |         n = len(data)
156 | 
157 |         for l in range(nsteps):
158 | 
159 |             # E step
160 | 
161 |             responses = np.zeros((k,n))
162 | 
163 |             for j in range(n):
164 |                 for i in range(k):
165 |                     responses[i,j] = self.priors[i] * self.comps[i].pdf(data[j])
166 | 
167 |             responses = responses / np.sum(responses,axis=0) # normalize the weights
168 | 
169 |             # M step
170 | 
171 |             N = np.sum(responses,axis=1)
172 | 
173 |             for i in range(k):
174 |                 mu = np.dot(responses[i,:],data) / N[i]
175 |                 sigma = np.zeros((d,d))
176 | 
177 |                 for j in range(n):
178 |                    sigma += responses[i,j] * np.outer(data[j,:] - mu, data[j,:] - mu)
179 | 
180 |                 sigma = sigma / N[i]
181 | 
182 |                 self.comps[i].update(mu,sigma) # update the normal with new parameters
183 |                 self.priors[i] = N[i] / np.sum(N) # normalize the new priors
184 | 
185 | 
def _covariance_ellipse(mean, cov, nstd=2.0):
    """Build an unfilled matplotlib Ellipse spanning nstd standard deviations
    along the principal axes of a 2x2 covariance matrix."""
    from matplotlib.patches import Ellipse

    # eigh returns eigenvalues in ascending order, so the last eigenvector
    # points along the major axis.
    vals, vecs = la.eigh(cov)
    angle = np.degrees(np.arctan2(vecs[1, -1], vecs[0, -1]))
    width = 2.0 * nstd * np.sqrt(vals[-1])
    height = 2.0 * nstd * np.sqrt(vals[0])
    return Ellipse(xy=(mean[0], mean[1]), width=width, height=height,
                   angle=angle, fill=False)


def shownormal(data, gmm):
    """
    Scatter-plot 2-d data and overlay one ellipse per mixture component.

    Parameters
    ----------
    data : array
        Observations, one per row; columns 0 and 1 are plotted.
    gmm : GMM
        The mixture whose components are drawn.

    The figure is shown with pylab.show().
    """
    xnorm = data[:, 0]
    ynorm = data[:, 1]

    # Plot the data points.
    fig = pylab.figure(num=1, figsize=(4, 4))
    axes = fig.add_subplot(111)
    axes.plot(xnorm, ynorm, '+')

    # Plot the ellipses representing the principal components of the normals.
    for comp in gmm.comps:
        if hasattr(comp, 'patch'):
            comp.patch(axes)
        else:
            # Components without their own patch() method get an ellipse
            # derived from their mean and covariance instead of failing.
            axes.add_patch(_covariance_ellipse(comp.mean(), comp.covariance()))

    pylab.draw()
    pylab.show()
202 | 
203 | 
if __name__ == '__main__':

    # Tests for gmm module.

    # Earlier experiments, kept for reference (marked broken by the author):
    #
    # x = npr.randn(20, 2)
    #
    # print "No data"
    # gmm = GMM(2,1,2) # possibly also broken
    # print gmm
    #
    # print "Uniform"
    # gmm = GMM(2,1,2,data = x, method = "uniform")
    # print gmm
    #
    # print "Random"
    # gmm = GMM(2,1,2,data = x, method = "random") # broken
    # print gmm
    #
    # print "Kmeans"
    # gmm = GMM(2,1,2,data = x, method = "kmeans") # possibly broken
    # print gmm

    # Noisy samples of a simple linear function, one (x, y) pair per row.
    x = np.arange(-10, 30)
    #y = x ** 2 + npr.randn(20)
    y = x + npr.randn(40)  # simple linear function
    #y = np.sin(x) + npr.randn(20)
    data = np.vstack([x, y]).T
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(data.shape)

    # Show the model right after initialization ...
    gmm = GMM(dim=2, ncomps=4, data=data, method="random")
    print(gmm)
    shownormal(data, gmm)

    # ... and again after fitting with EM.
    gmm.em(data, nsteps=1000)
    shownormal(data, gmm)
    print(gmm)

    # Condition on variable 0 taking the value -3 and report the result.
    ngmm = gmm.condition([0], [-3])
    print(ngmm.mean())
    print(ngmm.covariance())
248 | 


--------------------------------------------------------------------------------
/src/normal.py:
--------------------------------------------------------------------------------
  1 | #! /usr/bin/env python
  2 | """
  3 | Author: Jeremy M. Stober
  4 | Program: NORMAL.PY
  5 | Date: Friday, July 7, 2011
  6 | Description: Manipulating normal distributions.
  7 | """
  8 | 
  9 | import numpy as np
 10 | import numpy.linalg as la
 11 | import numpy.random as npr
 12 | import random as pr
 13 | npa = np.array
 14 | ix  = np.ix_ # urgh - sometimes numpy is ugly!
 15 | 
 16 | class Normal(object):
 17 |     """
 18 |     A class for storing the parameters of a multivariate normal
 19 |     distribution. Supports evaluation, sampling, conditioning and
 20 |     marginalization.
 21 |     """
 22 | 
 23 |     def __init__(self, dim, mu = None, sigma = None, data = None,
 24 |                  parent = None, cond = None, margin = None):
 25 |         """
 26 |         Initialize a normal distribution.
 27 | 
 28 |         Parameters
 29 |         ----------
 30 |         dim : int
 31 |             Number of dimensions (e.g. number of components in the mu parameter).
 32 |         mu : array, optional
 33 |             The mean of the normal distribution.
 34 |         sigma : array, optional
 35 |             The covariance matrix of the normal distribution.
 36 |         data : array, optional
 37 |             If provided, the parameters of the distribution will be estimated from the data. Rows are observations, columns are components.
 38 |         parent : Normal, optional
 39 |             A reference to a parent distribution that was marginalized or conditioned.
 40 |         cond : dict, optional
 41 |             A dict of parameters describing how the parent distribution was conditioned.
 42 |         margin : dict, optional
 43 |             A dict of parameters describing how the parent distribution was marginalized.
 44 | 
 45 |         Examples
 46 |         --------
 47 |         >>> x = Normal(2,mu = np.array([0.1,0.7]), sigma = np.array([[ 0.6,  0.4], [ 0.4,  0.6]]))
 48 |         >>> print x
 49 |         [ 0.1  0.7]
 50 |         [[ 0.6  0.4]
 51 |         [ 0.4  0.6]]
 52 | 
 53 |         To condition on a value (and index):
 54 |         
 55 |         >>> condx = x.condition([0],0.1)
 56 |         >>> print condx
 57 |         [ 0.7]
 58 |         [[ 0.33333333]]
 59 |         
 60 |         """
 61 | 
 62 |         self.dim = dim # full data dimension
 63 | 
 64 |         if not mu is None  and not sigma is None:
 65 |             pass
 66 |         elif not data is None:
 67 |             # estimate the parameters from data - rows are samples, cols are variables
 68 |             mu, sigma = self.estimate(data)
 69 |         else:
 70 |             # generate random means
 71 |             mu = npr.randn(dim)
 72 |             sigma = np.eye(dim)
 73 | 
 74 |         self.cond = cond
 75 |         self.margin = margin
 76 |         self.parent = parent
 77 | 
 78 |         self.update(npa(mu),npa(sigma))
 79 | 
 80 | 
 81 |     def update(self, mu, sigma):
 82 |         """
 83 |         Update the distribution with new parameters.
 84 | 
 85 |         Parameters
 86 |         ----------
 87 |         mu : array
 88 |             The new mean parameters.
 89 |         sigma : array
 90 |             The new covariance matrix.
 91 | 
 92 |         Example
 93 |         -------
 94 | 
 95 |         >>> x = Normal(2,mu = np.array([0.1,0.7]), sigma = np.array([[ 0.6,  0.4], [ 0.4,  0.6]]))
 96 |         >>> print x
 97 |         [ 0.1  0.7]
 98 |         [[ 0.6  0.4]
 99 |         [ 0.4  0.6]]
100 | 
101 |         >>> x.update(np.array([0.0,0.0]), x.E)
102 |         >>> print x
103 |         [ 0.0  0.0]
104 |         [[ 0.6  0.4]
105 |         [ 0.4  0.6]]
106 |         """
107 | 
108 |         self.mu = mu
109 |         self.E = sigma
110 | 
111 |         det = None
112 |         if self.dim == 1:
113 |             self.A = 1.0 / self.E
114 |             det = np.fabs(self.E[0])
115 |         else:
116 |             self.A = la.inv(self.E) # precision matrix
117 |             det = np.fabs(la.det(self.E))
118 | 
119 |         self.factor = (2.0 * np.pi)**(self.dim / 2.0) * (det)**(0.5)
120 | 
121 |     def __str__(self):
122 |         return "%s\n%s" % (str(self.mu), str(self.E))
123 | 
124 |     def mean(self):
125 |         return self.mu
126 | 
127 |     def covariance(self):
128 |         return self.E
129 | 
130 |     def pdf(self, x):
131 |         dx = x - self.mu
132 |         A = self.A
133 |         fE = self.factor
134 | 
135 |         return np.exp(-0.5 * np.dot(np.dot(dx,A),dx)) / fE
136 | 
137 |     def pdf_mesh(self, x, y):
138 |         # for 2d meshgrids
139 |         # use matplotlib.mlab.bivariate_normal -- faster (vectorized)
140 | 
141 |         z = np.zeros((len(y),len(x)))
142 |         
143 |         for (i,v) in enumerate(x):
144 |             for (j,w) in enumerate(y):
145 |                 z[j,i] = self.pdf([v,w])
146 |         
147 |         return z
148 | 
149 |     def simulate(self, ndata = 100):
150 |         """
151 |         Draw pts from the distribution.
152 |         """
153 |         return npr.multivariate_normal(self.mu, self.E, ndata)
154 | 
155 |     def estimate(self, data):
156 |         mu = np.mean(data, axis=0)
157 |         sigma = np.cov(data, rowvar=0)
158 |         return mu, sigma
159 | 
160 |     def marginalize(self, indices):
161 |         """
162 |         Creates a new marginal normal distribution for ''indices''.
163 |         """
164 |         indices = npa(indices)
165 |         return Normal(len(indices), mu = self.mu[indices], sigma = self.E[ix(indices,indices)], margin = {'indices' : indices}, parent = self)
166 | 
167 |     def condition(self, indices, x):
168 |         """
169 |         Creates a new normal distribution conditioned on the data x at indices.
170 |         """
171 | 
172 |         idim = indices
173 |         odim = npa([i for i in range(self.dim) if not i in indices])
174 | 
175 |         Aaa = self.A[ix(odim,odim)]
176 |         Aab = self.A[ix(odim,idim)]
177 |         iAaa = None
178 |         det = None
179 | 
180 |         if len(odim) == 1: # linalg does not handle d1 arrays
181 |             iAaa = 1.0 / Aaa
182 |             det = np.fabs(iAaa[0])
183 |         else:
184 |             iAaa = la.inv(Aaa)
185 |             det = np.fabs(la.det(iAaa))
186 | 
187 |         # compute the new mu
188 |         premu = np.dot(iAaa, Aab)
189 | 
190 |         mub = self.mu[idim]
191 |         mua = self.mu[odim]
192 |         new_mu = mua - np.dot(premu, (x - mub))
193 | 
194 |         new_E = iAaa
195 |         return Normal(len(odim), mu = new_mu, sigma = new_E,
196 |                       cond = {'data' : x, 'indices' : indices},
197 |                       parent = self)
198 | 
199 | 


--------------------------------------------------------------------------------
/src/plot_gmm.py:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/env python
 2 | """
 3 | Author: Jeremy M. Stober
 4 | Program: PLOT_GMM.PY
 5 | Date: Thursday, November  3 2011
 6 | Description: Code for plotting GMMs
 7 | """
 8 | 
 9 | from plot_normal import draw2dnormal
10 | 
def draw2dgmm(gmm, show = False, axes = None):
    """
    Draw every component of a 2-d GMM as a contour plot.

    Parameters
    ----------
    gmm : GMM
        The mixture whose components (gmm.comps) are drawn.
    show : bool, optional
        If true, display the figure once all components are drawn.
    axes : matplotlib axes, optional
        Axes to draw into; passed through to draw2dnormal for each component.
    """
    for comp in gmm.comps:
        draw2dnormal(comp, axes = axes)

    if show:
        # Imported here because this module has no file-level pylab import.
        import pylab as pl
        pl.show()
15 | 
16 |     
17 | 


--------------------------------------------------------------------------------
/src/plot_normal.py:
--------------------------------------------------------------------------------
  1 | #! /usr/bin/env python
  2 | """
  3 | Author: Jeremy M. Stober
  4 | Program: PLOT_NORMAL.PY
  5 | Date: Wednesday, October 26 2011
  6 | Description: Visualization of the normal distribution.
  7 | """
  8 | 
  9 | import numpy as np
 10 | import numpy.linalg as la
 11 | import numpy.random as npr
 12 | import random as pr
 13 | import pylab as pl
 14 | import matplotlib
 15 | from matplotlib.ticker import NullFormatter
 16 | from matplotlib.widgets import Slider
 17 | import pdb
 18 | from normal import Normal
 19 | 
 20 | def draw2dnormal(norm, show = False, axes = None):
 21 |     """
 22 |     Just draw a simple 2d normal pdf.
 23 |     """
 24 |     # create a meshgrid centered at mu that takes into account the variance in x and y
 25 |     delta = 0.025
 26 | 
 27 |     lower_xlim = norm.mu[0] - (2.0 * norm.E[0,0])
 28 |     upper_xlim = norm.mu[0] + (2.0 * norm.E[0,0])
 29 |     lower_ylim = norm.mu[1] - (2.0 * norm.E[1,1])
 30 |     upper_ylim = norm.mu[1] + (2.0 * norm.E[1,1])
 31 | 
 32 |     x = np.arange(lower_xlim, upper_xlim, delta)
 33 |     y = np.arange(lower_ylim, upper_ylim, delta)
 34 | 
 35 |     X,Y = np.meshgrid(x,y)
 36 | 
 37 |     # remember sqrts!
 38 |     Z = matplotlib.mlab.bivariate_normal(X, Y, sigmax=np.sqrt(norm.E[0,0]), sigmay=np.sqrt(norm.E[1,1]), mux=norm.mu[0], muy=norm.mu[1], sigmaxy=norm.E[0,1])
 39 | 
 40 |     minlim = min(lower_xlim, lower_ylim)
 41 |     maxlim = max(upper_xlim, upper_ylim)
 42 | 
 43 |     # Plot the normalized faithful data points.
 44 |     if not axes:
 45 |         fig = pl.figure(num = 1, figsize=(4,4))
 46 |         pl.contour(X,Y,Z)
 47 |         #axes.set_xlim(minlim,maxlim)
 48 |         #axes.set_ylim(minlim,maxlim)
 49 |     else:
 50 |         axes.contour(X,Y,Z)
 51 |         #axes.set_xlim(minlim,maxlim)
 52 |         #axes.set_ylim(minlim,maxlim)
 53 | 
 54 |     if show:
 55 |         pl.show()
 56 | 
 57 | def evalpdf(norm):
 58 |     delta = 0.025
 59 |     mu = norm.mu[0]
 60 |     sigma = norm.E[0,0]
 61 |     lower_xlim = mu - (2.0 * sigma)
 62 |     upper_xlim = mu + (2.0 * sigma)
 63 |     x = np.arange(lower_xlim,upper_xlim, delta)
 64 |     y = matplotlib.mlab.normpdf(x, mu, np.sqrt(sigma))
 65 |     return x,y
 66 | 
 67 | def draw1dnormal(norm, show = False, axes = None):
 68 |     """
 69 |     Just draw a simple 1d normal pdf. Used for plotting the conditionals in simple test cases.
 70 |     """
 71 |     x,y = evalpdf(norm)
 72 |     if axes is None:
 73 |         pl.plot(x,y)
 74 |     else:
 75 |         return axes.plot(y,x)
 76 | 
 77 |     if show:
 78 |         pl.show()
 79 | 
 80 | def draw2d1dnormal(norm, cnorm, show = False):
 81 | 
 82 |     pl.figure(1, figsize=(8,8))
 83 | 
 84 |     nullfmt = NullFormatter()
 85 | 
 86 |     rect_2d = [0.1, 0.1, 0.65, 0.65]
 87 |     rect_1d = [0.1 + 0.65 + 0.02, 0.1, 0.2, 0.65]
 88 |     ax2d = pl.axes(rect_2d)
 89 |     ax1d = pl.axes(rect_1d)
 90 |     ax1d.xaxis.set_major_formatter(nullfmt)
 91 |     ax1d.yaxis.set_major_formatter(nullfmt)
 92 |     draw2dnormal(norm, axes = ax2d)
 93 |     draw1dnormal(cnorm, axes = ax1d)
 94 |     y = ax2d.get_ylim()
 95 |     x = [cnorm.cond['data'], cnorm.cond['data']]
 96 |     ax2d.plot(x,y)
 97 | 
 98 | 
 99 | def draw_slider_demo(norm):
100 | 
101 |     fig = pl.figure(1, figsize=(8,8))
102 |         
103 |     nullfmt = NullFormatter()
104 | 
105 |     cnorm = norm.condition([0],2.0)
106 | 
107 |     rect_slide = [0.1, 0.85, 0.65 + 0.1, 0.05]
108 |     rect_2d = [0.1, 0.1, 0.65, 0.65]
109 |     rect_1d = [0.1 + 0.65 + 0.02, 0.1, 0.2, 0.65]
110 |     ax2d = pl.axes(rect_2d)
111 |     ax1d = pl.axes(rect_1d)
112 |     ax1d.xaxis.set_major_formatter(nullfmt)
113 |     ax1d.yaxis.set_major_formatter(nullfmt)
114 |     axslide = pl.axes(rect_slide)
115 |     slider = Slider(axslide, 'Cond', -4.0,4.0,valinit=2.0)
116 |         
117 |     draw2dnormal(norm, axes = ax2d)
118 |     l2, = draw1dnormal(cnorm, axes = ax1d)
119 | 
120 |     y = ax2d.get_ylim()
121 |     x = [cnorm.cond['data'], cnorm.cond['data']]
122 |     l1, = ax2d.plot(x,y)
123 |     
124 |     def update(val):
125 |         cnorm = norm.condition([0],val)
126 |         x = [cnorm.cond['data'], cnorm.cond['data']]
127 |         l1.set_xdata(x)
128 |         x,y = evalpdf(cnorm)
129 |         print cnorm
130 |         #print y
131 |         l2.set_xdata(y)
132 |         l2.set_ydata(x)
133 |         pl.draw()
134 |             
135 | 
136 |     slider.on_changed(update)
137 |     
138 |     return slider
139 | 
if __name__ == '__main__':
    # Demo: build a correlated 2-d Normal and open the interactive
    # conditioning slider for it.
    mu = [1.5, 0.5]
    sigma = [[1.0, 0.5], [0.5, 1.0]]
    n = Normal(2, mu = mu, sigma = sigma)
    # Keep the returned slider in a variable while the figure is shown.
    sl = draw_slider_demo(n)
    pl.show()
147 | 


--------------------------------------------------------------------------------
/src/randcov.py:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/env python
 2 | """
 3 | Author: Jeremy M. Stober
 4 | Program: RANDCOV.PY
 5 | Date: Thursday, October 27 2011
 6 | Description: Generate random cov matrix in numpy.
 7 | """
 8 | 
 9 | from numpy import *
10 | from numpy.linalg import *
11 | from numpy.random import *
12 | 
13 | 
def gencov(n):
    """
    Generate a random n x n matrix with unit diagonal.

    A random Gaussian matrix is multiplied by its own transpose, and the
    product is then rescaled on both sides by the inverse square roots of
    its diagonal, which makes every diagonal entry equal to one.
    """
    sample = randn(n, n)
    gram = dot(sample.transpose(), sample)

    # Diagonal scaling matrix holding 1 / sqrt(gram[i, i]).
    scale = diag(1.0 / sqrt(diag(gram)))

    return dot(dot(scale, gram), scale)
21 | 
if __name__ == '__main__':
    # Print one random 2x2 matrix as a smoke test.  Single-argument
    # print(...) behaves the same on Python 2 and 3.
    print(gencov(2))
24 | 


--------------------------------------------------------------------------------
/src/test_func.py:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/env python
 2 | """
 3 | Author: Jeremy M. Stober
 4 | Program: TEST_FUNC.PY
 5 | Date: Monday, November  7 2011
 6 | Description: 2-d noisy function for testing GMM/GMR.
 7 | """
 8 | 
 9 | import numpy as np
10 | import pylab as pl
11 | import numpy.random as npr
12 | 
13 | 
def noisy_cosine(n = 100, noise = 0.1):
    """
    Sample a noisy cosine curve over one period.

    Draws `n` abscissae uniformly at random between 0 and 2*pi, sorts
    them in ascending order, and evaluates cos(x) plus zero-mean Gaussian
    noise scaled by `noise`.

    n     -- number of sample points (default 100).
    noise -- standard deviation of the additive Gaussian noise
             (default 0.1); 0 yields the exact cosine values.

    Returns the pair (x, y) of 1-d arrays, each of length n.
    """
    x = npr.rand(n) * np.pi * 2.0
    # Sort in place so the samples can be drawn as a curve left to right.
    x.sort()
    y = np.cos(x) + noise * npr.randn(n)
    return x,y
19 | 
if __name__ == '__main__':
    # Visual sanity check: scatter one draw of the noisy cosine samples.
    xs, ys = noisy_cosine()
    pl.scatter(xs, ys)
    pl.show()
25 |     
26 | 


--------------------------------------------------------------------------------
/src/test_gmm.py:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/env python
 2 | """
 3 | Author: Jeremy M. Stober
 4 | Program: TEST_GMM.PY
 5 | Date: Thursday, November  3 2011
 6 | Description: Testing code for gmm with new normal distribution.
 7 | """
 8 | 
 9 | import numpy as np
10 | npa = np.array
11 | import pylab as pl
12 | from normal import Normal
13 | from gmm import GMM
14 | from plot_normal import draw2dnormal
15 | from plot_gmm import draw2dgmm
16 | 
if False:
    # Disabled demo: fit a two-component GMM (k-means initialization) to the
    # two-column data file and draw it over a scatter plot of the points.
    data = []
    # The with-block closes the file even if a line fails to parse.
    with open("../data/faithful.txt") as fp:
        for line in fp:
            # Each line must hold exactly two whitespace-separated numbers.
            x,y = line.split()
            data.append([float(x),float(y)])

    data = npa(data)
    pl.scatter(data[:,0],data[:,1])
    gmm = GMM(dim = 2, ncomps = 2, data = data, method = "kmeans")

    # Alternative: fit and draw a single Normal instead of a mixture.
    #x = Normal(2, data=data)
    #draw2dnormal(x,show=True,axes=pl.gca())

    # Parenthesized print works identically on Python 2 and Python 3.
    print(gmm)
    draw2dgmm(gmm)
    pl.show()
33 | 
if False:
    # Disabled demo: fit a two-component GMM (k-means initialization) to
    # noisy cosine samples and draw the mixture over the scatter plot.
    from test_func import noisy_cosine

    xs, ys = noisy_cosine()
    # Stack the coordinates as rows, then transpose to one sample per row.
    data = np.array([xs, ys]).T
    pl.scatter(data[:,0], data[:,1])

    gmm = GMM(dim = 2, ncomps = 2, data = data, method = "kmeans")
    draw2dgmm(gmm)
    pl.show()
47 |     
48 | 
if True:
    # Active demo: fit a two-component GMM to noisy cosine samples, then
    # trace a regression curve by conditioning the mixture on coordinate 0.
    from test_func import noisy_cosine

    xs, ys = noisy_cosine()
    # Stack the coordinates as rows, then transpose to one sample per row.
    data = np.array([xs, ys]).T
    pl.scatter(data[:,0], data[:,1])

    gmm = GMM(dim = 2, ncomps = 2, data = data, method = "kmeans")
    draw2dgmm(gmm)

    # No pl.show() yet: the regression curve is added to the same figure.
    nx = np.arange(0, 2 * np.pi, 0.1)
    # For every query value, condition on coordinate 0 and record the mean
    # of the resulting mixture as the predicted ordinate.
    ny = [gmm.condition([0], [query]).mean() for query in nx]

    pl.plot(nx, ny, color='red')
    pl.show()
75 | 


--------------------------------------------------------------------------------
/src/test_normal.py:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/env python
 2 | """
 3 | Author: Jeremy M. Stober
 4 | Program: TEST_NORMAL.PY
 5 | Date: Wednesday, October 26 2011
 6 | Description: Fit normal to old faithful data.
 7 | """
 8 | 
 9 | 
10 | from normal import Normal
11 | from plot_normal import draw2dnormal
12 | import pylab as pl
13 | import numpy as np
14 | npa = np.array
15 | import pdb
16 | 
17 | 
if False:
    # Disabled demo: fit a single 2-d Normal to the two-column data file and
    # draw it on the axes holding the scatter plot of the points.
    # NOTE(review): the path is relative to the working directory, while the
    # repository tree keeps faithful.txt under data/ -- confirm where this
    # script is meant to be run from.
    data = []
    # The with-block closes the file even if a line fails to parse.
    with open("faithful.txt") as fp:
        for line in fp:
            # Each line must hold exactly two whitespace-separated numbers.
            col0,col1 = line.split()
            data.append([float(col0),float(col1)])

    data = npa(data)
    pl.scatter(data[:,0],data[:,1])
    x = Normal(2, data=data)
    draw2dnormal(x,show=True,axes=pl.gca())
29 | 
if True:
    # Active demo: simulate from a correlated 2-d normal, draw the normal,
    # and scatter the first two columns of the simulated output on top.
    x = Normal(2,mu = np.array([0.1,0.7]), sigma = np.array([[ 0.6,  0.4], [ 0.4,  0.6]]))
    s = x.simulate()
    draw2dnormal(x)
    pl.scatter(s[:,0],s[:,1])
    pl.show()
    # Parenthesized print works identically on Python 2 and Python 3.
    print(s)
37 | 
if False:
    # Disabled check: print a 2-d normal, then the result of conditioning it
    # on coordinate 0 taking the value 0.1.
    x = Normal(2,mu = np.array([0.1,0.7]), sigma = np.array([[ 0.6,  0.4], [ 0.4,  0.6]]))
    #draw2dnormal(x,show=True)
    # Parenthesized prints work identically on Python 2 and Python 3.
    print(x)
    new = x.condition([0],0.1)
    print(new)
44 | 
if False:
    # Disabled check: compare Normal.condition against the conditional
    # parameters computed by hand from the precision matrix.

    from randcov import gencov
    import numpy.random as npr
    import numpy.linalg as la

    S = gencov(5)
    mu = npr.randn(5)

    x = Normal(5,mu = mu, sigma = S)
    newx = x.condition([0,1],np.array([0.1,0.3]))
    # Parenthesized prints work identically on Python 2 and Python 3.
    print(newx)

    # With A = inv(S), the coordinates 2.. given coordinates 0,1 have
    # covariance inv(A[2:,2:]) and mean
    # mu[2:] - inv(A[2:,2:]) . A[2:,:2] . (observed - mu[:2]).
    A = la.inv(S)
    newS = la.inv(A[2:,2:])
    newmu = mu[2:] - np.dot(np.dot(newS, A[2:,:2]), (np.array([0.1,0.3])- mu[:2]))

    print(newmu)
    print(newS) # should match above
64 | 
65 |     
66 |     
67 | 


--------------------------------------------------------------------------------