├── README ├── data └── faithful.txt ├── setup.py └── src ├── __init__.py ├── gmm.py ├── normal.py ├── plot_gmm.py ├── plot_normal.py ├── randcov.py ├── test_func.py ├── test_gmm.py └── test_normal.py /README: -------------------------------------------------------------------------------- 1 | Gaussian Mixture Models in Python 2 | 3 | Author: Jeremy Stober 4 | Contact: stober@gmail.com 5 | Version: 0.01 6 | 7 | This is a standalone Pythonic implementation of Gaussian Mixture 8 | Models. Various initialization strategies are included along with a 9 | standard EM algorithm for determining the model parameters based on 10 | data. 11 | 12 | Example code for the GMM and Normal classes can be found in the 13 | src/test_*.py files. The GMM and the underlying Normal class both 14 | support conditioning on data and marginalization for any subset of the 15 | variables. This makes this implementation ideal for experimenting with 16 | Gaussian Mixture Regression. For example, the following code learns 17 | the cosine function: 18 | 19 | 20 | import numpy as np 21 | from gmm import GMM 22 | from plot_gmm import draw2dgmm 23 | from test_func import noisy_cosine 24 | import pylab as pl 25 | 26 | x,y = noisy_cosine() 27 | data = np.vstack([x,y]).transpose() 28 | pl.scatter(data[:,0],data[:,1]) 29 | 30 | gmm = GMM(dim = 2, ncomps = 2, data = data, method = "kmeans") 31 | draw2dgmm(gmm) 32 | 33 | nx = np.arange(0,2 * np.pi, 0.1) 34 | ny = [] 35 | for i in nx: 36 | ngmm = gmm.condition([0],[i]) 37 | ny.append(ngmm.mean()) 38 | 39 | pl.plot(nx,ny,color='red') 40 | pl.show() 41 | 42 | 43 | -------------------------------------------------------------------------------- /data/faithful.txt: -------------------------------------------------------------------------------- 1 | 3.600000 79.000000 2 | 1.800000 54.000000 3 | 3.333000 74.000000 4 | 2.283000 62.000000 5 | 4.533000 85.000000 6 | 2.883000 55.000000 7 | 4.700000 88.000000 8 | 3.600000 85.000000 9 | 1.950000 51.000000 10 | 4.350000 85.000000 11 | 1.833000 54.000000 12 | 3.917000 84.000000 13 | 4.200000 78.000000 14 | 1.750000 47.000000 15 | 4.700000 83.000000 16 | 2.167000 52.000000 17 | 1.750000 62.000000 18 | 4.800000 84.000000 19 | 1.600000 52.000000 20 | 4.250000 79.000000 21 | 1.800000 51.000000 22 | 1.750000 47.000000 23 | 3.450000 78.000000 24 | 3.067000 69.000000 25 | 4.533000 74.000000 26 | 3.600000 83.000000 27 | 1.967000 55.000000 28 | 4.083000 76.000000 29 | 3.850000 78.000000 30 | 4.433000 79.000000 31 | 4.300000 73.000000 32 | 4.467000 77.000000 33 | 3.367000 66.000000 34 | 4.033000 80.000000 35 | 3.833000 74.000000 36 | 2.017000 52.000000 37 | 1.867000 48.000000 38 | 4.833000 80.000000 39 | 1.833000 59.000000 40 | 4.783000 90.000000 41 | 4.350000 80.000000 42 | 1.883000 58.000000 43 | 4.567000 84.000000 44 | 1.750000 58.000000 45 | 4.533000 73.000000 46 | 3.317000 83.000000 47 | 3.833000 64.000000 48 | 2.100000 53.000000 49 | 4.633000 82.000000 50 | 2.000000 59.000000 51 | 4.800000 75.000000 52 | 4.716000 90.000000 53 | 1.833000 54.000000 54 | 4.833000 80.000000 55 | 1.733000 54.000000 56 | 4.883000 83.000000 57 | 3.717000 71.000000 58 | 1.667000 64.000000 59 | 4.567000 77.000000 60 | 4.317000 81.000000 61 | 2.233000 59.000000 62 | 4.500000 84.000000 63 | 1.750000 48.000000 64 | 4.800000 82.000000 65 | 1.817000 60.000000 66 | 4.400000 92.000000 67 | 4.167000 78.000000 68 | 4.700000 78.000000 69 | 2.067000 65.000000 70 | 4.700000 73.000000 71 | 4.033000 82.000000 72 | 1.967000 56.000000 73 | 4.500000 79.000000 74 | 4.000000 71.000000 75 | 1.983000 62.000000 76 | 5.067000 76.000000 77 | 2.017000 60.000000 78 | 4.567000 78.000000 79 | 3.883000 76.000000 80 | 3.600000 83.000000 81 | 4.133000 75.000000 82 | 4.333000 82.000000 83 | 4.100000 70.000000 84 | 2.633000 65.000000 85 | 4.067000 73.000000 86 | 4.933000 88.000000 87 | 3.950000 76.000000 88 | 4.517000 80.000000 89 | 2.167000 48.000000 90 | 4.000000 86.000000 91 | 2.200000 60.000000 92 | 4.333000 90.000000 93 | 1.867000 50.000000 94 | 4.817000 78.000000 95 | 1.833000 63.000000 96 | 4.300000 72.000000 97 | 4.667000 84.000000 98 | 3.750000 75.000000 99 | 1.867000 51.000000 100 | 4.900000 82.000000 101 | 2.483000 62.000000 102 | 4.367000 88.000000 103 | 2.100000 49.000000 104 | 4.500000 83.000000 105 | 4.050000 81.000000 106 | 1.867000 47.000000 107 | 4.700000 84.000000 108 | 1.783000 52.000000 109 | 4.850000 86.000000 110 | 3.683000 81.000000 111 | 4.733000 75.000000 112 | 2.300000 59.000000 113 | 4.900000 89.000000 114 | 4.417000 79.000000 115 | 1.700000 59.000000 116 | 4.633000 81.000000 117 | 2.317000 50.000000 118 | 4.600000 85.000000 119 | 1.817000 59.000000 120 | 4.417000 87.000000 121 | 2.617000 53.000000 122 | 4.067000 69.000000 123 | 4.250000 77.000000 124 | 1.967000 56.000000 125 | 4.600000 88.000000 126 | 3.767000 81.000000 127 | 1.917000 45.000000 128 | 4.500000 82.000000 129 | 2.267000 55.000000 130 | 4.650000 90.000000 131 | 1.867000 45.000000 132 | 4.167000 83.000000 133 | 2.800000 56.000000 134 | 4.333000 89.000000 135 | 1.833000 46.000000 136 | 4.383000 82.000000 137 | 1.883000 51.000000 138 | 4.933000 86.000000 139 | 2.033000 53.000000 140 | 3.733000 79.000000 141 | 4.233000 81.000000 142 | 2.233000 60.000000 143 | 4.533000 82.000000 144 | 4.817000 77.000000 145 | 4.333000 76.000000 146 | 1.983000 59.000000 147 | 4.633000 80.000000 148 | 2.017000 49.000000 149 | 5.100000 96.000000 150 | 1.800000 53.000000 151 | 5.033000 77.000000 152 | 4.000000 77.000000 153 | 2.400000 65.000000 154 | 4.600000 81.000000 155 | 3.567000 71.000000 156 | 4.000000 70.000000 157 | 4.500000 81.000000 158 | 4.083000 93.000000 159 | 1.800000 53.000000 160 | 3.967000 89.000000 161 | 2.200000 45.000000 162 | 4.150000 86.000000 163 | 2.000000 58.000000 164 | 3.833000 78.000000 165 | 3.500000 66.000000 166 | 4.583000 76.000000 167 | 2.367000 63.000000 168 | 5.000000 88.000000 169 | 1.933000 52.000000 170 | 4.617000 93.000000 171 | 1.917000 49.000000 172 | 2.083000 57.000000 173 | 4.583000 77.000000 174 | 3.333000 68.000000 175 | 4.167000 81.000000 176 | 4.333000 81.000000 177 | 4.500000 73.000000 178 | 2.417000 50.000000 179 | 4.000000 85.000000 180 | 4.167000 74.000000 181 | 1.883000 55.000000 182 | 4.583000 77.000000 183 | 4.250000 83.000000 184 | 3.767000 83.000000 185 | 2.033000 51.000000 186 | 4.433000 78.000000 187 | 4.083000 84.000000 188 | 1.833000 46.000000 189 | 4.417000 83.000000 190 | 2.183000 55.000000 191 | 4.800000 81.000000 192 | 1.833000 57.000000 193 | 4.800000 76.000000 194 | 4.100000 84.000000 195 | 3.966000 77.000000 196 | 4.233000 81.000000 197 | 3.500000 87.000000 198 | 4.366000 77.000000 199 | 2.250000 51.000000 200 | 4.667000 78.000000 201 | 2.100000 60.000000 202 | 4.350000 82.000000 203 | 4.133000 91.000000 204 | 1.867000 53.000000 205 | 4.600000 78.000000 206 | 1.783000 46.000000 207 | 4.367000 77.000000 208 | 3.850000 84.000000 209 | 1.933000 49.000000 210 | 4.500000 83.000000 211 | 2.383000 71.000000 212 | 4.700000 80.000000 213 | 1.867000 49.000000 214 | 3.833000 75.000000 215 | 3.417000 64.000000 216 | 4.233000 76.000000 217 | 2.400000 53.000000 218 | 4.800000 94.000000 219 | 2.000000 55.000000 220 | 4.150000 76.000000 221 | 1.867000 50.000000 222 | 4.267000 82.000000 223 | 1.750000 54.000000 224 | 4.483000 75.000000 225 | 4.000000 78.000000 226 | 4.117000 79.000000 227 | 4.083000 78.000000 228 | 4.267000 78.000000 229 | 3.917000 70.000000 230 | 4.550000 79.000000 231 | 4.083000 70.000000 232 | 2.417000 54.000000 233 | 4.183000 86.000000 234 | 2.217000 50.000000 235 | 4.450000 90.000000 236 | 1.883000 54.000000 237 | 1.850000 54.000000 238 | 4.283000 77.000000 239 | 3.950000 79.000000 240 | 2.333000 64.000000 241 | 4.150000 75.000000 242 | 2.350000 47.000000 243 | 4.933000 86.000000 244 | 2.900000 63.000000 245 | 4.583000 85.000000 246 | 3.833000 82.000000 247 | 2.083000 57.000000 248 | 4.367000 82.000000 249 | 2.133000 67.000000 250 | 4.350000 74.000000 251 | 2.200000 54.000000 252 | 4.450000 83.000000 253 | 3.567000 73.000000 254 | 4.500000 73.000000 255 | 4.150000 88.000000 256 | 3.817000 80.000000 257 | 3.917000 71.000000 258 | 4.450000 83.000000 259 | 2.000000 56.000000 260 | 4.283000 79.000000 261 | 4.767000 78.000000 262 | 4.533000 84.000000 263 | 1.850000 58.000000 264 | 4.250000 83.000000 265 | 1.983000 43.000000 266 | 2.250000 60.000000 267 | 4.750000 75.000000 268 | 4.117000 81.000000 269 | 2.150000 46.000000 270 | 4.417000 90.000000 271 | 1.817000 46.000000 272 | 4.467000 74.000000 273 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | """ 3 | @author: stober 4 | """ 5 | 6 | 7 | from distutils.core import setup 8 | 9 | setup(name='gmm', 10 | version='0.1', 11 | description='Gaussian Mixture Models', 12 | author='Jeremy Stober', 13 | author_email='stober@gmail.com', 14 | package_dir={'gmm':'src'}, 15 | packages=['gmm'], 16 | ) 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | """ 3 | Author: Jeremy M. Stober 4 | Program: __INIT__.PY 5 | Date: Wednesday, May 23 2012 6 | """ 7 | 8 | 9 | from normal import Normal 10 | from gmm import GMM, shownormal 11 | 12 | -------------------------------------------------------------------------------- /src/gmm.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | """ 3 | Author: Jeremy M. Stober 4 | Program: GMM.PY 5 | Date: Friday, June 24 2011/Volumes/NO NAME/seds/nodes/gmm.py 6 | Description: A python class for creating and manipulating GMMs. 7 | """ 8 | 9 | import scipy.cluster.vq as vq 10 | import numpy as np 11 | import numpy.linalg as la 12 | import numpy.random as npr 13 | import random as pr 14 | npa = np.array 15 | 16 | import sys; sys.path.append('.') 17 | import pdb 18 | 19 | #import matplotlib 20 | import pylab 21 | from normal import Normal 22 | 23 | class GMM(object): 24 | 25 | def __init__(self, dim = None, ncomps = None, data = None, method = None, filename = None, params = None): 26 | 27 | if not filename is None: # load from file 28 | self.load_model(filename) 29 | 30 | elif not params is None: # initialize with parameters directly 31 | self.comps = params['comps'] 32 | self.ncomps = params['ncomps'] 33 | self.dim = params['dim'] 34 | self.priors = params['priors'] 35 | 36 | elif not data is None: # initialize from data 37 | 38 | assert dim and ncomps, "Need to define dim and ncomps." 39 | 40 | self.dim = dim 41 | self.ncomps = ncomps 42 | self.comps = [] 43 | 44 | if method is "uniform": 45 | # uniformly assign data points to components then estimate the parameters 46 | npr.shuffle(data) 47 | n = len(data) 48 | s = n / ncomps 49 | for i in range(ncomps): 50 | self.comps.append(Normal(dim, data = data[i * s: (i+1) * s])) 51 | 52 | self.priors = np.ones(ncomps, dtype = "double") / ncomps 53 | 54 | elif method is "random": 55 | # choose ncomp points from data randomly then estimate the parameters 56 | mus = pr.sample(data,ncomps) 57 | clusters = [[] for i in range(ncomps)] 58 | for d in data: 59 | i = np.argmin([la.norm(d - m) for m in mus]) 60 | clusters[i].append(d) 61 | 62 | for i in range(ncomps): 63 | print mus[i], clusters[i] 64 | self.comps.append(Normal(dim, mu = mus[i], sigma = np.cov(clusters[i], rowvar=0))) 65 | 66 | self.priors = np.ones(ncomps, dtype="double") / np.array([len(c) for c in clusters]) 67 | 68 | elif method is "kmeans": 69 | # use kmeans to initialize the parameters 70 | (centroids, labels) = vq.kmeans2(data, ncomps, minit="points", iter=100) 71 | clusters = [[] for i in range(ncomps)] 72 | for (l,d) in zip(labels,data): 73 | clusters[l].append(d) 74 | 75 | # will end up recomputing the cluster centers 76 | for cluster in clusters: 77 | self.comps.append(Normal(dim, data = cluster)) 78 | 79 | self.priors = np.ones(ncomps, dtype="double") / np.array([len(c) for c in clusters]) 80 | 81 | else: 82 | raise ValueError, "Unknown method type!" 83 | 84 | else: 85 | 86 | # these need to be defined 87 | assert dim and ncomps, "Need to define dim and ncomps." 88 | 89 | self.dim = dim 90 | self.ncomps = ncomps 91 | 92 | self.comps = [] 93 | 94 | for i in range(ncomps): 95 | self.comps.append(Normal(dim)) 96 | 97 | self.priors = np.ones(ncomps,dtype='double') / ncomps 98 | 99 | def __str__(self): 100 | res = "%d" % self.dim 101 | res += "\n%s" % str(self.priors) 102 | for comp in self.comps: 103 | res += "\n%s" % str(comp) 104 | return res 105 | 106 | def save_model(self): 107 | pass 108 | 109 | def load_model(self): 110 | pass 111 | 112 | def mean(self): 113 | return np.sum([self.priors[i] * self.comps[i].mean() for i in range(self.ncomps)], axis=0) 114 | 115 | def covariance(self): # computed using Dan's method 116 | m = self.mean() 117 | s = -np.outer(m,m) 118 | 119 | for i in range(self.ncomps): 120 | cm = self.comps[i].mean() 121 | cvar = self.comps[i].covariance() 122 | s += self.priors[i] * (np.outer(cm,cm) + cvar) 123 | 124 | return s 125 | 126 | def pdf(self, x): 127 | responses = [comp.pdf(x) for comp in self.comps] 128 | return np.dot(self.priors, responses) 129 | 130 | def condition(self, indices, x): 131 | """ 132 | Create a new GMM conditioned on data x at indices. 133 | """ 134 | condition_comps = [] 135 | marginal_comps = [] 136 | 137 | for comp in self.comps: 138 | condition_comps.append(comp.condition(indices, x)) 139 | marginal_comps.append(comp.marginalize(indices)) 140 | 141 | new_priors = [] 142 | for (i,prior) in enumerate(self.priors): 143 | new_priors.append(prior * marginal_comps[i].pdf(x)) 144 | new_priors = npa(new_priors) / np.sum(new_priors) 145 | 146 | params = {'ncomps' : self.ncomps, 'comps' : condition_comps, 147 | 'priors' : new_priors, 'dim' : marginal_comps[0].dim} 148 | 149 | return GMM(params = params) 150 | 151 | def em(self, data, nsteps = 100): 152 | 153 | k = self.ncomps 154 | d = self.dim 155 | n = len(data) 156 | 157 | for l in range(nsteps): 158 | 159 | # E step 160 | 161 | responses = np.zeros((k,n)) 162 | 163 | for j in range(n): 164 | for i in range(k): 165 | responses[i,j] = self.priors[i] * self.comps[i].pdf(data[j]) 166 | 167 | responses = responses / np.sum(responses,axis=0) # normalize the weights 168 | 169 | # M step 170 | 171 | N = np.sum(responses,axis=1) 172 | 173 | for i in range(k): 174 | mu = np.dot(responses[i,:],data) / N[i] 175 | sigma = np.zeros((d,d)) 176 | 177 | for j in range(n): 178 | sigma += responses[i,j] * np.outer(data[j,:] - mu, data[j,:] - mu) 179 | 180 | sigma = sigma / N[i] 181 | 182 | self.comps[i].update(mu,sigma) # update the normal with new parameters 183 | self.priors[i] = N[i] / np.sum(N) # normalize the new priors 184 | 185 | 186 | def shownormal(data,gmm): 187 | 188 | xnorm = data[:,0] 189 | ynorm = data[:,1] 190 | 191 | # Plot the normalized faithful data points. 192 | fig = pylab.figure(num = 1, figsize=(4,4)) 193 | axes = fig.add_subplot(111) 194 | axes.plot(xnorm,ynorm, '+') 195 | 196 | # Plot the ellipses representing the principle components of the normals. 197 | for comp in gmm.comps: 198 | comp.patch(axes) 199 | 200 | pylab.draw() 201 | pylab.show() 202 | 203 | 204 | if __name__ == '__main__': 205 | 206 | """ 207 | Tests for gmm module. 208 | """ 209 | 210 | 211 | # x = npr.randn(20, 2) 212 | 213 | # print "No data" 214 | # gmm = GMM(2,1,2) # possibly also broken 215 | # print gmm 216 | 217 | # print "Uniform" 218 | # gmm = GMM(2,1,2,data = x, method = "uniform") 219 | # print gmm 220 | 221 | # print "Random" 222 | # gmm = GMM(2,1,2,data = x, method = "random") # broken 223 | # print gmm 224 | 225 | # print "Kmeans" 226 | # gmm = GMM(2,1,2,data = x, method = "kmeans") # possibly broken 227 | # print gmm 228 | 229 | 230 | x = np.arange(-10,30) 231 | #y = x ** 2 + npr.randn(20) 232 | y = x + npr.randn(40) # simple linear function 233 | #y = np.sin(x) + npr.randn(20) 234 | data = np.vstack([x,y]).T 235 | print data.shape 236 | 237 | 238 | gmm = GMM(dim = 2, ncomps = 4,data = data, method = "random") 239 | print gmm 240 | shownormal(data,gmm) 241 | 242 | gmm.em(data,nsteps=1000) 243 | shownormal(data,gmm) 244 | print gmm 245 | ngmm = gmm.condition([0],[-3]) 246 | print ngmm.mean() 247 | print ngmm.covariance() 248 | -------------------------------------------------------------------------------- /src/normal.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | """ 3 | Author: Jeremy M. Stober 4 | Program: NORMAL.PY 5 | Date: Friday, July 7, 2011 6 | Description: Manipulating normal distributions. 7 | """ 8 | 9 | import numpy as np 10 | import numpy.linalg as la 11 | import numpy.random as npr 12 | import random as pr 13 | npa = np.array 14 | ix = np.ix_ # urgh - sometimes numpy is ugly! 15 | 16 | class Normal(object): 17 | """ 18 | A class for storing the parameters of a multivariate normal 19 | distribution. Supports evaluation, sampling, conditioning and 20 | marginalization. 21 | """ 22 | 23 | def __init__(self, dim, mu = None, sigma = None, data = None, 24 | parent = None, cond = None, margin = None): 25 | """ 26 | Initialize a normal distribution. 27 | 28 | Parameters 29 | ---------- 30 | dim : int 31 | Number of dimensions (e.g. number of components in the mu parameter). 32 | mu : array, optional 33 | The mean of the normal distribution. 34 | sigma : array, optional 35 | The covariance matrix of the normal distribution. 36 | data : array, optional 37 | If provided, the parameters of the distribution will be estimated from the data. Rows are observations, columns are components. 38 | parent : Normal, optional 39 | A reference to a parent distribution that was marginalized or conditioned. 40 | cond : dict, optional 41 | A dict of parameters describing how the parent distribution was conditioned. 42 | margin : dict, optional 43 | A dict of parameters describing how the parent distribution was marginalized. 44 | 45 | Examples 46 | -------- 47 | >>> x = Normal(2,mu = np.array([0.1,0.7]), sigma = np.array([[ 0.6, 0.4], [ 0.4, 0.6]])) 48 | >>> print x 49 | [ 0.1 0.7] 50 | [[ 0.6 0.4] 51 | [ 0.4 0.6]] 52 | 53 | To condition on a value (and index): 54 | 55 | >>> condx = x.condition([0],0.1) 56 | >>> print condx 57 | [ 0.7] 58 | [[ 0.33333333]] 59 | 60 | """ 61 | 62 | self.dim = dim # full data dimension 63 | 64 | if not mu is None and not sigma is None: 65 | pass 66 | elif not data is None: 67 | # estimate the parameters from data - rows are samples, cols are variables 68 | mu, sigma = self.estimate(data) 69 | else: 70 | # generate random means 71 | mu = npr.randn(dim) 72 | sigma = np.eye(dim) 73 | 74 | self.cond = cond 75 | self.margin = margin 76 | self.parent = parent 77 | 78 | self.update(npa(mu),npa(sigma)) 79 | 80 | 81 | def update(self, mu, sigma): 82 | """ 83 | Update the distribution with new parameters. 84 | 85 | Parameters 86 | ---------- 87 | mu : array 88 | The new mean parameters. 89 | sigma : array 90 | The new covariance matrix. 91 | 92 | Example 93 | ------- 94 | 95 | >>> x = Normal(2,mu = np.array([0.1,0.7]), sigma = np.array([[ 0.6, 0.4], [ 0.4, 0.6]])) 96 | >>> print x 97 | [ 0.1 0.7] 98 | [[ 0.6 0.4] 99 | [ 0.4 0.6]] 100 | 101 | >>> x.update(np.array([0.0,0.0]), x.E) 102 | >>> print x 103 | [ 0.0 0.0] 104 | [[ 0.6 0.4] 105 | [ 0.4 0.6]] 106 | """ 107 | 108 | self.mu = mu 109 | self.E = sigma 110 | 111 | det = None 112 | if self.dim == 1: 113 | self.A = 1.0 / self.E 114 | det = np.fabs(self.E[0]) 115 | else: 116 | self.A = la.inv(self.E) # precision matrix 117 | det = np.fabs(la.det(self.E)) 118 | 119 | self.factor = (2.0 * np.pi)**(self.dim / 2.0) * (det)**(0.5) 120 | 121 | def __str__(self): 122 | return "%s\n%s" % (str(self.mu), str(self.E)) 123 | 124 | def mean(self): 125 | return self.mu 126 | 127 | def covariance(self): 128 | return self.E 129 | 130 | def pdf(self, x): 131 | dx = x - self.mu 132 | A = self.A 133 | fE = self.factor 134 | 135 | return np.exp(-0.5 * np.dot(np.dot(dx,A),dx)) / fE 136 | 137 | def pdf_mesh(self, x, y): 138 | # for 2d meshgrids 139 | # use matplotlib.mlab.bivariate_normal -- faster (vectorized) 140 | 141 | z = np.zeros((len(y),len(x))) 142 | 143 | for (i,v) in enumerate(x): 144 | for (j,w) in enumerate(y): 145 | z[j,i] = self.pdf([v,w]) 146 | 147 | return z 148 | 149 | def simulate(self, ndata = 100): 150 | """ 151 | Draw pts from the distribution. 152 | """ 153 | return npr.multivariate_normal(self.mu, self.E, ndata) 154 | 155 | def estimate(self, data): 156 | mu = np.mean(data, axis=0) 157 | sigma = np.cov(data, rowvar=0) 158 | return mu, sigma 159 | 160 | def marginalize(self, indices): 161 | """ 162 | Creates a new marginal normal distribution for ''indices''. 163 | """ 164 | indices = npa(indices) 165 | return Normal(len(indices), mu = self.mu[indices], sigma = self.E[ix(indices,indices)], margin = {'indices' : indices}, parent = self) 166 | 167 | def condition(self, indices, x): 168 | """ 169 | Creates a new normal distribution conditioned on the data x at indices. 170 | """ 171 | 172 | idim = indices 173 | odim = npa([i for i in range(self.dim) if not i in indices]) 174 | 175 | Aaa = self.A[ix(odim,odim)] 176 | Aab = self.A[ix(odim,idim)] 177 | iAaa = None 178 | det = None 179 | 180 | if len(odim) == 1: # linalg does not handle d1 arrays 181 | iAaa = 1.0 / Aaa 182 | det = np.fabs(iAaa[0]) 183 | else: 184 | iAaa = la.inv(Aaa) 185 | det = np.fabs(la.det(iAaa)) 186 | 187 | # compute the new mu 188 | premu = np.dot(iAaa, Aab) 189 | 190 | mub = self.mu[idim] 191 | mua = self.mu[odim] 192 | new_mu = mua - np.dot(premu, (x - mub)) 193 | 194 | new_E = iAaa 195 | return Normal(len(odim), mu = new_mu, sigma = new_E, 196 | cond = {'data' : x, 'indices' : indices}, 197 | parent = self) 198 | 199 | -------------------------------------------------------------------------------- /src/plot_gmm.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | """ 3 | Author: Jeremy M. Stober 4 | Program: PLOT_GMM.PY 5 | Date: Thursday, November 3 2011 6 | Description: Code for plotting GMMs 7 | """ 8 | 9 | from plot_normal import draw2dnormal 10 | 11 | def draw2dgmm(gmm, show = False, axes = None): 12 | 13 | for comp in gmm.comps: 14 | draw2dnormal(comp) 15 | 16 | 17 | -------------------------------------------------------------------------------- /src/plot_normal.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | """ 3 | Author: Jeremy M. Stober 4 | Program: PLOT_NORMAL.PY 5 | Date: Wednesday, October 26 2011 6 | Description: Visualization of the normal distribution. 7 | """ 8 | 9 | import numpy as np 10 | import numpy.linalg as la 11 | import numpy.random as npr 12 | import random as pr 13 | import pylab as pl 14 | import matplotlib 15 | from matplotlib.ticker import NullFormatter 16 | from matplotlib.widgets import Slider 17 | import pdb 18 | from normal import Normal 19 | 20 | def draw2dnormal(norm, show = False, axes = None): 21 | """ 22 | Just draw a simple 2d normal pdf. 23 | """ 24 | # create a meshgrid centered at mu that takes into account the variance in x and y 25 | delta = 0.025 26 | 27 | lower_xlim = norm.mu[0] - (2.0 * norm.E[0,0]) 28 | upper_xlim = norm.mu[0] + (2.0 * norm.E[0,0]) 29 | lower_ylim = norm.mu[1] - (2.0 * norm.E[1,1]) 30 | upper_ylim = norm.mu[1] + (2.0 * norm.E[1,1]) 31 | 32 | x = np.arange(lower_xlim, upper_xlim, delta) 33 | y = np.arange(lower_ylim, upper_ylim, delta) 34 | 35 | X,Y = np.meshgrid(x,y) 36 | 37 | # remember sqrts! 38 | Z = matplotlib.mlab.bivariate_normal(X, Y, sigmax=np.sqrt(norm.E[0,0]), sigmay=np.sqrt(norm.E[1,1]), mux=norm.mu[0], muy=norm.mu[1], sigmaxy=norm.E[0,1]) 39 | 40 | minlim = min(lower_xlim, lower_ylim) 41 | maxlim = max(upper_xlim, upper_ylim) 42 | 43 | # Plot the normalized faithful data points. 44 | if not axes: 45 | fig = pl.figure(num = 1, figsize=(4,4)) 46 | pl.contour(X,Y,Z) 47 | #axes.set_xlim(minlim,maxlim) 48 | #axes.set_ylim(minlim,maxlim) 49 | else: 50 | axes.contour(X,Y,Z) 51 | #axes.set_xlim(minlim,maxlim) 52 | #axes.set_ylim(minlim,maxlim) 53 | 54 | if show: 55 | pl.show() 56 | 57 | def evalpdf(norm): 58 | delta = 0.025 59 | mu = norm.mu[0] 60 | sigma = norm.E[0,0] 61 | lower_xlim = mu - (2.0 * sigma) 62 | upper_xlim = mu + (2.0 * sigma) 63 | x = np.arange(lower_xlim,upper_xlim, delta) 64 | y = matplotlib.mlab.normpdf(x, mu, np.sqrt(sigma)) 65 | return x,y 66 | 67 | def draw1dnormal(norm, show = False, axes = None): 68 | """ 69 | Just draw a simple 1d normal pdf. Used for plotting the conditionals in simple test cases. 70 | """ 71 | x,y = evalpdf(norm) 72 | if axes is None: 73 | pl.plot(x,y) 74 | else: 75 | return axes.plot(y,x) 76 | 77 | if show: 78 | pl.show() 79 | 80 | def draw2d1dnormal(norm, cnorm, show = False): 81 | 82 | pl.figure(1, figsize=(8,8)) 83 | 84 | nullfmt = NullFormatter() 85 | 86 | rect_2d = [0.1, 0.1, 0.65, 0.65] 87 | rect_1d = [0.1 + 0.65 + 0.02, 0.1, 0.2, 0.65] 88 | ax2d = pl.axes(rect_2d) 89 | ax1d = pl.axes(rect_1d) 90 | ax1d.xaxis.set_major_formatter(nullfmt) 91 | ax1d.yaxis.set_major_formatter(nullfmt) 92 | draw2dnormal(norm, axes = ax2d) 93 | draw1dnormal(cnorm, axes = ax1d) 94 | y = ax2d.get_ylim() 95 | x = [cnorm.cond['data'], cnorm.cond['data']] 96 | ax2d.plot(x,y) 97 | 98 | 99 | def draw_slider_demo(norm): 100 | 101 | fig = pl.figure(1, figsize=(8,8)) 102 | 103 | nullfmt = NullFormatter() 104 | 105 | cnorm = norm.condition([0],2.0) 106 | 107 | rect_slide = [0.1, 0.85, 0.65 + 0.1, 0.05] 108 | rect_2d = [0.1, 0.1, 0.65, 0.65] 109 | rect_1d = [0.1 + 0.65 + 0.02, 0.1, 0.2, 0.65] 110 | ax2d = pl.axes(rect_2d) 111 | ax1d = pl.axes(rect_1d) 112 | ax1d.xaxis.set_major_formatter(nullfmt) 113 | ax1d.yaxis.set_major_formatter(nullfmt) 114 | axslide = pl.axes(rect_slide) 115 | slider = Slider(axslide, 'Cond', -4.0,4.0,valinit=2.0) 116 | 117 | draw2dnormal(norm, axes = ax2d) 118 | l2, = draw1dnormal(cnorm, axes = ax1d) 119 | 120 | y = ax2d.get_ylim() 121 | x = [cnorm.cond['data'], cnorm.cond['data']] 122 | l1, = ax2d.plot(x,y) 123 | 124 | def update(val): 125 | cnorm = norm.condition([0],val) 126 | x = [cnorm.cond['data'], cnorm.cond['data']] 127 | l1.set_xdata(x) 128 | x,y = evalpdf(cnorm) 129 | print cnorm 130 | #print y 131 | l2.set_xdata(y) 132 | l2.set_ydata(x) 133 | pl.draw() 134 | 135 | 136 | slider.on_changed(update) 137 | 138 | return slider 139 | 140 | if __name__ == '__main__': 141 | # Tests for the ConditionalNormal class... 142 | mu = [1.5, 0.5] 143 | sigma = [[1.0, 0.5], [0.5, 1.0]] 144 | n = Normal(2, mu = mu, sigma = sigma) 145 | sl = draw_slider_demo(n) 146 | pl.show() 147 | -------------------------------------------------------------------------------- /src/randcov.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | """ 3 | Author: Jeremy M. Stober 4 | Program: RANDCOV.PY 5 | Date: Thursday, October 27 2011 6 | Description: Generate random cov matrix in numpy. 7 | """ 8 | 9 | from numpy import * 10 | from numpy.linalg import * 11 | from numpy.random import * 12 | 13 | 14 | def gencov(n): 15 | S = randn(n,n) 16 | S = dot(S.transpose(), S) 17 | s = sqrt(diag(S)) 18 | t = diag(1.0/s) 19 | C = dot(dot(t,S),t) 20 | return C 21 | 22 | if __name__ == '__main__': 23 | print gencov(2) 24 | -------------------------------------------------------------------------------- /src/test_func.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | """ 3 | Author: Jeremy M. Stober 4 | Program: TEST_FUNC.PY 5 | Date: Monday, November 7 2011 6 | Description: 2-d noisy function for testing GMM/GMR. 7 | """ 8 | 9 | import numpy as np 10 | import pylab as pl 11 | import numpy.random as npr 12 | 13 | 14 | def noisy_cosine(): 15 | x = npr.rand(100) * np.pi * 2.0 16 | x.sort() 17 | y = np.cos(x) + 0.1 * npr.randn(100) 18 | return x,y 19 | 20 | if __name__ == '__main__': 21 | #pl.plot(*noisy_cosine()) 22 | x,y = noisy_cosine() 23 | pl.scatter(x,y) 24 | pl.show() 25 | 26 | -------------------------------------------------------------------------------- /src/test_gmm.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | """ 3 | Author: Jeremy M. Stober 4 | Program: TEST_GMM.PY 5 | Date: Thursday, November 3 2011 6 | Description: Testing code for gmm with new normal distribution. 7 | """ 8 | 9 | import numpy as np 10 | npa = np.array 11 | import pylab as pl 12 | from normal import Normal 13 | from gmm import GMM 14 | from plot_normal import draw2dnormal 15 | from plot_gmm import draw2dgmm 16 | 17 | if False: 18 | fp = open("../data/faithful.txt") 19 | data = [] 20 | for line in fp.readlines(): 21 | x,y = line.split() 22 | data.append([float(x),float(y)]) 23 | 24 | data = npa(data) 25 | pl.scatter(data[:,0],data[:,1]) 26 | gmm = GMM(dim = 2, ncomps = 2, data = data, method = "kmeans") 27 | 28 | #x = Normal(2, data=data) 29 | #draw2dnormal(x,show=True,axes=pl.gca()) 30 | print gmm 31 | draw2dgmm(gmm) 32 | pl.show() 33 | 34 | if False: 35 | 36 | from test_func import noisy_cosine 37 | 38 | x,y = noisy_cosine() 39 | data = np.vstack([x,y]).transpose() 40 | pl.scatter(data[:,0],data[:,1]) 41 | 42 | gmm = GMM(dim = 2, ncomps = 2, data = data, method = "kmeans") 43 | 44 | draw2dgmm(gmm) 45 | pl.show() 46 | #print data 47 | 48 | 49 | if True: 50 | 51 | from test_func import noisy_cosine 52 | 53 | x,y = noisy_cosine() 54 | data = np.vstack([x,y]).transpose() 55 | pl.scatter(data[:,0],data[:,1]) 56 | 57 | gmm = GMM(dim = 2, ncomps = 2, data = data, method = "kmeans") 58 | 59 | draw2dgmm(gmm) 60 | 61 | #pl.show() 62 | 63 | nx = np.arange(0,2 * np.pi, 0.1) 64 | ny = [] 65 | for i in nx: 66 | ngmm = gmm.condition([0],[i]) 67 | ny.append(ngmm.mean()) 68 | 69 | #ngmm = gmm.condition([0],[0.5]) 70 | #print ngmm.mean() 71 | #print np.cos(0.5) 72 | pl.plot(nx,ny,color='red') 73 | pl.show() 74 | #print data 75 | -------------------------------------------------------------------------------- /src/test_normal.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | """ 3 | Author: Jeremy M. Stober 4 | Program: TEST_NORMAL.PY 5 | Date: Wednesday, October 26 2011 6 | Description: Fit normal to old faithful data. 7 | """ 8 | 9 | 10 | from normal import Normal 11 | from plot_normal import draw2dnormal 12 | import pylab as pl 13 | import numpy as np 14 | npa = np.array 15 | import pdb 16 | 17 | 18 | if False: 19 | fp = open("faithful.txt") 20 | data = [] 21 | for line in fp.readlines(): 22 | x,y = line.split() 23 | data.append([float(x),float(y)]) 24 | 25 | data = npa(data) 26 | pl.scatter(data[:,0],data[:,1]) 27 | x = Normal(2, data=data) 28 | draw2dnormal(x,show=True,axes=pl.gca()) 29 | 30 | if True: 31 | x = Normal(2,mu = np.array([0.1,0.7]), sigma = np.array([[ 0.6, 0.4], [ 0.4, 0.6]])) 32 | s = x.simulate() 33 | draw2dnormal(x) 34 | pl.scatter(s[:,0],s[:,1]) 35 | pl.show() 36 | print s 37 | 38 | if False: 39 | x = Normal(2,mu = np.array([0.1,0.7]), sigma = np.array([[ 0.6, 0.4], [ 0.4, 0.6]])) 40 | #draw2dnormal(x,show=True) 41 | print x 42 | new = x.condition([0],0.1) 43 | print new 44 | 45 | if False: 46 | 47 | from randcov import gencov 48 | import numpy.random as npr 49 | import numpy.linalg as la 50 | 51 | S = gencov(5) 52 | mu = npr.randn(5) 53 | 54 | x = Normal(5,mu = mu, sigma = S) 55 | newx = x.condition([0,1],np.array([0.1,0.3])) 56 | print newx 57 | 58 | A = la.inv(S) 59 | newS = la.inv(A[2:,2:]) 60 | newmu = mu[2:] - np.dot(np.dot(newS, A[2:,:2]), (np.array([0.1,0.3])- mu[:2])) 61 | 62 | print newmu 63 | print newS # should match above 64 | 65 | 66 | 67 | --------------------------------------------------------------------------------