├── LICENSE
├── MANIFEST.in
├── README
├── asp_config.yml
├── distribute_setup.py
├── examples
│   ├── cluster.py
│   ├── diarizer.cfg
│   ├── plotting.py
│   └── song_recommendation.py
├── gmm_specializer
│   ├── __init__.py
│   └── gmm.py
├── run_tests.sh
├── setup.py
├── templates
│   ├── em_base_helper_funcs.mako
│   ├── em_cilk_eval.mako
│   ├── em_cilk_helper_funcs.mako
│   ├── em_cilk_kernel_decl.mako
│   ├── em_cilk_kernels.mako
│   ├── em_cilk_seed_components.mako
│   ├── em_cilk_train.mako
│   ├── em_cuda_device_helper_funcs.mako
│   ├── em_cuda_eval.mako
│   ├── em_cuda_host_helper_funcs.mako
│   ├── em_cuda_kernels.mako
│   ├── em_cuda_launch_decl.mako
│   ├── em_cuda_seed_components.mako
│   ├── em_cuda_train.mako
│   ├── em_tbb_eval.mako
│   ├── em_tbb_helper_funcs.mako
│   ├── em_tbb_kernel_decl.mako
│   ├── em_tbb_kernels.mako
│   ├── em_tbb_seed_components.mako
│   └── em_tbb_train.mako
└── tests
    ├── em_convert_from_pickle_dump_to_csv.py
    ├── gmm_test.py
    └── speech_data.csv

/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2010-2011, Regents of the University of California
2 | All rights reserved.
3 | Redistribution and use in source and binary forms, with or without
4 | modification, are permitted provided that the following conditions are met:
5 | 
6 | * Redistributions of source code must retain the above copyright
7 | notice, this list of conditions and the following disclaimer.
8 | * Redistributions in binary form must reproduce the above copyright
9 | notice, this list of conditions and the following disclaimer in the
10 | documentation and/or other materials provided with the distribution.
11 | * Neither the name of the University of California, Berkeley nor the
12 | names of its contributors may be used to endorse or promote products
13 | derived from this software without specific prior written permission.
14 | 
15 | THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS" AND ANY
16 | EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 | DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
19 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 | 
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include distribute_setup.py
2 | include LICENSE
3 | recursive-include specializers *.py *.sh *.mako
4 | recursive-include doc *
5 | prune doc/misc/*.pptx
6 | recursive-include tests *
--------------------------------------------------------------------------------
/README:
--------------------------------------------------------------------------------
1 | A GAUSSIAN MIXTURE MODEL SPECIALIZER BUILT ON ASP
2 | ------------------------------------------------
3 | BY H. COOK, E. GONINA, S.
KAMIL 4 | ------------------------------------------------ 5 | Contact egonina@eecs.berkeley.edu 6 | ------------------------------------------------ 7 | 8 | See https://github.com/hcook/gmm/wiki/ 9 | 10 | -------------------------------------------------------------------------------- /asp_config.yml: -------------------------------------------------------------------------------- 1 | GMM: 2 | autotune: False 3 | name_of_backend_to_use: "cuda" 4 | cuda_device_id: 0 5 | -------------------------------------------------------------------------------- /distribute_setup.py: -------------------------------------------------------------------------------- 1 | #!python 2 | """Bootstrap distribute installation 3 | 4 | If you want to use setuptools in your package's setup.py, just include this 5 | file in the same directory with it, and add this to the top of your setup.py:: 6 | 7 | from distribute_setup import use_setuptools 8 | use_setuptools() 9 | 10 | If you want to require a specific version of setuptools, set a download 11 | mirror, or use an alternate download directory, you can do so by supplying 12 | the appropriate options to ``use_setuptools()``. 13 | 14 | This file can also be run as a script to install or upgrade setuptools. 15 | """ 16 | import os 17 | import sys 18 | import time 19 | import fnmatch 20 | import tempfile 21 | import tarfile 22 | from distutils import log 23 | 24 | try: 25 | from site import USER_SITE 26 | except ImportError: 27 | USER_SITE = None 28 | 29 | try: 30 | import subprocess 31 | 32 | def _python_cmd(*args): 33 | args = (sys.executable,) + args 34 | return subprocess.call(args) == 0 35 | 36 | except ImportError: 37 | # will be used for python 2.3 38 | def _python_cmd(*args): 39 | args = (sys.executable,) + args 40 | # quoting arguments if windows 41 | if sys.platform == 'win32': 42 | def quote(arg): 43 | if ' ' in arg: 44 | return '"%s"' % arg 45 | return arg 46 | args = [quote(arg) for arg in args] 47 | return os.spawnl(os.P_WAIT, sys.executable, *args) == 0 48 | 49 | DEFAULT_VERSION = "0.6.16" 50 | DEFAULT_URL = "http://pypi.python.org/packages/source/d/distribute/" 51 | SETUPTOOLS_FAKED_VERSION = "0.6c11" 52 | 53 | SETUPTOOLS_PKG_INFO = """\ 54 | Metadata-Version: 1.0 55 | Name: setuptools 56 | Version: %s 57 | Summary: xxxx 58 | Home-page: xxx 59 | Author: xxx 60 | Author-email: xxx 61 | License: xxx 62 | Description: xxx 63 | """ % SETUPTOOLS_FAKED_VERSION 64 | 65 | 66 | def _install(tarball): 67 | # extracting the tarball 68 | tmpdir = tempfile.mkdtemp() 69 | log.warn('Extracting in %s', tmpdir) 70 | old_wd = os.getcwd() 71 | try: 72 | os.chdir(tmpdir) 73 | tar = tarfile.open(tarball) 74 | _extractall(tar) 75 | tar.close() 76 | 77 | # going in the directory 78 | subdir = os.path.join(tmpdir, os.listdir(tmpdir)[0]) 79 | os.chdir(subdir) 80 | log.warn('Now working in %s', subdir) 81 | 82 | # installing 83 | log.warn('Installing Distribute') 84 | if not _python_cmd('setup.py', 'install'): 85 | log.warn('Something went wrong during the installation.') 86 | log.warn('See the error message above.') 87 | finally: 88 | os.chdir(old_wd) 89 | 90 | 91 | def _build_egg(egg, tarball, to_dir): 92 | # extracting the tarball 93 | tmpdir = tempfile.mkdtemp() 94 | log.warn('Extracting in %s', tmpdir) 95 | old_wd = os.getcwd() 96 | try: 97 | os.chdir(tmpdir) 98 | tar = tarfile.open(tarball) 99 | _extractall(tar) 100 | tar.close() 101 | 102 | # going in the directory 103 | subdir = os.path.join(tmpdir, os.listdir(tmpdir)[0]) 104 | os.chdir(subdir) 105 | log.warn('Now 
working in %s', subdir) 106 | 107 | # building an egg 108 | log.warn('Building a Distribute egg in %s', to_dir) 109 | _python_cmd('setup.py', '-q', 'bdist_egg', '--dist-dir', to_dir) 110 | 111 | finally: 112 | os.chdir(old_wd) 113 | # returning the result 114 | log.warn(egg) 115 | if not os.path.exists(egg): 116 | raise IOError('Could not build the egg.') 117 | 118 | 119 | def _do_download(version, download_base, to_dir, download_delay): 120 | egg = os.path.join(to_dir, 'distribute-%s-py%d.%d.egg' 121 | % (version, sys.version_info[0], sys.version_info[1])) 122 | if not os.path.exists(egg): 123 | tarball = download_setuptools(version, download_base, 124 | to_dir, download_delay) 125 | _build_egg(egg, tarball, to_dir) 126 | sys.path.insert(0, egg) 127 | import setuptools 128 | setuptools.bootstrap_install_from = egg 129 | 130 | 131 | def use_setuptools(version=DEFAULT_VERSION, download_base=DEFAULT_URL, 132 | to_dir=os.curdir, download_delay=15, no_fake=True): 133 | # making sure we use the absolute path 134 | to_dir = os.path.abspath(to_dir) 135 | was_imported = 'pkg_resources' in sys.modules or \ 136 | 'setuptools' in sys.modules 137 | try: 138 | try: 139 | import pkg_resources 140 | if not hasattr(pkg_resources, '_distribute'): 141 | if not no_fake: 142 | _fake_setuptools() 143 | raise ImportError 144 | except ImportError: 145 | return _do_download(version, download_base, to_dir, download_delay) 146 | try: 147 | pkg_resources.require("distribute>="+version) 148 | return 149 | except pkg_resources.VersionConflict: 150 | e = sys.exc_info()[1] 151 | if was_imported: 152 | sys.stderr.write( 153 | "The required version of distribute (>=%s) is not available,\n" 154 | "and can't be installed while this script is running. Please\n" 155 | "install a more recent version first, using\n" 156 | "'easy_install -U distribute'." 157 | "\n\n(Currently using %r)\n" % (version, e.args[0])) 158 | sys.exit(2) 159 | else: 160 | del pkg_resources, sys.modules['pkg_resources'] # reload ok 161 | return _do_download(version, download_base, to_dir, 162 | download_delay) 163 | except pkg_resources.DistributionNotFound: 164 | return _do_download(version, download_base, to_dir, 165 | download_delay) 166 | finally: 167 | if not no_fake: 168 | _create_fake_setuptools_pkg_info(to_dir) 169 | 170 | def download_setuptools(version=DEFAULT_VERSION, download_base=DEFAULT_URL, 171 | to_dir=os.curdir, delay=15): 172 | """Download distribute from a specified location and return its filename 173 | 174 | `version` should be a valid distribute version number that is available 175 | as an egg for download under the `download_base` URL (which should end 176 | with a '/'). `to_dir` is the directory where the egg will be downloaded. 177 | `delay` is the number of seconds to pause before an actual download 178 | attempt. 179 | """ 180 | # making sure we use the absolute path 181 | to_dir = os.path.abspath(to_dir) 182 | try: 183 | from urllib.request import urlopen 184 | except ImportError: 185 | from urllib2 import urlopen 186 | tgz_name = "distribute-%s.tar.gz" % version 187 | url = download_base + tgz_name 188 | saveto = os.path.join(to_dir, tgz_name) 189 | src = dst = None 190 | if not os.path.exists(saveto): # Avoid repeated downloads 191 | try: 192 | log.warn("Downloading %s", url) 193 | src = urlopen(url) 194 | # Read/write all in one block, so we don't create a corrupt file 195 | # if the download is interrupted. 
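            # (A partially written file would otherwise pass the
            # os.path.exists() check above on the next run.)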
196 | data = src.read() 197 | dst = open(saveto, "wb") 198 | dst.write(data) 199 | finally: 200 | if src: 201 | src.close() 202 | if dst: 203 | dst.close() 204 | return os.path.realpath(saveto) 205 | 206 | def _no_sandbox(function): 207 | def __no_sandbox(*args, **kw): 208 | try: 209 | from setuptools.sandbox import DirectorySandbox 210 | if not hasattr(DirectorySandbox, '_old'): 211 | def violation(*args): 212 | pass 213 | DirectorySandbox._old = DirectorySandbox._violation 214 | DirectorySandbox._violation = violation 215 | patched = True 216 | else: 217 | patched = False 218 | except ImportError: 219 | patched = False 220 | 221 | try: 222 | return function(*args, **kw) 223 | finally: 224 | if patched: 225 | DirectorySandbox._violation = DirectorySandbox._old 226 | del DirectorySandbox._old 227 | 228 | return __no_sandbox 229 | 230 | def _patch_file(path, content): 231 | """Will backup the file then patch it""" 232 | existing_content = open(path).read() 233 | if existing_content == content: 234 | # already patched 235 | log.warn('Already patched.') 236 | return False 237 | log.warn('Patching...') 238 | _rename_path(path) 239 | f = open(path, 'w') 240 | try: 241 | f.write(content) 242 | finally: 243 | f.close() 244 | return True 245 | 246 | _patch_file = _no_sandbox(_patch_file) 247 | 248 | def _same_content(path, content): 249 | return open(path).read() == content 250 | 251 | def _rename_path(path): 252 | new_name = path + '.OLD.%s' % time.time() 253 | log.warn('Renaming %s into %s', path, new_name) 254 | os.rename(path, new_name) 255 | return new_name 256 | 257 | def _remove_flat_installation(placeholder): 258 | if not os.path.isdir(placeholder): 259 | log.warn('Unkown installation at %s', placeholder) 260 | return False 261 | found = False 262 | for file in os.listdir(placeholder): 263 | if fnmatch.fnmatch(file, 'setuptools*.egg-info'): 264 | found = True 265 | break 266 | if not found: 267 | log.warn('Could not locate setuptools*.egg-info') 268 | return 269 | 270 | log.warn('Removing elements out of the way...') 271 | pkg_info = os.path.join(placeholder, file) 272 | if os.path.isdir(pkg_info): 273 | patched = _patch_egg_dir(pkg_info) 274 | else: 275 | patched = _patch_file(pkg_info, SETUPTOOLS_PKG_INFO) 276 | 277 | if not patched: 278 | log.warn('%s already patched.', pkg_info) 279 | return False 280 | # now let's move the files out of the way 281 | for element in ('setuptools', 'pkg_resources.py', 'site.py'): 282 | element = os.path.join(placeholder, element) 283 | if os.path.exists(element): 284 | _rename_path(element) 285 | else: 286 | log.warn('Could not find the %s element of the ' 287 | 'Setuptools distribution', element) 288 | return True 289 | 290 | _remove_flat_installation = _no_sandbox(_remove_flat_installation) 291 | 292 | def _after_install(dist): 293 | log.warn('After install bootstrap.') 294 | placeholder = dist.get_command_obj('install').install_purelib 295 | _create_fake_setuptools_pkg_info(placeholder) 296 | 297 | def _create_fake_setuptools_pkg_info(placeholder): 298 | if not placeholder or not os.path.exists(placeholder): 299 | log.warn('Could not find the install location') 300 | return 301 | pyver = '%s.%s' % (sys.version_info[0], sys.version_info[1]) 302 | setuptools_file = 'setuptools-%s-py%s.egg-info' % \ 303 | (SETUPTOOLS_FAKED_VERSION, pyver) 304 | pkg_info = os.path.join(placeholder, setuptools_file) 305 | if os.path.exists(pkg_info): 306 | log.warn('%s already exists', pkg_info) 307 | return 308 | 309 | log.warn('Creating %s', pkg_info) 310 | f = 
open(pkg_info, 'w') 311 | try: 312 | f.write(SETUPTOOLS_PKG_INFO) 313 | finally: 314 | f.close() 315 | 316 | pth_file = os.path.join(placeholder, 'setuptools.pth') 317 | log.warn('Creating %s', pth_file) 318 | f = open(pth_file, 'w') 319 | try: 320 | f.write(os.path.join(os.curdir, setuptools_file)) 321 | finally: 322 | f.close() 323 | 324 | _create_fake_setuptools_pkg_info = _no_sandbox(_create_fake_setuptools_pkg_info) 325 | 326 | def _patch_egg_dir(path): 327 | # let's check if it's already patched 328 | pkg_info = os.path.join(path, 'EGG-INFO', 'PKG-INFO') 329 | if os.path.exists(pkg_info): 330 | if _same_content(pkg_info, SETUPTOOLS_PKG_INFO): 331 | log.warn('%s already patched.', pkg_info) 332 | return False 333 | _rename_path(path) 334 | os.mkdir(path) 335 | os.mkdir(os.path.join(path, 'EGG-INFO')) 336 | pkg_info = os.path.join(path, 'EGG-INFO', 'PKG-INFO') 337 | f = open(pkg_info, 'w') 338 | try: 339 | f.write(SETUPTOOLS_PKG_INFO) 340 | finally: 341 | f.close() 342 | return True 343 | 344 | _patch_egg_dir = _no_sandbox(_patch_egg_dir) 345 | 346 | def _before_install(): 347 | log.warn('Before install bootstrap.') 348 | _fake_setuptools() 349 | 350 | 351 | def _under_prefix(location): 352 | if 'install' not in sys.argv: 353 | return True 354 | args = sys.argv[sys.argv.index('install')+1:] 355 | for index, arg in enumerate(args): 356 | for option in ('--root', '--prefix'): 357 | if arg.startswith('%s=' % option): 358 | top_dir = arg.split('root=')[-1] 359 | return location.startswith(top_dir) 360 | elif arg == option: 361 | if len(args) > index: 362 | top_dir = args[index+1] 363 | return location.startswith(top_dir) 364 | if arg == '--user' and USER_SITE is not None: 365 | return location.startswith(USER_SITE) 366 | return True 367 | 368 | 369 | def _fake_setuptools(): 370 | log.warn('Scanning installed packages') 371 | try: 372 | import pkg_resources 373 | except ImportError: 374 | # we're cool 375 | log.warn('Setuptools or Distribute does not seem to be installed.') 376 | return 377 | ws = pkg_resources.working_set 378 | try: 379 | setuptools_dist = ws.find(pkg_resources.Requirement.parse('setuptools', 380 | replacement=False)) 381 | except TypeError: 382 | # old distribute API 383 | setuptools_dist = ws.find(pkg_resources.Requirement.parse('setuptools')) 384 | 385 | if setuptools_dist is None: 386 | log.warn('No setuptools distribution found') 387 | return 388 | # detecting if it was already faked 389 | setuptools_location = setuptools_dist.location 390 | log.warn('Setuptools installation detected at %s', setuptools_location) 391 | 392 | # if --root or --preix was provided, and if 393 | # setuptools is not located in them, we don't patch it 394 | if not _under_prefix(setuptools_location): 395 | log.warn('Not patching, --root or --prefix is installing Distribute' 396 | ' in another location') 397 | return 398 | 399 | # let's see if its an egg 400 | if not setuptools_location.endswith('.egg'): 401 | log.warn('Non-egg installation') 402 | res = _remove_flat_installation(setuptools_location) 403 | if not res: 404 | return 405 | else: 406 | log.warn('Egg installation') 407 | pkg_info = os.path.join(setuptools_location, 'EGG-INFO', 'PKG-INFO') 408 | if (os.path.exists(pkg_info) and 409 | _same_content(pkg_info, SETUPTOOLS_PKG_INFO)): 410 | log.warn('Already patched.') 411 | return 412 | log.warn('Patching...') 413 | # let's create a fake egg replacing setuptools one 414 | res = _patch_egg_dir(setuptools_location) 415 | if not res: 416 | return 417 | log.warn('Patched done.') 418 | 
_relaunch() 419 | 420 | 421 | def _relaunch(): 422 | log.warn('Relaunching...') 423 | # we have to relaunch the process 424 | # pip marker to avoid a relaunch bug 425 | if sys.argv[:3] == ['-c', 'install', '--single-version-externally-managed']: 426 | sys.argv[0] = 'setup.py' 427 | args = [sys.executable] + sys.argv 428 | sys.exit(subprocess.call(args)) 429 | 430 | 431 | def _extractall(self, path=".", members=None): 432 | """Extract all members from the archive to the current working 433 | directory and set owner, modification time and permissions on 434 | directories afterwards. `path' specifies a different directory 435 | to extract to. `members' is optional and must be a subset of the 436 | list returned by getmembers(). 437 | """ 438 | import copy 439 | import operator 440 | from tarfile import ExtractError 441 | directories = [] 442 | 443 | if members is None: 444 | members = self 445 | 446 | for tarinfo in members: 447 | if tarinfo.isdir(): 448 | # Extract directories with a safe mode. 449 | directories.append(tarinfo) 450 | tarinfo = copy.copy(tarinfo) 451 | tarinfo.mode = 448 # decimal for oct 0700 452 | self.extract(tarinfo, path) 453 | 454 | # Reverse sort directories. 455 | if sys.version_info < (2, 4): 456 | def sorter(dir1, dir2): 457 | return cmp(dir1.name, dir2.name) 458 | directories.sort(sorter) 459 | directories.reverse() 460 | else: 461 | directories.sort(key=operator.attrgetter('name'), reverse=True) 462 | 463 | # Set correct owner, mtime and filemode on directories. 464 | for tarinfo in directories: 465 | dirpath = os.path.join(path, tarinfo.name) 466 | try: 467 | self.chown(tarinfo, dirpath) 468 | self.utime(tarinfo, dirpath) 469 | self.chmod(tarinfo, dirpath) 470 | except ExtractError: 471 | e = sys.exc_info()[1] 472 | if self.errorlevel > 1: 473 | raise 474 | else: 475 | self._dbg(1, "tarfile: %s" % e) 476 | 477 | 478 | def main(argv, version=DEFAULT_VERSION): 479 | """Install or upgrade setuptools and EasyInstall""" 480 | tarball = download_setuptools() 481 | _install(tarball) 482 | 483 | 484 | if __name__ == '__main__': 485 | main(sys.argv[1:]) 486 | -------------------------------------------------------------------------------- /examples/cluster.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import pylab as pl 3 | import matplotlib as mpl 4 | import itertools 5 | import sys 6 | import math 7 | import timeit 8 | import copy 9 | import time 10 | import struct 11 | import scipy.stats.mstats as stats 12 | import ConfigParser 13 | import os.path 14 | import getopt 15 | import h5py 16 | 17 | from gmm_specializer.gmm import * 18 | 19 | 20 | MINVALUEFORMINUSLOG = -1000.0 21 | 22 | class Diarizer(object): 23 | 24 | def __init__(self, f_file_name, sp_file_name): 25 | #self.variant_param_spaces = variant_param_spaces 26 | #self.device_id = device_id 27 | #self.names_of_backends = names_of_backends 28 | 29 | f = open(f_file_name, "rb") 30 | 31 | print "...Reading in HTK feature file..." 
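        # HTK binary feature files begin with a 12-byte big-endian header:
        #   nSamples   (int32) - number of feature frames in the file
        #   sampPeriod (int32) - sample period, in 100ns units
        #   sampSize   (int16) - bytes per frame (4 bytes per float coefficient)
        #   sampKind   (int16) - code for the parameter kind (e.g. MFCC)
        # The float frames that follow are unpacked one value at a time below.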
32 | 33 | #=== Read Feature File == 34 | try: 35 | nSamples = struct.unpack('>i', f.read(4))[0] 36 | sampPeriod = struct.unpack('>i', f.read(4))[0] 37 | sampSize = struct.unpack('>h', f.read(2))[0] 38 | sampKind = struct.unpack('>h', f.read(2))[0] 39 | 40 | print "INFO: total number of frames read: ", nSamples 41 | self.total_num_frames = nSamples 42 | 43 | D = sampSize/4 #dimension of feature vector 44 | l = [] 45 | count = 0 46 | while count < (nSamples * D): 47 | bFloat = f.read(4) 48 | fl = struct.unpack('>f', bFloat)[0] 49 | l.append(fl) 50 | count = count + 1 51 | finally: 52 | f.close() 53 | 54 | #=== Prune to Speech Only == 55 | print "...Reading in speech/nonspeech file..." 56 | pruned_list = [] 57 | num_speech_frames = nSamples 58 | 59 | if sp_file_name: 60 | sp = open(sp_file_name, "r") 61 | 62 | l_start = [] 63 | l_end = [] 64 | num_speech_frames = 0 65 | for line in sp: 66 | s = line.split(' ') 67 | st = math.floor(100 * float(s[2]) + 0.5) 68 | en = math.floor(100 * float(s[3].replace('\n','')) + 0.5) 69 | st1 = int(st) 70 | en1 = int(en) 71 | l_start.append(st1*19) 72 | l_end.append(en1*19) 73 | num_speech_frames = num_speech_frames + (en1 - st1 + 1) 74 | 75 | print "INFO: total number of speech frames: ", num_speech_frames 76 | 77 | total = 0 78 | for start in l_start: 79 | end = l_end[l_start.index(start)] 80 | total += (end/19 - start/19 + 1) 81 | x = 0 82 | index = start 83 | while x < (end-start+19): 84 | pruned_list.append(l[index]) 85 | index += 1 86 | x += 1 87 | else: #no speech file, take in all features 88 | pruned_list = l 89 | 90 | floatArray = np.array(pruned_list, dtype = np.float32) 91 | self.X = floatArray.reshape(num_speech_frames, D) 92 | 93 | self.N = self.X.shape[0] 94 | self.D = self.X.shape[1] 95 | 96 | 97 | def write_to_RTTM(self, rttm_file_name, sp_file_name, meeting_name, most_likely, num_gmms, seg_length): 98 | 99 | print "...Writing out RTTM file..." 
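        # Each output row follows the RTTM speaker-segment convention used here:
        #   SPEAKER <meeting_name> 1 <start_secs> <duration_secs> speaker_<id>
        # Frame-level labels are first smoothed by majority vote over
        # fixed-length chunks (seg_length frames, 250 by default) to suppress
        # spurious speaker flips.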
100 | 
101 |         #do majority voting in chunks of seg_length frames (250 by default)
102 |         duration = seg_length
103 |         chunk = 0
104 |         end_chunk = duration
105 | 
106 |         max_gmm_list = []
107 | 
108 |         smoothed_most_likely = np.array([], dtype=np.float32)
109 | 
110 | 
111 |         while end_chunk < len(most_likely):
112 |             chunk_arr = most_likely[range(chunk, end_chunk)]
113 |             max_gmm = stats.mode(chunk_arr)[0][0]
114 |             max_gmm_list.append(max_gmm)
115 |             smoothed_most_likely = np.append(smoothed_most_likely, max_gmm*np.ones(duration))
116 |             chunk += duration
117 |             end_chunk += duration
118 | 
119 |         end_chunk -= duration
120 |         if end_chunk < len(most_likely):
121 |             chunk_arr = most_likely[range(end_chunk, len(most_likely))]
122 |             max_gmm = stats.mode(chunk_arr)[0][0]
123 |             max_gmm_list.append(max_gmm)
124 |             smoothed_most_likely = np.append(smoothed_most_likely, max_gmm*np.ones(len(most_likely)-end_chunk))
125 | 
126 |         most_likely = smoothed_most_likely
127 | 
128 |         out_file = open(rttm_file_name, 'w')
129 | 
130 |         with_non_speech = -1*np.ones(self.total_num_frames)
131 | 
132 |         if sp_file_name:
133 |             speech_seg = np.loadtxt(sp_file_name, delimiter=' ', usecols=(2,3))
134 |             speech_seg_i = np.round(speech_seg*100).astype('int32')
135 |             sizes = np.diff(speech_seg_i)
136 | 
137 |             sizes = sizes.reshape(sizes.size)
138 |             offsets = np.cumsum(sizes)
139 |             offsets = np.hstack((0, offsets[0:-1]))
140 | 
141 |             offsets += np.array(range(len(offsets)))
142 | 
143 |             #populate the array with speech clusters
144 |             speech_index = 0
145 |             counter = 0
146 |             for pair in speech_seg_i:
147 |                 st = pair[0]
148 |                 en = pair[1]
149 |                 speech_index = offsets[counter]
150 | 
151 |                 counter += 1
152 |                 idx = 0
153 |                 for x in range(st+1, en+1):
154 |                     with_non_speech[x] = most_likely[speech_index+idx]
155 |                     idx += 1
156 |         else:
157 |             with_non_speech = most_likely
158 | 
159 |         cnum = with_non_speech[0]
160 |         cst = 0
161 |         cen = 0
162 |         for i in range(1, self.total_num_frames):
163 |             if with_non_speech[i] != cnum:
164 |                 if (cnum >= 0):
165 |                     start_secs = ((cst)*0.01)
166 |                     dur_secs = (cen - cst + 2)*0.01
167 |                     out_file.write("SPEAKER " + meeting_name + " 1 " + str(start_secs) + " " + str(dur_secs) + " " + "speaker_" + str(cnum) + " \n")
168 | 
169 | 
170 |                 cst = i
171 |                 cen = i
172 |                 cnum = with_non_speech[i]
173 |             else:
174 |                 cen += 1
175 | 
176 |         if cst < cen:
177 |             cnum = with_non_speech[self.total_num_frames-1]
178 |             if (cnum >= 0):
179 |                 start_secs = ((cst+1)*0.01)
180 |                 dur_secs = (cen - cst + 1)*0.01
181 |                 out_file.write("SPEAKER " + meeting_name + " 1 " + str(start_secs) + " " + str(dur_secs) + " " + "speaker_" + str(cnum) + " \n")
182 | 
183 | 
184 |         print "DONE writing RTTM file"
185 | 
186 |     def write_to_GMM(self, gmmfile):
187 | 
188 |         gmm_f = open(gmmfile, 'w')
189 | 
190 |         gmm_f.write("Number of clusters: " + str(len(self.gmm_list)) + "\n")
191 | 
192 |         #print parameters
193 |         cluster_count = 0
194 |         for gmm in self.gmm_list:
195 | 
196 |             gmm_f.write("Cluster " + str(cluster_count) + "\n")
197 |             means = gmm.components.means
198 |             covars = gmm.components.covars
199 |             weights = gmm.components.weights
200 | 
201 |             gmm_f.write("Number of Gaussians: " + str(gmm.M) + "\n")
202 | 
203 |             gmm_count = 0
204 |             for g in range(0, gmm.M):
205 |                 g_means = means[gmm_count]
206 |                 g_covar_full = covars[gmm_count]
207 |                 g_covar = np.diag(g_covar_full)
208 |                 g_weight = weights[gmm_count]
209 | 
210 |                 gmm_f.write("Gaussian: " + str(gmm_count) + "\n")
211 |                 gmm_f.write("Weight: " + str(g_weight) + "\n")
212 | 
213 |                 for f in range(0, gmm.D):
214 |                     gmm_f.write("Feature " + str(f) + " Mean " + str(g_means[f]) + " Var " + str(g_covar[f]) + "\n")
215 | 
216 |                 gmm_count += 1
217 | 
218 |             cluster_count += 1
219 | 
220 |         print "DONE writing GMM file"
221 | 
222 |     def new_gmm(self, M, cvtype):
223 |         self.M = M
224 |         self.gmm = GMM(self.M, self.D, cvtype=cvtype)
225 | 
226 |     def new_gmm_list(self, M, K, cvtype):
227 |         self.M = M
228 |         self.init_num_clusters = K
229 |         self.gmm_list = [GMM(self.M, self.D, cvtype=cvtype) for i in range(K)]
230 | 
231 | 
232 | 
233 |     def segment_majority_vote(self, interval_size, em_iters):
234 | 
235 |         num_clusters = len(self.gmm_list)
236 | 
237 |         # Resegment data based on likelihood scoring
238 |         likelihoods = self.gmm_list[0].score(self.X)
239 |         for g in self.gmm_list[1:]:
240 |             likelihoods = np.column_stack((likelihoods, g.score(self.X)))
241 | 
242 |         if num_clusters == 1:
243 |             most_likely = np.zeros(len(self.X))
244 |         else:
245 |             most_likely = likelihoods.argmax(axis=1)
246 | 
247 | 
248 |         # Across 2.5 secs of observations, vote on which cluster they should be associated with
249 | 
250 |         iter_training = {}
251 | 
252 |         for i in range(interval_size, self.N, interval_size):
253 | 
254 |             arr = np.array(most_likely[(range(i-interval_size, i))])
255 |             max_gmm = int(stats.mode(arr)[0][0])
256 |             iter_training.setdefault((self.gmm_list[max_gmm],max_gmm),[]).append(self.X[i-interval_size:i,:])
257 | 
258 |         arr = np.array(most_likely[(range((self.N/interval_size)*interval_size, self.N))])
259 |         max_gmm = int(stats.mode(arr)[0][0])
260 |         iter_training.setdefault((self.gmm_list[max_gmm], max_gmm),[]).append(self.X[(self.N/interval_size)*interval_size:self.N,:])
261 | 
262 |         iter_bic_dict = {}
263 |         iter_bic_list = []
264 | 
265 |         # for each gmm, append all the segments and retrain
266 |         for gp, data_list in iter_training.iteritems():
267 |             g = gp[0]
268 |             p = gp[1]
269 |             cluster_data = data_list[0]
270 | 
271 |             for d in data_list[1:]:
272 |                 cluster_data = np.concatenate((cluster_data, d))
273 | 
274 |             g.train(cluster_data, max_em_iters=em_iters)
275 | 
276 |             iter_bic_list.append((g,cluster_data))
277 |             iter_bic_dict[p] = cluster_data
278 | 
279 |         return iter_bic_dict, iter_bic_list, most_likely
280 | 
281 | 
282 |     def cluster(self, em_iters, KL_ntop, NUM_SEG_LOOPS_INIT, NUM_SEG_LOOPS, seg_length):
283 | 
284 |         print " ====================== CLUSTERING ====================== "
285 |         main_start = time.time()
286 | 
287 |         # ----------- Uniform Initialization -----------
288 |         # Get the events, divide them into an initial k clusters and train each GMM on a cluster
289 |         per_cluster = self.N/self.init_num_clusters
290 |         init_training = zip(self.gmm_list,np.vsplit(self.X, range(per_cluster, self.N, per_cluster)))
291 | 
292 |         for g, x in init_training:
293 |             g.train(x, max_em_iters=em_iters)
294 | 
295 |         # ----------- First majority vote segmentation loop ---------
296 |         for segment_iter in range(0,NUM_SEG_LOOPS_INIT):
297 |             iter_bic_dict, iter_bic_list, most_likely = self.segment_majority_vote(seg_length, em_iters)
298 | 
299 | 
300 |         # ----------- Main Clustering Loop using BIC ------------
301 | 
302 |         # Perform hierarchical agglomeration based on BIC scores
303 |         best_BIC_score = 1.0
304 |         total_events = 0
305 |         total_loops = 0
306 | 
307 |         while (best_BIC_score > 0 and len(self.gmm_list) > 1):
308 | 
309 |             total_loops += 1
310 |             for segment_iter in range(0,NUM_SEG_LOOPS):
311 |                 iter_bic_dict, iter_bic_list, most_likely = self.segment_majority_vote(seg_length, em_iters)
312 | 
313 |             # Score all pairs of GMMs using BIC
314 |             best_merged_gmm = None
315 |             best_BIC_score = 0.0
316 |             merged_tuple = None
317 |             merged_tuple_indices = None
318 | 
319 |             # ------- KL distance to compute best pairs to merge -------
320 |             if KL_ntop > 0:
321 | 
322 |                 top_K_gmm_pairs = self.gmm_list[0].find_top_KL_pairs(KL_ntop, self.gmm_list)
323 |                 for pair in top_K_gmm_pairs:
324 |                     score = 0.0
325 |                     gmm1idx = pair[0]
326 |                     gmm2idx = pair[1]
327 |                     g1 = self.gmm_list[gmm1idx]
328 |                     g2 = self.gmm_list[gmm2idx]
329 | 
330 |                     if gmm1idx in iter_bic_dict and gmm2idx in iter_bic_dict:
331 |                         d1 = iter_bic_dict[gmm1idx]
332 |                         d2 = iter_bic_dict[gmm2idx]
333 |                         data = np.concatenate((d1,d2))
334 |                     elif gmm1idx in iter_bic_dict:
335 |                         data = iter_bic_dict[gmm1idx]
336 |                     elif gmm2idx in iter_bic_dict:
337 |                         data = iter_bic_dict[gmm2idx]
338 |                     else:
339 |                         continue
340 | 
341 |                     new_gmm, score = compute_distance_BIC(g1, g2, data, em_iters)
342 | 
343 |                     #print "Comparing BIC %d with %d: %f" % (gmm1idx, gmm2idx, score)
344 |                     if score > best_BIC_score:
345 |                         best_merged_gmm = new_gmm
346 |                         merged_tuple = (g1, g2)
347 |                         merged_tuple_indices = (gmm1idx, gmm2idx)
348 |                         best_BIC_score = score
349 | 
350 |             # ------- All-to-all comparison of gmms to merge -------
351 |             else:
352 |                 l = len(iter_bic_list)
353 | 
354 |                 for gmm1idx in range(l):
355 |                     for gmm2idx in range(gmm1idx+1, l):
356 |                         score = 0.0
357 |                         g1, d1 = iter_bic_list[gmm1idx]
358 |                         g2, d2 = iter_bic_list[gmm2idx]
359 | 
360 |                         data = np.concatenate((d1,d2))
361 |                         new_gmm, score = compute_distance_BIC(g1, g2, data, em_iters)
362 | 
363 |                         #print "Comparing BIC %d with %d: %f" % (gmm1idx, gmm2idx, score)
364 |                         if score > best_BIC_score:
365 |                             best_merged_gmm = new_gmm
366 |                             merged_tuple = (g1, g2)
367 |                             merged_tuple_indices = (gmm1idx, gmm2idx)
368 |                             best_BIC_score = score
369 | 
370 |             # Merge the winning candidate pair if it is desirable to do so
371 |             if best_BIC_score > 0.0:
372 |                 gmms_with_events = []
373 |                 for gp in iter_bic_list:
374 |                     gmms_with_events.append(gp[0])
375 | 
376 |                 #cleanup the gmm_list - remove empty gmms
377 |                 for g in self.gmm_list:
378 |                     if g not in gmms_with_events and g != merged_tuple[0] and g != merged_tuple[1]:
379 |                         #remove
380 |                         self.gmm_list.remove(g)
381 | 
382 |                 self.gmm_list.remove(merged_tuple[0])
383 |                 self.gmm_list.remove(merged_tuple[1])
384 |                 self.gmm_list.append(best_merged_gmm)
385 | 
386 | 
387 | 
388 |             print " size of each cluster:", [ g.M for g in self.gmm_list]
389 | 
390 |         print "=== Total clustering time: ", time.time()-main_start
391 |         print "=== Final size of each cluster:", [ g.M for g in self.gmm_list]
392 | 
393 |         return most_likely
394 | 
395 | 
396 | def print_usage():
397 |     print """ ---------------------------------------------------------------------
398 | Speaker Diarization in Python with Asp and the GMM Specializer usage:
399 | ---------------------------------------------------------------------
400 | Arguments for the diarizer are parsed from a config file.
401 | The default config file is diarizer.cfg, but you can pass your own file with the '-c' option.
402 | The config file requires the [Diarizer] section header, and the options are as follows:
403 | 
404 | --- Required: ---
405 | basename: \t Basename of the file to process
406 | mfcc_feats: \t MFCC input feature file
407 | output_cluster: \t Output clustering file
408 | gmm_output: \t Output GMMs parameters file
409 | M_mfcc: \t Number of Gaussians per model for mfcc
410 | initial_clusters: Number of initial clusters
411 | 
412 | --- Optional: ---
413 | spnsp_file: \t spnsp file (all features used by default)
414 | KL_ntop: \t Number of combinations to evaluate BIC on
415 | \t 0 to deactivate KL-divergence (fast-match component)
416 | em_iterations: \t Number of iterations for the standard
417 | \t segmentation loop training (3 by default)
418 | num_seg_iters_init: \t Number of majority vote iterations
419 | \t in the initialization phase (2 by default)
420 | num_seg_iters: \t Number of majority vote iterations
421 | \t in the main loop (3 by default)
422 | seg_length: \t Segment length for majority vote in frames
423 | \t (250 frames by default)
424 | 
425 | For fastest performance, enable KL-divergence (KL_ntop = 3) and set
426 | \t num_seg_iters_init and num_seg_iters to 1
427 | """
428 | 
429 | 
430 | def print_no_config():
431 | 
432 |     print "Please supply a config file with -c 'config_file_name.cfg' "
433 |     return
434 | 
435 | def get_config_params(config):
436 |     #read in filenames
437 |     try:
438 |         meeting_name = config.get('Diarizer', 'basename')
439 |     except:
440 |         print "basename not specified in config file! exiting..."
441 |         sys.exit(2)
442 |     try:
443 |         f = config.get('Diarizer', 'mfcc_feats')
444 |     except:
445 |         print "Feature file mfcc_feats not specified in config file! exiting..."
446 |         sys.exit(2)
447 | 
448 |     try:
449 |         sp = config.get('Diarizer', 'spnsp_file')
450 |     except:
451 |         print "Speech file spnsp_file not specified, continuing without it..."
452 |         sp = False
453 | 
454 |     try:
455 |         outfile = config.get('Diarizer', 'output_cluster')
456 |     except:
457 |         print "output_cluster file not specified in config file! exiting..."
458 |         sys.exit(2)
459 | 
460 |     try:
461 |         gmmfile = config.get('Diarizer', 'gmm_output')
462 |     except:
463 |         print "gmm_output file not specified in config file! exiting..."
464 |         sys.exit(2)
465 | 
466 |     #read GMM parameters
467 |     try:
468 |         num_gmms = int(config.get('Diarizer', 'initial_clusters'))
469 |     except:
470 |         print "initial_clusters not specified in config file! exiting..."
471 |         sys.exit(2)
472 | 
473 |     try:
474 |         num_comps = int(config.get('Diarizer', 'M_mfcc'))
475 |     except:
476 |         print "M_mfcc not specified in config file! exiting..."
477 |         sys.exit(2)
478 | 
479 |     #read algorithm configuration
480 |     try:
481 |         kl_ntop = int(config.get('Diarizer', 'KL_ntop'))
482 |     except:
483 |         kl_ntop = 0
484 |     try:
485 |         num_seg_iters_init = int(config.get('Diarizer', 'num_seg_iters_init'))
486 |     except:
487 |         num_seg_iters_init = 2
488 | 
489 |     try:
490 |         num_seg_iters = int(config.get('Diarizer', 'num_seg_iters'))
491 |     except:
492 |         num_seg_iters = 3
493 | 
494 |     try:
495 |         num_em_iters = int(config.get('Diarizer', 'em_iterations'))
496 |     except:
497 |         num_em_iters = 3
498 | 
499 |     try:
500 |         seg_length = int(config.get('Diarizer', 'seg_length'))
501 |     except:
502 |         seg_length = 250
503 | 
504 | 
505 |     return meeting_name, f, sp, outfile, gmmfile, num_gmms, num_comps, num_em_iters, kl_ntop, num_seg_iters_init, num_seg_iters, seg_length
506 | 
507 | 
508 | 
509 | if __name__ == '__main__':
510 |     device_id = 0
511 | 
512 | 
513 |     # Process commandline arguments
514 |     try:
515 |         opts, args = getopt.getopt(sys.argv[1:], "c:", ["help"])
516 |     except getopt.GetoptError, err:
517 |         print_no_config()
518 |         sys.exit(2)
519 | 
520 |     config_file = 'diarizer.cfg'
521 |     config_specified = False
522 |     for o, a in opts:
523 |         if o == '-c':
524 |             config_file = a
525 |             config_specified = True
526 |         if o == '--help':
527 |             print_usage()
528 |             sys.exit(2)
529 | 
530 | 
531 |     if not config_specified:
532 |         print "No config file specified, using default 'diarizer.cfg' file"
533 |     else:
534 |         print "Using the config file specified: '", config_file, "'"
535 | 
536 |     try:
537 |         open(config_file)
538 |     except IOError, err:
539 |         print "Error! Config file: '", config_file, "' does not exist"
540 |         sys.exit(2)
541 | 
542 |     # Parse diarizer config file
543 |     config = ConfigParser.ConfigParser()
544 | 
545 |     config.read(config_file)
546 | 
547 |     meeting_name, f, sp, outfile, gmmfile, num_gmms, num_comps, num_em_iters, kl_ntop, num_seg_iters_init, num_seg_iters, seg_length = get_config_params(config)
548 | 
549 |     # Create tester object
550 |     diarizer = Diarizer(f, sp)
551 | 
552 |     # Create the GMM list
553 |     diarizer.new_gmm_list(num_comps, num_gmms, 'diag')
554 | 
555 |     # Cluster
556 |     most_likely = diarizer.cluster(num_em_iters, kl_ntop, num_seg_iters_init, num_seg_iters, seg_length)
557 | 
558 |     # Write out RTTM and GMM parameter files
559 |     diarizer.write_to_RTTM(outfile, sp, meeting_name, most_likely, num_gmms, seg_length)
560 |     diarizer.write_to_GMM(gmmfile)
561 | 
562 | 
563 | 
--------------------------------------------------------------------------------
/examples/diarizer.cfg:
--------------------------------------------------------------------------------
1 | [Diarizer]
2 | basename = IS1008a
3 | mfcc_feats = /home_user/egonina/asp/full_experiment_sets/AMI/features_ff/IS1008a_seg.feat.gauss.htk
4 | spnsp_file = /home_user/egonina/asp/full_experiment_sets/AMI/spnsp/IS1008a_seg.spch
5 | output_cluster = /home_user/egonina/asp/output/IS1008a.rttm
6 | gmm_output = IS.gmm
7 | 
8 | em_iterations = 3
9 | initial_clusters = 16
10 | M_mfcc = 5
11 | 
12 | KL_ntop = 0
13 | num_seg_iters_init = 2
14 | num_seg_iters = 3
15 | seg_length = 250
16 | 
--------------------------------------------------------------------------------
/examples/plotting.py:
--------------------------------------------------------------------------------
1 | import matplotlib as mpl
2 | #mpl.use('PDF') # must be called immediately, and before import pylab
3 | # sets the back-end for matplotlib
4 | import pylab as pl
5 | import itertools
6 | import copy
7 | 
8 | from gmm_specializer.gmm import *
9 | 
10 | def generate_synthetic_data(N):
11 |     np.random.seed(0)
12 |     C = np.array([[0., -0.7], [3.5, .7]])
13 |     C1 = np.array([[-0.4, 1.7], [0.3, .7]])
14 |     Y = np.r_[
15 |         np.dot(np.random.randn(N/3, 2), C1) - np.array([-1,-5]),
16 |         np.dot(np.random.randn(N/3, 2), C),
17 |         np.random.randn(N/3, 2) + np.array([3, 3]),
18 |     ]
19 |     return Y.astype(np.float32)
20 | 
21 | class Plotter(object):
22 | 
23 |     def __init__(self, num_rows, num_cols):
24 |         self.results = {}
25 |         self.plot_base = num_rows*100+num_cols*10
26 |         self.D = 2
27 |         self.N = 600
28 |         self.X = generate_synthetic_data(self.N)
29 | 
30 |     def pure(self, M, cvtype, plot_id):
31 |         gmm = GMM(M, self.D, cvtype=cvtype)
32 |         means, covars = gmm.train_using_python(self.X)
33 |         Y = gmm.predict_using_python(self.X)
34 |         self.results['Pure Python '+cvtype] = (str(self.plot_base+plot_id), means, covars, Y.T)
35 | 
36 |     def special(self, M, cvtype, plot_id):
37 |         gmm = GMM(M, self.D, cvtype=cvtype)
38 |         likelihood = gmm.train(self.X)
39 |         means = gmm.components.means.reshape((M, self.D))
40 |         covars = gmm.components.covars.reshape((M, self.D, self.D))
41 |         Y = gmm.predict(self.X)
42 |         self.results[' '.join(['ASP',cvtype,str(self.D),str(M),str(self.N)])] = (str(self.plot_base+plot_id), copy.deepcopy(means), copy.deepcopy(covars), copy.deepcopy(Y))
43 |         return likelihood
44 | 
45 |     def plot(self):
46 |         for t, r in self.results.iteritems():
47 |             splot = pl.subplot(r[0], title=t)
48 |             color_iter = itertools.cycle(['r', 'g', 'b', 'c'])
49 |             Y_ = r[3]
50 |             for i, (mean, covar, color) in enumerate(zip(r[1], r[2], color_iter)):
51 |                 v, w = np.linalg.eigh(covar)
52 |                 u = w[0] / np.linalg.norm(w[0])
53 |                 pl.scatter(self.X.T[0,Y_==i], self.X.T[1,Y_==i], .8, color=color)
54 |                 angle = np.arctan(u[1]/u[0])
55 |                 angle = 180 * angle / np.pi
56 |                 ell = mpl.patches.Ellipse(mean, v[0], v[1], 180 + angle, color=color)
57 |                 ell.set_clip_box(splot.bbox)
58 |                 ell.set_alpha(0.5)
59 |                 splot.add_artist(ell)
60 |         pl.show()
61 |         pl.savefig('gmm_test')
62 | 
63 | if __name__ == '__main__':
64 |     num_rows = 3
65 |     num_cols = 2
66 |     p = Plotter(num_rows,num_cols)
67 |     p.pure(3, 'diag', 1)
68 |     p.pure(3, 'full', 2)
69 |     p.special(3, 'diag', 3)
70 |     p.special(3, 'full', 4)
71 |     p.special(6, 'diag', 5)
72 |     p.special(6, 'full', 6)
73 |     p.plot()
74 | 
75 | 
--------------------------------------------------------------------------------
/examples/song_recommendation.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import pylab as pl
3 | import matplotlib as mpl
4 | import itertools
5 | import sys
6 | import math
7 | import timeit
8 | import copy
9 | import time
10 | import struct
11 | import scipy.stats.mstats as stats
12 | import ConfigParser
13 | import os
14 | import getopt
15 | import h5py
16 | import random as rnd
17 | import cPickle as pickle
18 | import operator
19 | 
20 | from gmm_specializer.gmm import *
21 | 
22 | def get_song_dict():
23 |     fileList = []
24 |     rootdir = '/disk1/home_user/egonina/asp/MSD/MillionSongSubset/data/'
25 |     for root, subFolders, files in os.walk(rootdir):
26 |         for file in files:
27 |             fileList.append(os.path.join(root,file))
28 | 
29 |     file_tag_dict = {}
30 |     for file in fileList:
31 |         print file
32 | 
33 |         f = h5py.File(file, 'r')
34 |         mbtags = f['musicbrainz']['artist_mbtags']
35 |         list = []
36 |         for t in mbtags:
37 |             list.append(t)
38 |         tags = f['metadata']['artist_terms']
39 |         tag_freq = f['metadata']['artist_terms_freq']
40 |         tags_dict = {}
41 |         for t in range(len(tags)):
42 |             tags_dict[tags[t]] = tag_freq[t]
43 | 
44 |         file_id = str(f['analysis']['songs']['track_id'][0])
45 |         file_tag_dict[file_id] = {}
46 |         file_tag_dict[file_id]['artist_mbtags'] = list
47 |         file_tag_dict[file_id]['artist_terms'] = tags_dict
48 |         file_tag_dict[file_id]['artist_name'] = str(f['metadata']['songs']['artist_name'][0])
49 |         file_tag_dict[file_id]['title'] = str(f['metadata']['songs']['title'][0])
50 |         file_tag_dict[file_id]['segments_timbre'] = np.array(f['analysis']['segments_timbre'], dtype=np.float32)
51 |         file_tag_dict[file_id]['duration'] = float(f['analysis']['songs']['duration'][0])
52 |         file_tag_dict[file_id]['tempo'] = float(f['analysis']['songs']['tempo'][0])
53 |         file_tag_dict[file_id]['time_signature'] = float(f['analysis']['songs']['time_signature'][0])
54 |         file_tag_dict[file_id]['segments_start'] = np.array(f['analysis']['segments_start'], dtype=np.float32)
55 |         f.close()
56 | 
57 |     p = open("/disk1/home_user/egonina/asp/MSD/all_file_dict_dump.pkl", "wb")
58 |     pickle.dump(file_tag_dict, p, True)
59 |     p.close()
60 |     return file_tag_dict
61 | 
62 | 
63 | 
64 | def count_songs_by_tag(tags_file_name, output_file_name, fileDict):
65 | 
66 |     tags_file = open(tags_file_name, 'r')
67 |     tag_dict = {}
68 |     for tag in tags_file:
69 |         tag = tag[:len(tag)-1] #delete end-of-line character
70 |         tag_dict[tag] = 0
71 | 
72 |     #---------- READ FILES -----------
73 |     start = time.time()
74 | 
75 |     for file in fileDict.keys():
76 | 
77 |         tags = fileDict[file]['artist_terms']
78 |         for tag in tag_dict.keys():
79 |             if tag in tags:
80 |                 tag_dict[tag] += 1
81 | 
82 |     total = time.time() - start
83 |     print "total time: ", total
84 | 
85 |     tag_out = open(output_file_name, 'w')
86 | 
87 |     for tag in tag_dict.keys():
88 |         print "songs with keyword [" + tag + "]: " + str(tag_dict[tag])
89 |         tag_out.write(tag+"\t"+str(tag_dict[tag])+"\n")
90 | 
91 |     tag_out.close()
92 | 
93 | if __name__ == '__main__':
94 | 
95 |     total_start_time = time.time()
96 | 
97 |     freq_threshold = 0.8
98 |     M = 32
99 |     category_tag = "metal"
100 |     rnd.seed(42)
101 | 
102 |     print "Reading Files"
103 |     #song_dict = get_song_dict()
104 |     st = time.time()
105 | 
106 |     # assume the dictionary has already been read in and pickled
107 |     p = open("/disk1/home_user/egonina/asp/MSD/all_file_dict_dump.pkl", "rb")
108 |     song_dict = pickle.load(p)
109 |     p.close()
110 |     print "--- File Reading:\t", time.time() - st, " -----"
111 | 
112 |     st = time.time()
113 | 
114 |     # collect songs
115 |     songs_with_tag = {}
116 |     songs_without_tag = {}
117 |     song_with_tag_count = 0
118 |     song_without_tag_count = 0
119 |     for song in song_dict.keys():
120 |         if category_tag in song_dict[song]['artist_terms'].keys(): #the song's tag list contains the tag we're looking for
121 |             if song_dict[song]['artist_terms'][category_tag] > freq_threshold:
122 |                 songs_with_tag[song] = song_dict[song]
123 |                 song_with_tag_count += 1
124 |         else:
125 |             songs_without_tag[song] = song_dict[song]
126 |             song_without_tag_count += 1
127 | 
128 |     print "--- Collecting songs for the tag time:\t", time.time() - st, " ----- "
129 |     print "INFO: songs with tag count:", song_with_tag_count
130 |     print "INFO: songs without tag count: ", song_without_tag_count
131 | 
132 |     st = time.time()
133 | 
134 |     # get indices for various sets of songs
135 |     all_positive_indices = range(song_with_tag_count-1)
136 |     all_negative_indices = range(song_without_tag_count-1)
137 |     all_indices = range(len(song_dict.keys()))
138 | 
139 |     #split songs with tag into training/testing sets (70/30)
140 |     training_sample_indices = np.array(rnd.sample(all_positive_indices, int(song_with_tag_count*0.7)))
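    # The remaining ~30% of tagged songs become the positive test set below.
    # Scoring follows a UBM-style scheme: a background GMM (the UBM) is trained
    # on a broad sample of features from all songs, a tag-specific GMM is
    # initialized from the UBM parameters and retrained on the tagged songs,
    # and each test song is ranked by avg_lkld - avg_ubm_lkld (the average
    # per-frame log-likelihood difference between the two models).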
141 | testing_sample_indices = np.delete(all_positive_indices, training_sample_indices) 142 | negative_sample_indices = all_negative_indices 143 | 144 | print "INFO: number of training indices:", len(training_sample_indices) 145 | print "INFO: testing indices:", len(testing_sample_indices) 146 | print "INFO: negative testing indices:", len(negative_sample_indices) 147 | 148 | # get song keys for the: 149 | # - 70% of total songs for training 150 | # - 30% of total songs for testing 151 | # - (total songs - songs with tag) for negative testing 152 | # - 30% of all song features for UBM model 153 | song_keys = np.array(songs_with_tag.keys()) 154 | song_neg_keys = np.array(songs_without_tag.keys()) 155 | all_song_keys = np.array(song_dict.keys()) 156 | 157 | # get the corresponding song keys for each of the sets 158 | training_song_keys = song_keys[training_sample_indices] 159 | testing_song_keys = song_keys[testing_sample_indices] 160 | negative_song_keys = song_neg_keys[negative_sample_indices] 161 | 162 | # collect features for positive GMM training 163 | first_song = True 164 | for song in training_song_keys: 165 | feats = songs_with_tag[song]['segments_timbre'] 166 | 167 | if first_song: 168 | total_features = feats 169 | first_song = False 170 | else: 171 | total_features = np.concatenate((total_features, feats)) 172 | 173 | print "--- Collecting training features time:\t", time.time() - st, " ----- " 174 | print "INFO: total features: ", total_features.shape 175 | 176 | # collect features for UBM training 177 | st = time.time() 178 | p = open("/disk1/home_user/egonina/asp/MSD/ubm_features_all.pkl", "rb") 179 | total_ubm_features = np.array(pickle.load(p)) 180 | p.close() 181 | 182 | # train the UBM on 30% of the total features from all songs 183 | training_ubm_features = np.array(rnd.sample(total_ubm_features, int(len(total_ubm_features)*0.3))) 184 | 185 | print "--- Collecting ubm features time:\t", time.time() - st, " -----" 186 | print "INFO: total ubm features: ", total_ubm_features.shape, " 30%: ", training_ubm_features.shape 187 | 188 | # train UBM on features 189 | D = total_ubm_features.shape[1] 190 | ubm = GMM(M,D,cvtype='diag') 191 | 192 | train_st = time.time() 193 | ubm.train(training_ubm_features) 194 | train_total = time.time() - train_st 195 | print "--- UBM training time:\t", train_total, " -----" 196 | 197 | # train positive GMM on features 198 | D = total_features.shape[1] 199 | gmm = GMM(M, D, means=np.array(ubm.components.means), covars=np.array(ubm.components.covars), weights=np.array(ubm.components.weights), cvtype='diag') 200 | 201 | train_st = time.time() 202 | gmm.train(total_features) 203 | train_total = time.time() - train_st 204 | print "--- GMM training time:\t", train_total, " -----" 205 | 206 | print "--- Testing Labeled Examples ---" 207 | 208 | # testing the labeled test files 209 | test_st = time.time() 210 | labeled_songs = {} 211 | unlabeled_songs = {} 212 | 213 | for test_song in testing_song_keys: 214 | test_feats = songs_with_tag[test_song]['segments_timbre'] 215 | all_lklds = gmm.score(test_feats) 216 | all_ubm_lklds = ubm.score(test_feats) 217 | 218 | avg_lkld = np.average(all_lklds) 219 | avg_ubm_lkld = np.average(all_ubm_lklds) 220 | sum_lkld = np.sum(all_lklds) 221 | 222 | labeled_songs[str(songs_with_tag[test_song]['artist_name']+ " - "+songs_with_tag[test_song]['title'])] = (avg_lkld, avg_ubm_lkld, avg_lkld - avg_ubm_lkld) 223 | 224 | print "--- Testing Unlabeled Examples ---" 225 | test_st = time.time() 226 | 227 | count = 0 228 | # 
testing the unlabeled test files 229 | for test_song in negative_song_keys: 230 | count+=1 231 | print count 232 | test_feats = songs_without_tag[test_song]['segments_timbre'] 233 | 234 | all_lklds = gmm.score(test_feats) 235 | all_ubm_lklds = ubm.score(test_feats) 236 | avg_lkld = np.average(all_lklds) 237 | avg_ubm_lkld = np.average(all_ubm_lklds) 238 | sum_lkld = np.sum(all_lklds) 239 | 240 | unlabeled_songs[str(songs_without_tag[test_song]['artist_name'] + " - " + songs_without_tag[test_song]['title'])] = (avg_lkld, avg_ubm_lkld, avg_lkld - avg_ubm_lkld) 241 | 242 | test_total = time.time() - test_st 243 | print "--- Total testing time:\t", test_total, " -----" 244 | 245 | #print out top 20 labeled suggestions and unlabeled recommendations 246 | print "======================================================================" 247 | print "=================== TOP 20 LABELED SAMPLES ===========================" 248 | print "======================================================================" 249 | sorted_lab_samples = sorted(labeled_songs.iteritems(), key=lambda k: k[1][2], reverse=True) 250 | for p in range(20): 251 | print sorted_lab_samples[p] 252 | 253 | print "======================================================================" 254 | print "=================== TOP 20 UNLABELED SAMPLES =========================" 255 | print "======================================================================" 256 | sorted_unlab_samples = sorted(unlabeled_songs.iteritems(), key=lambda k: k[1][2], reverse=True) 257 | for p in range(20): 258 | print sorted_unlab_samples[p] 259 | 260 | 261 | print "-------------- DONE ---------------" 262 | print "--- Total time: ", time.time() - total_start_time, " ---" 263 | print "-----------------------------------" 264 | 265 | 266 | 267 | -------------------------------------------------------------------------------- /gmm_specializer/__init__.py: -------------------------------------------------------------------------------- 1 | # From http://stackoverflow.com/questions/458550/standard-way-to-embed-version-into-python-package 2 | # Author: James Antill (http://stackoverflow.com/users/10314/james-antill) 3 | __version__ = '0.2' 4 | __version_info__ = tuple([ int(num) for num in __version__.split('.')]) 5 | -------------------------------------------------------------------------------- /run_tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | PYTHONPATH=../../:$PYTHONPATH 4 | 5 | echo PYTHONPATH 6 | echo ${PYTHONPATH} 7 | 8 | if [ -z "${PYTHON}" ] 9 | then 10 | PYTHON=python 11 | fi 12 | if [ -z "${PYTHONARGS}" ] 13 | then 14 | PYTHONARGS= 15 | fi 16 | 17 | PYTHONPATH=`pwd`:${PYTHONPATH} ${PYTHON} ${PYTHONARGS} tests/gmm_test.py 18 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Based on codepy's setup.py (see http://mathema.tician.de/software/codepy) 4 | 5 | import distribute_setup 6 | import gmm_specializer 7 | distribute_setup.use_setuptools() 8 | 9 | from setuptools import setup 10 | import glob 11 | 12 | setup(name="gmm_specializer", 13 | version=gmm_specializer.__version__, 14 | description="This is a SEJITS (selective embedded just-in-time specializer) for Gaussian Mixture Models, built on the ASP framework.", 15 | long_description=""" 16 | See http://www.armandofox.com/geek/home/sejits/ for more about SEJITS, including links to 17 
| publications. See http://github.com/hcook/gmm/wiki for more about the GMM specializer. 18 | """, 19 | classifiers=[ 20 | 'Development Status :: 2 - Pre-Alpha', 21 | 'Intended Audience :: Developers', 22 | 'Intended Audience :: Other Audience', 23 | 'Intended Audience :: Science/Research', 24 | 'License :: OSI Approved :: BSD License', 25 | 'Natural Language :: English', 26 | 'Programming Language :: Python', 27 | 'Topic :: Scientific/Engineering', 28 | 'Topic :: Software Development :: Libraries', 29 | 'Topic :: Utilities', 30 | ], 31 | 32 | author=u"Henry Cook, Katya Gonina, Shoaib Kamil", 33 | url="http://github.com/hcook/gmm/wiki/", 34 | author_email="egonina@cs.berkeley.edu", 35 | license = "BSD", 36 | 37 | packages=["gmm_specializer"], 38 | install_requires=[ 39 | "asp", 40 | "scikit_learn" 41 | ], 42 | ) 43 | 44 | -------------------------------------------------------------------------------- /templates/em_base_helper_funcs.mako: -------------------------------------------------------------------------------- 1 | #define PI 3.1415926535897931 2 | #define COVARIANCE_DYNAMIC_RANGE 1E6 3 | #define MINVALUEFORMINUSLOG -1000.0 4 | 5 | void print_evals(float* component_memberships, float* loglikelihoods, int num_events, int num_components){ 6 | for(int m = 0; m < num_components; m++){ 7 | for(int e = 0; e < num_events; e++) 8 | printf("%0.8f ", component_memberships[m*num_events+e]); 9 | printf("\n"); 10 | } 11 | for(int e = 0; e < num_events; e++) 12 | printf("%0.8f ", loglikelihoods[e]); 13 | printf("\n"); 14 | } 15 | 16 | void print_components(components_t * components, int num_components, int num_dimensions){ 17 | printf("===============\n"); 18 | for(int m = 0; m < num_components; m++){ 19 | printf("%0.8f ", components->N[m]); 20 | } printf("\n"); 21 | for(int m = 0; m < num_components; m++){ 22 | printf("%0.8f ", components->pi[m]); 23 | } printf("\n"); 24 | for(int m = 0; m < num_components; m++){ 25 | printf("%0.8f ", components->CP[m]); 26 | } printf("\n"); 27 | for(int m = 0; m < num_components; m++){ 28 | printf("%0.8f ", components->constant[m]); 29 | } printf("\n"); 30 | for(int m = 0; m < num_components; m++){ 31 | printf("%0.8f ", components->avgvar[m]); 32 | } printf("\n"); 33 | for(int m = 0; m < num_components; m++){ 34 | for(int d = 0; d < num_dimensions; d++) 35 | printf("%0.8f ", components->means[m*num_dimensions+d]); 36 | printf("\n"); 37 | } 38 | for(int m = 0; m < num_components; m++){ 39 | for(int d = 0; d < num_dimensions; d++) 40 | for(int d2 = 0; d2 < num_dimensions; d2++) 41 | printf("%0.8f ", 42 | components->R[m*num_dimensions*num_dimensions+d*num_dimensions+d2]); 43 | printf("\n"); 44 | } 45 | 46 | for(int m = 0; m < num_components; m++){ 47 | for(int d = 0; d < num_dimensions; d++) 48 | for(int d2 = 0; d2 < num_dimensions; d2++) 49 | printf("%0.8f ", 50 | components->Rinv[m*num_dimensions*num_dimensions+d*num_dimensions+d2]); 51 | printf("\n"); 52 | } 53 | printf("===============\n"); 54 | } 55 | 56 | typedef struct return_component_container 57 | { 58 | boost::python::object component; 59 | float distance; 60 | } ret_c_con_t; 61 | 62 | ret_c_con_t ret; 63 | 64 | void mvtmeans(float* data_by_event, int num_dimensions, int num_events, float* means) { 65 | for(int d=0; d < num_dimensions; d++) { 66 | means[d] = 0.0; 67 | for(int n=0; n < num_events; n++) { 68 | means[d] += data_by_event[n*num_dimensions+d]; 69 | } 70 | means[d] /= (float) num_events; 71 | } 72 | } 73 | 74 | float log_add(float log_a, float log_b) { 75 | if(log_a < log_b) { 76 | float tmp 
= log_a; 77 | log_a = log_b; 78 | log_b = tmp; 79 | } 80 | //setting MIN...LOG so small, I don't even need to look 81 | return (((log_b - log_a) <= MINVALUEFORMINUSLOG) ? log_a : 82 | log_a + (float)(logf(1.0 + (double)(expf((double)(log_b - log_a)))))); 83 | } 84 | 85 | void normalize_pi(components_t* components, int num_components) { 86 | float total = 0; 87 | for(int i=0; ipi[i]; 89 | } 90 | 91 | for(int m=0; m < num_components; m++){ 92 | components->pi[m] /= total; 93 | } 94 | } 95 | //=== Data structure pointers === 96 | 97 | //CPU copies of events 98 | float *fcs_data_by_event; 99 | float *fcs_data_by_dimension; 100 | 101 | // index list for train_on_subset 102 | int* index_list; 103 | 104 | 105 | //CPU copies of components 106 | components_t components; 107 | components_t saved_components; 108 | components_t** scratch_component_arr; // for computing distances and merging 109 | static int num_scratch_components = 0; 110 | 111 | //CPU copies of eval data 112 | float *component_memberships; 113 | float *loglikelihoods; 114 | 115 | //=== AHC function prototypes === 116 | void copy_component(components_t *dest, int c_dest, components_t *src, int c_src, int num_dimensions); 117 | void add_components(components_t *components, int c1, int c2, components_t *temp_component, int num_dimensions); 118 | float component_distance(components_t *components, int c1, int c2, components_t *temp_component, int num_dimensions); 119 | 120 | //=== Helper function prototypes === 121 | void writeCluster(FILE* f, components_t components, int c, int num_dimensions); 122 | void printCluster(components_t components, int c, int num_dimensions); 123 | void invert_cpu(float* data, int actualsize, float* log_determinant); 124 | int invert_matrix(float* a, int n, float* determinant); 125 | 126 | //============ LUTLOG ============== 127 | 128 | float *LOOKUP_TABLE; 129 | int N_LOOKUP_SIZE = 12; 130 | 131 | void do_table(int n,float *lookup_table) 132 | { 133 | float numlog; 134 | int *const exp_ptr = ((int*)&numlog); 135 | int x = *exp_ptr; 136 | x = 0x00000000; 137 | x += 127 << 23; 138 | *exp_ptr = x; 139 | for(int i=0;iN = (float*) malloc(sizeof(float)); 165 | scratch_component->pi = (float*) malloc(sizeof(float)); 166 | scratch_component->CP = (float*) malloc(sizeof(float)); 167 | scratch_component->constant = (float*) malloc(sizeof(float)); 168 | scratch_component->avgvar = (float*) malloc(sizeof(float)); 169 | scratch_component->means = (float*) malloc(sizeof(float)*num_dimensions); 170 | scratch_component->R = (float*) malloc(sizeof(float)*num_dimensions*num_dimensions); 171 | scratch_component->Rinv = (float*) malloc(sizeof(float)*num_dimensions*num_dimensions); 172 | 173 | return scratch_component; 174 | } 175 | 176 | void dealloc_temp_components_on_CPU() { 177 | printf("dealloc tempcomponents on CPU\n"); 178 | for(int i = 0; iN); 180 | free(scratch_component_arr[i]->pi); 181 | free(scratch_component_arr[i]->CP); 182 | free(scratch_component_arr[i]->constant); 183 | free(scratch_component_arr[i]->avgvar); 184 | free(scratch_component_arr[i]->means); 185 | free(scratch_component_arr[i]->R); 186 | free(scratch_component_arr[i]->Rinv); 187 | } 188 | num_scratch_components = 0; 189 | 190 | return; 191 | } 192 | 193 | 194 | // ================== Event data allocation on CPU ================= : 195 | void alloc_events_on_CPU(PyObject *input_data) { 196 | 197 | fcs_data_by_event = ((float*)PyArray_DATA(input_data)); 198 | int num_events = PyArray_DIM(input_data,0); 199 | int num_dimensions = 
PyArray_DIM(input_data,1); 200 | // Transpose the event data (allows coalesced access pattern in E-step kernel) 201 | // This has consecutive values being from the same dimension of the data 202 | // (num_dimensions by num_events matrix) 203 | fcs_data_by_dimension = (float*) malloc(sizeof(float)*num_events*num_dimensions); 204 | 205 | for(int e=0; epi); 302 | } 303 | 304 | PyObject *get_temp_component_means(components_t* c, int D){ 305 | npy_intp dims[1] = {D}; 306 | return PyArray_SimpleNewFromData(1, dims, NPY_FLOAT32, c->means); 307 | } 308 | 309 | PyObject *get_temp_component_covars(components_t* c, int D){ 310 | npy_intp dims[2] = {D, D}; 311 | return PyArray_SimpleNewFromData(1, dims, NPY_FLOAT32, c->R); 312 | } 313 | 314 | //------------------------- AHC FUNCTIONS ---------------------------- 315 | 316 | //============ KL DISTANCE FUNCTIONS ============= 317 | inline float lut_log (float val, float *lookup_table, int n) 318 | { 319 | int *const exp_ptr = ((int*)&val); 320 | int x = *exp_ptr; 321 | const int log_2 = ((x >> 23) & 255) - 127; 322 | x &= 0x7FFFFF; 323 | x = x >> (23-n); 324 | val=lookup_table[x]; 325 | // printf("log2:%f\n", log_2); 326 | return ((val + log_2)* 0.69314718); 327 | 328 | } 329 | 330 | // sequentuially add logarithms 331 | float Log_Add(float log_a, float log_b) 332 | { 333 | float result; 334 | if(log_a < log_b) 335 | { 336 | float tmp = log_a; 337 | log_a = log_b; 338 | log_b = tmp; 339 | } 340 | //setting MIN...LOG so small, I don't even need to look 341 | if((log_b - log_a) <= MINVALUEFORMINUSLOG) 342 | { 343 | return log_a; 344 | } 345 | else 346 | { 347 | result = log_a + (float)(lut_log(1.0 + (double)(exp((double)(log_b - log_a))),LOOKUP_TABLE,N_LOOKUP_SIZE)); 348 | } 349 | return result; 350 | } 351 | 352 | double Log_Likelihood(int DIM, int m, float *feature, float *means, float *covars, float CP) 353 | { 354 | //float log_lkld; 355 | //float in_the_exp = 0.0, den = 0.0; 356 | double x,y=0,z; 357 | for(int i=0; iN[c1], components->constant[c1], components->N[c2], components->constant[c2], temp_component->N[0], temp_component->constant[0]); 503 | return components->N[c1]*components->constant[c1] + components->N[c2]*components->constant[c2] - temp_component->N[0]*temp_component->constant[0]; 504 | 505 | } 506 | 507 | void add_components(components_t *components, int c1, int c2, components_t *temp_component, int num_dimensions) { 508 | float wt1,wt2; 509 | 510 | wt1 = (components->N[c1]) / (components->N[c1] + components->N[c2]); 511 | wt2 = 1.0f - wt1; 512 | 513 | // Compute new weighted means 514 | for(int i=0; imeans[i] = wt1*components->means[c1*num_dimensions+i] + wt2*components->means[c2*num_dimensions+i]; 516 | } 517 | 518 | // Compute new weighted covariance 519 | for(int i=0; iR[i*num_dimensions+j] = ((temp_component->means[i]-components->means[c1*num_dimensions+i]) 523 | *(temp_component->means[j]-components->means[c1*num_dimensions+j]) 524 | +components->R[c1*num_dimensions*num_dimensions+i*num_dimensions+j])*wt1; 525 | // Add R contribution from component2 526 | temp_component->R[i*num_dimensions+j] += ((temp_component->means[i]-components->means[c2*num_dimensions+i]) 527 | *(temp_component->means[j]-components->means[c2*num_dimensions+j]) 528 | +components->R[c2*num_dimensions*num_dimensions+i*num_dimensions+j])*wt2; 529 | // Because its symmetric... 
530 | temp_component->R[j*num_dimensions+i] = temp_component->R[i*num_dimensions+j]; 531 | } 532 | } 533 | 534 | // Compute pi 535 | temp_component->pi[0] = components->pi[c1] + components->pi[c2]; 536 | 537 | // compute N 538 | temp_component->N[0] = components->N[c1] + components->N[c2]; 539 | 540 | float log_determinant; 541 | // Copy R to Rinv matrix 542 | memcpy(temp_component->Rinv,temp_component->R,sizeof(float)*num_dimensions*num_dimensions); 543 | // Invert the matrix 544 | invert_cpu(temp_component->Rinv,num_dimensions,&log_determinant); 545 | // Compute the constant 546 | temp_component->constant[0] = (-num_dimensions)*0.5*logf(2*PI)-0.5*log_determinant; 547 | 548 | // avgvar same for all components 549 | temp_component->avgvar[0] = components->avgvar[0]; 550 | } 551 | 552 | void copy_component(components_t *dest, int c_dest, components_t *src, int c_src, int num_dimensions) { 553 | dest->N[c_dest] = src->N[c_src]; 554 | dest->pi[c_dest] = src->pi[c_src]; 555 | dest->constant[c_dest] = src->constant[c_src]; 556 | dest->avgvar[c_dest] = src->avgvar[c_src]; 557 | memcpy(&(dest->means[c_dest*num_dimensions]),&(src->means[c_src*num_dimensions]),sizeof(float)*num_dimensions); 558 | memcpy(&(dest->R[c_dest*num_dimensions*num_dimensions]),&(src->R[c_src*num_dimensions*num_dimensions]),sizeof(float)*num_dimensions*num_dimensions); 559 | memcpy(&(dest->Rinv[c_dest*num_dimensions*num_dimensions]),&(src->Rinv[c_src*num_dimensions*num_dimensions]),sizeof(float)*num_dimensions*num_dimensions); 560 | // do we need to copy memberships? 561 | } 562 | //---------------- END AHC FUNCTIONS ---------------- 563 | 564 | 565 | void writeCluster(FILE* f, components_t components, int c, int num_dimensions) { 566 | fprintf(f,"Probability: %f\n", components.pi[c]); 567 | fprintf(f,"N: %f\n",components.N[c]); 568 | fprintf(f,"Means: "); 569 | for(int i=0; iRinv[i*num_dimensions+j]); 587 | } 588 | fprintf(f,"\n"); 589 | } 590 | */ 591 | } 592 | 593 | void printCluster(components_t components, int c, int num_dimensions) { 594 | writeCluster(stdout,components,c,num_dimensions); 595 | } 596 | 597 | static int 598 | ludcmp(float *a,int n,int *indx,float *d); 599 | 600 | static void 601 | lubksb(float *a,int n,int *indx,float *b); 602 | 603 | /* 604 | * Inverts a square matrix (stored as a 1D float array) 605 | * 606 | * actualsize - the dimension of the matrix 607 | * 608 | * written by Mike Dinolfo 12/98 609 | * version 1.0 610 | */ 611 | void invert_cpu(float* data, int actualsize, float* log_determinant) { 612 | int maxsize = actualsize; 613 | int n = actualsize; 614 | *log_determinant = 0.0; 615 | 616 | if (actualsize == 1) { // special case, dimensionality == 1 617 | *log_determinant = logf(data[0]); 618 | data[0] = 1.0 / data[0]; 619 | } else if(actualsize >= 2) { // dimensionality >= 2 620 | for (int i=1; i < actualsize; i++) data[i] /= data[0]; // normalize row 0 621 | for (int i=1; i < actualsize; i++) { 622 | for (int j=i; j < actualsize; j++) { // do a column of L 623 | float sum = 0.0; 624 | for (int k = 0; k < i; k++) 625 | sum += data[j*maxsize+k] * data[k*maxsize+i]; 626 | data[j*maxsize+i] -= sum; 627 | } 628 | if (i == actualsize-1) continue; 629 | for (int j=i+1; j < actualsize; j++) { // do a row of U 630 | float sum = 0.0; 631 | for (int k = 0; k < i; k++) 632 | sum += data[i*maxsize+k]*data[k*maxsize+j]; 633 | data[i*maxsize+j] = 634 | (data[i*maxsize+j]-sum) / data[i*maxsize+i]; 635 | } 636 | } 637 | 638 | for(int i=0; ij)?i:j); k < actualsize; k++ ) 665 | sum += 
((j==k)?1.0:data[j*maxsize+k])*data[k*maxsize+i]; 666 | data[j*maxsize+i] = sum; 667 | } 668 | } else { 669 | printf("Error: Invalid dimensionality for invert(...)\n"); 670 | } 671 | } 672 | 673 | 674 | /* 675 | * Another matrix inversion function 676 | * This was modified from the 'component' application by Charles A. Bouman 677 | */ 678 | int invert_matrix(float* a, int n, float* determinant) { 679 | int i,j,f,g; 680 | 681 | float* y = (float*) malloc(sizeof(float)*n*n); 682 | float* col = (float*) malloc(sizeof(float)*n); 683 | int* indx = (int*) malloc(sizeof(int)*n); 684 | /* 685 | printf("\n\nR matrix before LU decomposition:\n"); 686 | for(i=0; i big) 759 | big=temp; 760 | if (big == 0.0) 761 | return 0; /* Singular matrix */ 762 | vv[i]=1.0/big; 763 | } 764 | 765 | 766 | for (j=0;j= 0.0 || fabs(dum-big) < 1e-3) 798 | { 799 | big=dum; 800 | imax=i; 801 | //printf("imax: %d\n",imax); 802 | } 803 | } 804 | 805 | if (j != imax) 806 | { 807 | for (k=0;k=0)&&(a[j*n+j]-TINY) ) a[j*n+j]= -TINY; 833 | 834 | if (j != n-1) 835 | { 836 | dum=1.0/(a[j*n+j]); 837 | for (i=j+1;i= 0) 860 | for (j=ii;j=0;i--) 867 | { 868 | sum=b[i]; 869 | for (j=i+1;jpi[i]; 30 | } 31 | 32 | for(int m=0; m < num_components; m++){ 33 | components->pi[m] /= total; 34 | } 35 | } 36 | 37 | static int 38 | ludcmp(float *a,int n,int *indx,float *d); 39 | 40 | static void 41 | lubksb(float *a,int n,int *indx,float *b); 42 | 43 | /* 44 | * Inverts a square matrix (stored as a 1D float array) 45 | * 46 | * actualsize - the dimension of the matrix 47 | * 48 | * written by Mike Dinolfo 12/98 49 | * version 1.0 50 | */ 51 | void invert_cpu(float* data, int actualsize, float* log_determinant) { 52 | int maxsize = actualsize; 53 | int n = actualsize; 54 | 55 | *log_determinant = 0.0f; 56 | // sanity check 57 | if (actualsize == 1) { 58 | *log_determinant = logf(data[0]); 59 | data[0] = 1.0 / data[0]; 60 | } else { 61 | 62 | for (int i=1; i < actualsize; i++) data[i] /= data[0]; // normalize row 0 63 | for (int i=1; i < actualsize; i++) { 64 | for (int j=i; j < actualsize; j++) { // do a column of L 65 | float sum = 0.0f; 66 | for (int k = 0; k < i; k++) 67 | sum += data[j*maxsize+k] * data[k*maxsize+i]; 68 | data[j*maxsize+i] -= sum; 69 | } 70 | if (i == actualsize-1) continue; 71 | for (int j=i+1; j < actualsize; j++) { // do a row of U 72 | float sum = 0.0f; 73 | for (int k = 0; k < i; k++) 74 | sum += data[i*maxsize+k]*data[k*maxsize+j]; 75 | data[i*maxsize+j] = 76 | (data[i*maxsize+j]-sum) / data[i*maxsize+i]; 77 | } 78 | } 79 | 80 | for(int i=0; ij)?i:j); k < actualsize; k++ ) 106 | sum += ((j==k)?1.0:data[j*maxsize+k])*data[k*maxsize+i]; 107 | data[j*maxsize+i] = sum; 108 | } 109 | } 110 | } 111 | 112 | 113 | /* 114 | * Another matrix inversion function 115 | * This was modified from the 'component' application by Charles A. 
Bouman 116 | */ 117 | int invert_matrix(float* a, int n, float* determinant) { 118 | int i,j,f,g; 119 | 120 | float* y = (float*) malloc(sizeof(float)*n*n); 121 | float* col = (float*) malloc(sizeof(float)*n); 122 | int* indx = (int*) malloc(sizeof(int)*n); 123 | /* 124 | printf("\n\nR matrix before LU decomposition:\n"); 125 | for(i=0; i big) 198 | big=temp; 199 | if (big == 0.0) 200 | return 0; /* Singular matrix */ 201 | vv[i]=1.0/big; 202 | } 203 | 204 | 205 | for (j=0;j= 0.0 || fabs(dum-big) < 1e-3) 237 | { 238 | big=dum; 239 | imax=i; 240 | //printf("imax: %d\n",imax); 241 | } 242 | } 243 | 244 | if (j != imax) 245 | { 246 | for (k=0;k=0)&&(a[j*n+j]-TINY) ) a[j*n+j]= -TINY; 272 | 273 | if (j != n-1) 274 | { 275 | dum=1.0/(a[j*n+j]); 276 | for (i=j+1;i= 0) 299 | for (j=ii;j=0;i--) 306 | { 307 | sum=b[i]; 308 | for (j=i+1;jR[row*num_dimensions+col] = 0.0f; 8 | for(int j=0; j < num_events; j++) { 9 | if(row==col) { 10 | components->R[row*num_dimensions+col] += (fcs_data[j*num_dimensions + row])*(fcs_data[j*num_dimensions + row]); 11 | } 12 | } 13 | if(row==col) { 14 | components->R[row*num_dimensions+col] /= (float) (num_events -1); 15 | components->R[row*num_dimensions+col] -= ((float)(num_events)*means[row]*means[row]) / (float)(num_events-1); 16 | components->R[row*num_dimensions+col] /= (float)num_components; 17 | } 18 | } 19 | } 20 | 21 | void average_variance${'_'+'_'.join(param_val_list)}(float* fcs_data, float* means, int num_dimensions, int num_events, float* avgvar) { 22 | 23 | cilk::reducer_opadd total(0.0f); 24 | // Compute average variance for each dimension 25 | cilk_for(int i = 0; i < num_dimensions; i++) { 26 | float variance = 0.0f; 27 | for(int j=0; j < num_events; j++) { 28 | variance += fcs_data[j*num_dimensions + i]*fcs_data[j*num_dimensions + i]; 29 | } 30 | variance /= (float) num_events; 31 | variance -= means[i]*means[i]; 32 | total += variance; 33 | } 34 | 35 | *avgvar = total.get_value() / (float) num_dimensions; 36 | } 37 | 38 | void constants${'_'+'_'.join(param_val_list)}(components_t* components, int M, int D) { 39 | float log_determinant; 40 | float* matrix = (float*) malloc(sizeof(float)*D*D); 41 | 42 | //float sum = 0.0; 43 | for(int m=0; m < M; m++) { 44 | // Invert covariance matrix 45 | memcpy(matrix,&(components->R[m*D*D]),sizeof(float)*D*D); 46 | invert_cpu(matrix,D,&log_determinant); 47 | memcpy(&(components->Rinv[m*D*D]),matrix,sizeof(float)*D*D); 48 | 49 | // Compute constant 50 | components->constant[m] = -D*0.5f*logf(2*PI) - 0.5f*log_determinant; 51 | components->CP[m] = components->constant[m]*2.0; 52 | } 53 | normalize_pi(components, M); 54 | free(matrix); 55 | } 56 | 57 | void seed_components${'_'+'_'.join(param_val_list)}(float *data_by_event, components_t* components, int num_dimensions, int num_components, int num_events) { 58 | float* means = (float*) malloc(sizeof(float)*num_dimensions); 59 | float avgvar; 60 | 61 | // Compute means 62 | mvtmeans(data_by_event, num_dimensions, num_events, means); 63 | 64 | // Compute the average variance 65 | seed_covars${'_'+'_'.join(param_val_list)}(components, data_by_event, means, num_dimensions, num_events, &avgvar, num_components); 66 | average_variance${'_'+'_'.join(param_val_list)}(data_by_event, means, num_dimensions, num_events, &avgvar); 67 | float seed; 68 | if(num_components > 1) { 69 | seed = (num_events)/(num_components); 70 | } else { 71 | seed = 0.0f; 72 | } 73 | 74 | memcpy(components->means, means, sizeof(float)*num_dimensions); 75 | 76 | for(int c=1; c < num_components; c++) { 77 | 
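
The seeding loop starting here places component 0 at the overall mean and every later component's mean on an event sampled num_events/num_components apart, so the initial means span the data set; note the template computes the stride with integer operands before casting. Just the index arithmetic, with hypothetical sizes:

#include <cstdio>

int main() {
    int num_events = 1000, num_components = 4;
    float seed = num_events / num_components;  // integer stride, as in the template
    for (int c = 1; c < num_components; c++)
        printf("component %d seeded from event %d\n", c, (int)(c * seed));
    // events 250, 500, 750; component 0 uses the global mean
    return 0;
}
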
memcpy(&components->means[c*num_dimensions], &data_by_event[((int)(c*seed))*num_dimensions], sizeof(float)*num_dimensions); 78 | 79 | for(int i=0; i < num_dimensions*num_dimensions; i++) { 80 | components->R[c*num_dimensions*num_dimensions+i] = components->R[i]; 81 | components->Rinv[c*num_dimensions*num_dimensions+i] = 0.0f; 82 | } 83 | } 84 | 85 | //compute pi, N 86 | for(int c =0; cpi[c] = 1.0f/((float)num_components); 88 | components->N[c] = ((float) num_events) / ((float)num_components); 89 | components->avgvar[c] = avgvar / COVARIANCE_DYNAMIC_RANGE; 90 | } 91 | 92 | free(means); 93 | } 94 | 95 | void compute_average_variance${'_'+'_'.join(param_val_list)}( float* fcs_data, components_t* components, int num_dimensions, int num_components, int num_events) 96 | { 97 | float* means = (float*) malloc(sizeof(float)*num_dimensions); 98 | float avgvar; 99 | 100 | // Compute the means 101 | mvtmeans(fcs_data, num_dimensions, num_events, means); 102 | 103 | average_variance${'_'+'_'.join(param_val_list)}(fcs_data, means, num_dimensions, num_events, &avgvar); 104 | 105 | for(int c =0; cavgvar[c] = avgvar / COVARIANCE_DYNAMIC_RANGE; 107 | } 108 | } 109 | 110 | void estep1${'_'+'_'.join(param_val_list)}(float* data, components_t* components, float* component_memberships, int D, int M, int N, float* loglikelihoods) { 111 | // Compute likelihood for every data point in each component 112 | cilk_for(int m=0; m < M; m++) { 113 | float component_pi = components->pi[m]; 114 | float component_constant = components->constant[m]; 115 | float* means = &(components->means[m*D]); 116 | float* Rinv = &(components->Rinv[m*D*D]); 117 | for(int n=0; n < N; n++) { 118 | float like = 0.0; 119 | %if cvtype == 'diag': 120 | for(int i=0; i < D; i++) { 121 | like += (data[i*N+n]-means[i])*(data[i*N+n]-means[i])*Rinv[i*D+i]; 122 | } 123 | %else: 124 | for(int i=0; i < D; i++) { 125 | for(int j=0; j < D; j++) { 126 | like += (data[i*N+n]-means[i])*(data[j*N+n]-means[j])*Rinv[i*D+j]; 127 | } 128 | } 129 | %endif 130 | component_memberships[m*N+n] = (component_pi > 0.0f) ? 
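
Each membership entry written in estep1 below is the log of a weighted Gaussian density, log(pi_m) + constant_m - 0.5 (x - mu)^T R^-1 (x - mu), where constant_m = -D/2 log(2 pi) - 1/2 log|R| was precomputed by constants(). The diagonal-covariance case for a single event, as a self-contained sketch (all names hypothetical):

#include <cmath>
#include <cstdio>

#define PI 3.1415926535897931f

// log N(x | mu, diag(var)) + log(pi): one (component, event) entry of estep1.
float log_weighted_density(const float* x, const float* mu,
                           const float* var, float pi, int D) {
    float constant = -0.5f * D * logf(2.0f * PI);
    float like = 0.0f;
    for (int i = 0; i < D; i++) {
        constant -= 0.5f * logf(var[i]);  // the -0.5 log|R| part
        like += (x[i] - mu[i]) * (x[i] - mu[i]) / var[i];
    }
    return -0.5f * like + constant + logf(pi);
}

int main() {
    float x[2] = {1, 2}, mu[2] = {0, 0}, var[2] = {1, 1};
    printf("%f\n", log_weighted_density(x, mu, var, 0.5f, 2));  // ~ -5.03
    return 0;
}
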
-0.5*like + component_constant + logf(component_pi) : MINVALUEFORMINUSLOG; 131 | } 132 | } 133 | //estep1 log_add() 134 | for(int n=0; n < N; n++) { 135 | float finalloglike = MINVALUEFORMINUSLOG; 136 | for(int m=0; m < M; m++) { 137 | finalloglike = log_add(finalloglike, component_memberships[m*N+n]); 138 | } 139 | loglikelihoods[n] = finalloglike; 140 | } 141 | } 142 | 143 | float estep2_events${'_'+'_'.join(param_val_list)}(components_t* components, float* component_memberships, int M, int n, int N) { 144 | // Finding maximum likelihood for this data point 145 | float temp = 0.0f; 146 | float thread_likelihood = 0.0f; 147 | float max_likelihood; 148 | float denominator_sum = 0.0f; 149 | 150 | max_likelihood = __sec_reduce_max(component_memberships[n:M:N]); 151 | //max_likelihood = component_memberships[n]; 152 | //for(int m = 1; m < M; m++) 153 | // max_likelihood = 154 | // fmaxf(max_likelihood,component_memberships[m*N+n]); 155 | 156 | // Computes sum of all likelihoods for this event 157 | for(int m=0; m < M; m++) { 158 | temp = expf(component_memberships[m*N+n] - max_likelihood); 159 | denominator_sum += temp; 160 | } 161 | temp = max_likelihood + logf(denominator_sum); 162 | thread_likelihood += temp; 163 | 164 | // Divide by denominator to get each membership 165 | for(int m=0; m < M; m++) { 166 | component_memberships[m*N+n] = expf(component_memberships[m*N+n] - temp); 167 | } 168 | //or component_memberships[n:M:N] = exp(component_memberships[n:M:N] - denominator_sum); 169 | 170 | return thread_likelihood; 171 | } 172 | 173 | void estep2${'_'+'_'.join(param_val_list)}(float* data, components_t* components, float* component_memberships, int D, int M, int N, float* likelihood) { 174 | cilk::reducer_opadd total(0.0f); 175 | cilk_for(int n=0; n < N; n++) { 176 | total += estep2_events${'_'+'_'.join(param_val_list)}(components, component_memberships, M, n, N); 177 | } 178 | *likelihood = total.get_value(); 179 | } 180 | 181 | void mstep_mean${'_'+'_'.join(param_val_list)}(float* data, components_t* components, float* component_memberships, int D, int M, int N) { 182 | cilk_for(int m=0; m < M; m++) { 183 | for(int d=0; d < D; d++) { 184 | components->means[m*D+d] = 0.0; 185 | for(int n=0; n < N; n++) { 186 | components->means[m*D+d] += data[d*N+n]*component_memberships[m*N+n]; 187 | } 188 | components->means[m*D+d] /= components->N[m]; 189 | } 190 | } 191 | } 192 | 193 | void mstep_mean_idx${'_'+'_'.join(param_val_list)}(float* data_by_dimension, int* indices, int num_indices, components_t* components, float* component_memberships, int D, int M, int N) { 194 | cilk_for(int m=0; m < M; m++) { 195 | for(int d=0; d < D; d++) { 196 | components->means[m*D+d] = 0.0; 197 | for(int index = 0; index < num_indices; index++) { 198 | int n = indices[index]; 199 | components->means[m*D+d] += data_by_dimension[d*N+n]*component_memberships[m*N+n]; 200 | } 201 | components->means[m*D+d] /= components->N[m]; 202 | } 203 | } 204 | } 205 | 206 | void mstep_n${'_'+'_'.join(param_val_list)}(float* data, components_t* components, float* component_memberships, int D, int M, int N) { 207 | cilk_for(int m=0; m < M; m++) { 208 | components->N[m] = 0.0; 209 | for(int n=0; n < N; n++) { 210 | components->N[m] += component_memberships[m*N+n]; 211 | } 212 | components->pi[m] = components->N[m]; 213 | } 214 | } 215 | 216 | void mstep_n_idx${'_'+'_'.join(param_val_list)}(float* data, int* indices, int num_indices, components_t* components, float* component_memberships, int D, int M, int N) { 217 | cilk_for(int m=0; 
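
estep2_events(), just above, turns those per-component log scores into normalized memberships with the usual max-shifted softmax: subtract the maximum, exponentiate, and recover the event's log-likelihood as max + log(sum). The numerics in isolation (illustrative values):

#include <cmath>
#include <cstdio>

int main() {
    // log scores for one event across M = 3 components
    float s[3] = {-100.0f, -101.0f, -105.0f};

    float max_likelihood = s[0];
    for (int m = 1; m < 3; m++)
        max_likelihood = fmaxf(max_likelihood, s[m]);

    float denominator_sum = 0.0f;
    for (int m = 0; m < 3; m++)
        denominator_sum += expf(s[m] - max_likelihood);
    float loglike = max_likelihood + logf(denominator_sum);  // event log-likelihood

    for (int m = 0; m < 3; m++)
        printf("membership %d = %f\n", m, expf(s[m] - loglike));  // sums to 1
    return 0;
}
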
m < M; m++) { 218 | components->N[m] = 0.0; 219 | for(int index=0; index < num_indices; index++) { 220 | int n = indices[index]; 221 | components->N[m] += component_memberships[m*N+n]; 222 | } 223 | components->pi[m] = components->N[m]; 224 | } 225 | } 226 | 227 | void mstep_covar${'_'+'_'.join(param_val_list)}(float* data, components_t* components,float* component_memberships, int D, int M, int N) { 228 | cilk_for(int m=0; m < M; m++) { 229 | float* means = &(components->means[m*D]); 230 | cilk::reducer_opadd cov_sum(0.0f); 231 | for(int i=0; i < D; i++) { 232 | for(int j=0; j <= i; j++) { 233 | %if cvtype == 'diag': 234 | if(i != j) { 235 | components->R[m*D*D+i*D+j] = 0.0f; 236 | components->R[m*D*D+j*D+i] = 0.0f; 237 | continue; 238 | } 239 | %endif 240 | float sum = 0.0; 241 | for(int n=0; n < N; n++) { 242 | sum += (data[i*N+n]-means[i])*(data[j*N+n]-means[j])*component_memberships[m*N+n]; 243 | } 244 | 245 | if(components->N[m] >= 1.0f) { 246 | components->R[m*D*D+i*D+j] = sum / components->N[m]; 247 | components->R[m*D*D+j*D+i] = sum / components->N[m]; 248 | } else { 249 | components->R[m*D*D+i*D+j] = 0.0f; 250 | components->R[m*D*D+j*D+i] = 0.0f; 251 | } 252 | if(i == j) { 253 | components->R[m*D*D+j*D+i] += components->avgvar[m]; 254 | } 255 | } 256 | } 257 | } 258 | } 259 | 260 | void mstep_covar_idx${'_'+'_'.join(param_val_list)}(float* data_by_dimension, float* data_by_event, int* indices, int num_indices, components_t* components, float* component_memberships, int D, int M, int N) { 261 | cilk_for(int m=0; m < M; m++) { 262 | float* means = &(components->means[m*D]); 263 | cilk::reducer_opadd cov_sum(0.0f); 264 | for(int i=0; i < D; i++) { 265 | for(int j=0; j <= i; j++) { 266 | %if cvtype == 'diag': 267 | if(i != j) { 268 | components->R[m*D*D+i*D+j] = 0.0f; 269 | components->R[m*D*D+j*D+i] = 0.0f; 270 | continue; 271 | } 272 | %endif 273 | float sum = 0.0; 274 | for(int index=0; index < num_indices; index++) { 275 | int n = indices[index]; 276 | sum += (data_by_dimension[i*N+n]-means[i])*(data_by_dimension[j*N+n]-means[j])*component_memberships[m*N+n]; 277 | } 278 | 279 | if(components->N[m] >= 1.0f) { 280 | components->R[m*D*D+i*D+j] = sum / components->N[m]; 281 | components->R[m*D*D+j*D+i] = sum / components->N[m]; 282 | } else { 283 | components->R[m*D*D+i*D+j] = 0.0f; 284 | components->R[m*D*D+j*D+i] = 0.0f; 285 | } 286 | if(i == j) { 287 | components->R[m*D*D+j*D+i] += components->avgvar[m]; 288 | } 289 | } 290 | } 291 | } 292 | } 293 | -------------------------------------------------------------------------------- /templates/em_cilk_seed_components.mako: -------------------------------------------------------------------------------- 1 | void em_cilk_seed_components${'_'+'_'.join(param_val_list)} ( 2 | int num_components, 3 | int num_dimensions, 4 | int num_events ) 5 | { 6 | seed_components${'_'+'_'.join(param_val_list)}(fcs_data_by_event, &components, num_dimensions, num_components, num_events); 7 | } 8 | -------------------------------------------------------------------------------- /templates/em_cilk_train.mako: -------------------------------------------------------------------------------- 1 | 2 | boost::python::tuple em_cilk_train${'_'+'_'.join(param_val_list)} ( 3 | int num_components, 4 | int num_dimensions, 5 | int num_events, 6 | int min_iters, 7 | int max_iters) 8 | { 9 | 10 | // Computes the R matrix inverses, and the gaussian constant 11 | constants${'_'+'_'.join(param_val_list)}(&components,num_components,num_dimensions); 12 | // Compute average 
variance based on the data 13 | compute_average_variance${'_'+'_'.join(param_val_list)}(fcs_data_by_event, &components, num_dimensions, num_components, num_events); 14 | 15 | // Calculate an epsilon value 16 | //int ndata_points = num_events*num_dimensions; 17 | float epsilon = (1+num_dimensions+0.5*(num_dimensions+1)*num_dimensions)*log((float)num_events*num_dimensions)*0.0001; 18 | int iters; 19 | float likelihood = -100000; 20 | float old_likelihood = likelihood * 10; 21 | 22 | float change = epsilon*2; 23 | 24 | iters = 0; 25 | // This is the iterative loop for the EM algorithm. 26 | // It re-estimates parameters, re-computes constants, and then regroups the events 27 | // These steps keep repeating until the change in likelihood is less than some epsilon 28 | while(iters < min_iters || (fabs(change) > epsilon && iters < max_iters)) { 29 | old_likelihood = likelihood; 30 | 31 | estep1${'_'+'_'.join(param_val_list)}(fcs_data_by_dimension,&components,component_memberships,num_dimensions,num_components,num_events,loglikelihoods); 32 | estep2${'_'+'_'.join(param_val_list)}(fcs_data_by_dimension,&components,component_memberships,num_dimensions,num_components,num_events,&likelihood); 33 | 34 | // This kernel computes a new N, pi isn't updated until compute_constants though 35 | mstep_n${'_'+'_'.join(param_val_list)}(fcs_data_by_dimension,&components,component_memberships,num_dimensions,num_components,num_events); 36 | mstep_mean${'_'+'_'.join(param_val_list)}(fcs_data_by_dimension,&components,component_memberships,num_dimensions,num_components,num_events); 37 | mstep_covar${'_'+'_'.join(param_val_list)}(fcs_data_by_dimension,&components,component_memberships,num_dimensions,num_components,num_events); 38 | 39 | 40 | // Inverts the R matrices, computes the constant, normalizes cluster probabilities 41 | constants${'_'+'_'.join(param_val_list)}(&components,num_components,num_dimensions); 42 | change = likelihood - old_likelihood; 43 | iters++; 44 | } 45 | 46 | estep1${'_'+'_'.join(param_val_list)}(fcs_data_by_dimension,&components,component_memberships,num_dimensions,num_components,num_events,loglikelihoods); 47 | estep2${'_'+'_'.join(param_val_list)}(fcs_data_by_dimension,&components,component_memberships,num_dimensions,num_components,num_events,&likelihood); 48 | 49 | return boost::python::make_tuple(likelihood, iters); 50 | } 51 | -------------------------------------------------------------------------------- /templates/em_cuda_device_helper_funcs.mako: -------------------------------------------------------------------------------- 1 | #define PI 3.1415926535897931 2 | #define COVARIANCE_DYNAMIC_RANGE 1E6 3 | #include 4 | #include 5 | #include 6 | #define MINVALUEFORMINUSLOG -1000.0f 7 | #define MIN_VARIANCE 0.01 8 | #define SCALE_VAL 1000.0f 9 | 10 | 11 | __device__ static int ToFixedPoint(float input) { 12 | if (input==FLT_MAX) 13 | return INT_MAX; 14 | return (int)(SCALE_VAL*input); 15 | } 16 | 17 | __device__ static float ToFloatPoint(int input) { 18 | if (input==INT_MAX) 19 | return FLT_MAX; 20 | return (float)input/SCALE_VAL; 21 | } 22 | 23 | __device__ float log_add(float log_a, float log_b) { 24 | if(log_a < log_b) { 25 | float tmp = log_a; 26 | log_a = log_b; 27 | log_b = tmp; 28 | } 29 | //setting MIN...LOG so small, I don't even need to look 30 | return (((log_b - log_a) <= MINVALUEFORMINUSLOG) ? 
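
The convergence threshold computed in em_cilk_train above, epsilon = (1 + D + 0.5 (D+1) D) * log(N*D) * 0.0001, scales with the number of free parameters in one full-covariance component: one weight, D means, and D(D+1)/2 covariance entries. Worked through for hypothetical sizes:

#include <cmath>
#include <cstdio>

int main() {
    int num_dimensions = 19, num_events = 100000;  // hypothetical sizes
    float params = 1 + num_dimensions
                 + 0.5f * (num_dimensions + 1) * num_dimensions;  // 210
    float epsilon = params
                  * logf((float)num_events * num_dimensions) * 0.0001f;
    printf("free params = %g, epsilon = %f\n", params, epsilon);  // 210, ~0.30
    return 0;
}
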
log_a :
31 |         log_a + (float)(logf(1.0 + (double)(expf((double)(log_b - log_a))))));
32 | }
33 | 
34 | 
35 | 
36 | /*
37 |  * Compute the multivariate mean of the FCS data
38 |  */
39 | __device__ void mvtmeans(float* fcs_data, int num_dimensions, int num_events, float* means) {
40 |     // access thread id
41 |     int tid = threadIdx.x;
42 | 
43 |     if(tid < num_dimensions) {
44 |         means[tid] = 0.0f;
45 | 
46 |         // Sum up all the values for the dimension
47 |         for(int i=0; i < num_events; i++) {
48 |             means[tid] += fcs_data[i*num_dimensions+tid];
49 |         }
50 | 
51 |         // Divide by the # of elements to get the average
52 |         means[tid] /= (float) num_events;
53 |     }
54 | }
55 | 
56 | 
57 | __device__ void normalize_pi(components_t* components, int num_components) {
58 |     __shared__ float sum;
59 | 
60 |     // TODO: could maybe use a parallel reduction... but the # of elements is really small
61 |     // What is better: having thread 0 compute a shared sum and sync, or just have each one compute the sum?
62 |     if(threadIdx.x == 0) {
63 |         sum = 0.0f;
64 |         for(int i=0; i < num_components; i++) {
65 |             sum += components->pi[i];
66 |         }
67 |     }
68 | 
69 |     __syncthreads();
70 | 
71 |     for(int c=threadIdx.x; c < num_components; c += blockDim.x) {
72 |         components->pi[c] /= sum;   // each thread normalizes its own strided entries
73 |     }
74 | 
75 |     __syncthreads();
76 | }
77 | 
78 | __device__ void parallelSum(float* data) {
79 |     const unsigned int tid = threadIdx.x;
80 |     for(unsigned int s=blockDim.x/2; s>32; s>>=1) {
81 |         if (tid < s)
82 |             data[tid] += data[tid + s];
83 |         __syncthreads();
84 |     }
85 |     if (tid < 32) {
86 |         volatile float* sdata = data;
87 |         sdata[tid] += sdata[tid+32];
88 |         sdata[tid] += sdata[tid+16];
89 |         sdata[tid] += sdata[tid+8];
90 |         sdata[tid] += sdata[tid+4];
91 |         sdata[tid] += sdata[tid+2];
92 |         sdata[tid] += sdata[tid+1];
93 |     }
94 | }
95 | 
96 | /*
97 |  * Computes the row and col of a square matrix based on the index into
98 |  * a lower triangular (with diagonal) matrix
99 |  *
100 |  * Used to determine what row/col should be computed for covariance
101 |  * based on a block index.
102 |  */
103 | __device__ void compute_row_col(int n, int* row, int* col) {
104 |     int i = 0;
105 |     for(int r=0; r < n; r++) {
106 |         for(int c=0; c <= r; c++) {
107 |             if(i == blockIdx.y) {
108 |                 *row = r;
109 |                 *col = c;
110 |                 return;
111 |             }
112 |             i++;
113 |         }
114 |     }
115 | }
116 | 
117 | //CODEVAR_2
118 | __device__ void compute_row_col_thread(int n, int* row, int* col) {
119 |     int i = 0;
120 |     for(int r=0; r < n; r++) {
121 |         for(int c=0; c <= r; c++) {
122 |             if(i == threadIdx.x) {
123 |                 *row = r;
124 |                 *col = c;
125 |                 return;
126 |             }
127 |             i++;
128 |         }
129 |     }
130 | }
131 | //CODEVAR_3
132 | __device__ void compute_row_col_block(int n, int* row, int* col) {
133 |     int i = 0;
134 |     for(int r=0; r < n; r++) {
135 |         for(int c=0; c <= r; c++) {
136 |             if(i == blockIdx.x) {
137 |                 *row = r;
138 |                 *col = c;
139 |                 return;
140 |             }
141 |             i++;
142 |         }
143 |     }
144 | }
145 | 
146 | //CODEVAR_2B and CODEVAR_3B
147 | __device__ void compute_my_event_indices(int n, int bsize, int num_b, int* e_start, int* e_end) {
148 |     int myId = blockIdx.y;
149 |     *e_start = myId*bsize;
150 |     if(myId==(num_b-1)) {
151 |         *e_end = ((myId*bsize)-n < 0 ?
n:myId*bsize); 152 | } else { 153 | *e_end = myId*bsize + bsize; 154 | } 155 | 156 | return; 157 | } 158 | 159 | 160 | __device__ void compute_row_col_transpose(int n, int* row, int* col) { 161 | int i = 0; 162 | for(int r=0; r < n; r++) { 163 | for(int c=0; c <= r; c++) { 164 | if(i == blockIdx.x) { 165 | *row = r; 166 | *col = c; 167 | return; 168 | } 169 | i++; 170 | } 171 | } 172 | } 173 | 174 | -------------------------------------------------------------------------------- /templates/em_cuda_eval.mako: -------------------------------------------------------------------------------- 1 | void em_cuda_eval${'_'+'_'.join(param_val_list)} ( 2 | int num_components, 3 | int num_dimensions, 4 | int num_events ) 5 | { 6 | //TODO: Is this necessary, or can we assume the values are still set? 7 | // Computes the R matrix inverses, and the gaussian constant 8 | constants_kernel_launch${'_'+'_'.join(param_val_list)}(d_components,num_components,num_dimensions); 9 | cudaThreadSynchronize(); 10 | CUT_CHECK_ERROR("Constants Kernel execution failed: "); 11 | estep1_launch${'_'+'_'.join(param_val_list)}(d_fcs_data_by_dimension,d_components, d_component_memberships, num_dimensions,num_components,num_events,d_loglikelihoods); 12 | cudaThreadSynchronize(); 13 | CUT_CHECK_ERROR("Kernel execution failed"); 14 | 15 | copy_evals_data_GPU_to_CPU(num_events, num_components); 16 | } 17 | 18 | -------------------------------------------------------------------------------- /templates/em_cuda_host_helper_funcs.mako: -------------------------------------------------------------------------------- 1 | 2 | # define CUT_CHECK_ERROR(errorMessage) { \ 3 | cudaError_t err = cudaGetLastError(); \ 4 | if( cudaSuccess != err) { \ 5 | fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n", \ 6 | errorMessage, __FILE__, __LINE__, cudaGetErrorString( err) );\ 7 | exit(EXIT_FAILURE); \ 8 | } \ 9 | } 10 | 11 | # define CUDA_SAFE_CALL_NO_SYNC( call) { \ 12 | cudaError err = call; \ 13 | if( cudaSuccess != err) { \ 14 | fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n", \ 15 | __FILE__, __LINE__, cudaGetErrorString( err) ); \ 16 | exit(EXIT_FAILURE); \ 17 | } } 18 | 19 | # define CUDA_SAFE_CALL( call) CUDA_SAFE_CALL_NO_SYNC(call); \ 20 | 21 | 22 | //=== Data structure pointers === 23 | 24 | //GPU copies of events 25 | float* d_fcs_data_by_event; 26 | float* d_fcs_data_by_dimension; 27 | 28 | //GPU index list for train_on_subset 29 | int* d_index_list; 30 | 31 | //GPU copies of components 32 | components_t temp_components; 33 | components_t* d_components; 34 | 35 | //GPU copies of eval data 36 | float *d_component_memberships; 37 | float *d_loglikelihoods; 38 | 39 | //Copy functions to ensure CPU data structures are up to date 40 | void copy_component_data_GPU_to_CPU(int num_components, int num_dimensions); 41 | void copy_evals_data_GPU_to_CPU(int num_events, int num_components); 42 | 43 | //=== Memory Alloc/Free Functions === 44 | 45 | // ================== Event data allocation on GPU ================= : 46 | 47 | void alloc_events_on_GPU(int num_dimensions, int num_events) { 48 | int mem_size = num_dimensions*num_events*sizeof(float); 49 | CUDA_SAFE_CALL(cudaMalloc( (void**) &d_fcs_data_by_event, mem_size)); 50 | CUDA_SAFE_CALL(cudaMalloc( (void**) &d_fcs_data_by_dimension, mem_size)); 51 | CUT_CHECK_ERROR("Alloc events on GPU failed: "); 52 | } 53 | 54 | void alloc_index_list_on_GPU(int num_indices) { 55 | int mem_size = num_indices*sizeof(int); 56 | CUDA_SAFE_CALL(cudaMalloc( (void**) &d_index_list, 
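
Every allocation and transfer in these host helpers goes through CUDA_SAFE_CALL / CUT_CHECK_ERROR, so a failing CUDA runtime call aborts with its file and line instead of letting EM continue on garbage. The same pattern as a minimal standalone program (CHECK is a hypothetical stand-in for the macros above):

#include <cstdio>
#include <cstdlib>
#include <cuda_runtime.h>

#define CHECK(call)                                               \
    do {                                                          \
        cudaError_t err = (call);                                 \
        if (err != cudaSuccess) {                                 \
            fprintf(stderr, "CUDA error %s:%d: %s\n",             \
                    __FILE__, __LINE__, cudaGetErrorString(err)); \
            exit(EXIT_FAILURE);                                   \
        }                                                         \
    } while (0)

int main() {
    float* d_buf = NULL;
    CHECK(cudaMalloc((void**)&d_buf, 1024 * sizeof(float)));
    CHECK(cudaMemset(d_buf, 0, 1024 * sizeof(float)));
    CHECK(cudaFree(d_buf));
    return 0;
}

The do { } while (0) wrapper is the one refinement over the macros above: it makes CHECK(...) behave as a single statement after an if, which the bare-block versions do not.
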
mem_size)); 57 | CUT_CHECK_ERROR("Alloc index list on GPU failed: "); 58 | } 59 | 60 | void alloc_events_from_index_on_GPU(int num_indices, int num_dimensions) { 61 | CUDA_SAFE_CALL(cudaMalloc((void**) &d_fcs_data_by_event, sizeof(float)*num_indices*num_dimensions)); 62 | CUDA_SAFE_CALL(cudaMalloc((void**) &d_fcs_data_by_dimension, sizeof(float)*num_indices*num_dimensions)); 63 | CUT_CHECK_ERROR("Alloc events from index on GPU failed: "); 64 | } 65 | 66 | // ================== Cluster data allocation on GPU ================= : 67 | void alloc_components_on_GPU(int original_num_components, int num_dimensions) { 68 | // Setup the component data structures on device 69 | // First allocate structures on the host, CUDA malloc the arrays 70 | // Then CUDA malloc structures on the device and copy them over 71 | CUDA_SAFE_CALL(cudaMalloc((void**) &(temp_components.N),sizeof(float)*original_num_components)); 72 | CUDA_SAFE_CALL(cudaMalloc((void**) &(temp_components.pi),sizeof(float)*original_num_components)); 73 | CUDA_SAFE_CALL(cudaMalloc((void**) &(temp_components.CP),sizeof(float)*original_num_components)); //NEW LINE 74 | CUDA_SAFE_CALL(cudaMalloc((void**) &(temp_components.constant),sizeof(float)*original_num_components)); 75 | CUDA_SAFE_CALL(cudaMalloc((void**) &(temp_components.avgvar),sizeof(float)*original_num_components)); 76 | CUDA_SAFE_CALL(cudaMalloc((void**) &(temp_components.means),sizeof(float)*num_dimensions*original_num_components)); 77 | CUDA_SAFE_CALL(cudaMalloc((void**) &(temp_components.R),sizeof(float)*num_dimensions*num_dimensions*original_num_components)); 78 | CUDA_SAFE_CALL(cudaMalloc((void**) &(temp_components.Rinv),sizeof(float)*num_dimensions*num_dimensions*original_num_components)); 79 | 80 | // Allocate a struct on the device 81 | CUDA_SAFE_CALL(cudaMalloc((void**) &d_components, sizeof(components_t))); 82 | 83 | // Copy Cluster data to device 84 | CUDA_SAFE_CALL(cudaMemcpy(d_components,&temp_components,sizeof(components_t),cudaMemcpyHostToDevice)); 85 | CUT_CHECK_ERROR("Alloc components on GPU failed: "); 86 | } 87 | 88 | 89 | // ================= Eval data alloc on GPU =============== 90 | void alloc_evals_on_GPU(int num_events, int num_components){ 91 | CUDA_SAFE_CALL(cudaMalloc((void**) &(d_component_memberships),sizeof(float)*num_events*num_components)); 92 | CUDA_SAFE_CALL(cudaMalloc((void**) &(d_loglikelihoods),sizeof(float)*num_events)); 93 | CUT_CHECK_ERROR("Alloc eval data on GPU failed: "); 94 | } 95 | 96 | 97 | // ======================== Copy event data from CPU to GPU ================ 98 | void copy_event_data_CPU_to_GPU(int num_events, int num_dimensions) { 99 | int mem_size = num_dimensions*num_events*sizeof(float); 100 | CUDA_SAFE_CALL(cudaMemcpy( d_fcs_data_by_event, fcs_data_by_event, mem_size,cudaMemcpyHostToDevice) ); 101 | CUDA_SAFE_CALL(cudaMemcpy( d_fcs_data_by_dimension, fcs_data_by_dimension, mem_size,cudaMemcpyHostToDevice) ); 102 | CUT_CHECK_ERROR("Copy events from CPU to GPU execution failed: "); 103 | } 104 | 105 | void copy_index_list_data_CPU_to_GPU(int num_indices) { 106 | int mem_size = num_indices*sizeof(int); 107 | CUDA_SAFE_CALL(cudaMemcpy( d_index_list, index_list, mem_size,cudaMemcpyHostToDevice) ); 108 | CUT_CHECK_ERROR("Copy event index list from CPU to GPU execution failed: "); 109 | } 110 | 111 | void copy_events_from_index_CPU_to_GPU(int num_indices, int num_dimensions) { 112 | CUDA_SAFE_CALL(cudaMemcpy(d_fcs_data_by_dimension, fcs_data_by_dimension, sizeof(float)*num_indices*num_dimensions, cudaMemcpyHostToDevice)); 113 | 
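
alloc_components_on_GPU() above uses the standard deep-copy idiom for a struct of device arrays: cudaMalloc each array into a host-side mirror (temp_components), then copy the mirror, device pointers and all, into a device-resident struct that kernels can dereference. The idiom reduced to two fields (soa_t is a hypothetical stand-in for components_t):

#include <cuda_runtime.h>

struct soa_t { float* a; float* b; };

int main() {
    soa_t mirror;  // host struct holding device pointers
    cudaMalloc((void**)&mirror.a, 16 * sizeof(float));
    cudaMalloc((void**)&mirror.b, 16 * sizeof(float));

    soa_t* d_soa;  // device struct for kernels to dereference
    cudaMalloc((void**)&d_soa, sizeof(soa_t));
    cudaMemcpy(d_soa, &mirror, sizeof(soa_t), cudaMemcpyHostToDevice);

    // ... launch kernels that take d_soa ...

    cudaFree(mirror.a); cudaFree(mirror.b); cudaFree(d_soa);
    return 0;
}

copy_component_data_GPU_to_CPU() below reverses the trick: it first copies the struct of pointers back to the host, then copies each array it points to.
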
CUDA_SAFE_CALL(cudaMemcpy(d_fcs_data_by_event, fcs_data_by_event, sizeof(float)*num_indices*num_dimensions, cudaMemcpyHostToDevice)); 114 | CUT_CHECK_ERROR("Copy events by index from CPU to GPU execution failed: "); 115 | } 116 | 117 | // ======================== Copy component data from CPU to GPU ================ 118 | void copy_component_data_CPU_to_GPU(int num_components, int num_dimensions) { 119 | CUDA_SAFE_CALL(cudaMemcpy(temp_components.N, components.N, sizeof(float)*num_components,cudaMemcpyHostToDevice)); 120 | CUDA_SAFE_CALL(cudaMemcpy(temp_components.pi, components.pi, sizeof(float)*num_components,cudaMemcpyHostToDevice)); 121 | CUDA_SAFE_CALL(cudaMemcpy(temp_components.CP, components.CP, sizeof(float)*num_components,cudaMemcpyHostToDevice)); 122 | //NEW LINE 123 | CUDA_SAFE_CALL(cudaMemcpy(temp_components.constant, components.constant, sizeof(float)*num_components,cudaMemcpyHostToDevice)); 124 | CUDA_SAFE_CALL(cudaMemcpy(temp_components.avgvar, components.avgvar, sizeof(float)*num_components,cudaMemcpyHostToDevice)); 125 | CUDA_SAFE_CALL(cudaMemcpy(temp_components.means, components.means, sizeof(float)*num_dimensions*num_components,cudaMemcpyHostToDevice)); 126 | CUDA_SAFE_CALL(cudaMemcpy(temp_components.R, components.R, sizeof(float)*num_dimensions*num_dimensions*num_components,cudaMemcpyHostToDevice)); 127 | CUDA_SAFE_CALL(cudaMemcpy(temp_components.Rinv, components.Rinv, sizeof(float)*num_dimensions*num_dimensions*num_components,cudaMemcpyHostToDevice)); 128 | CUDA_SAFE_CALL(cudaMemcpy(d_components,&temp_components,sizeof(components_t),cudaMemcpyHostToDevice)); 129 | CUT_CHECK_ERROR("Copy components from CPU to GPU execution failed: "); 130 | } 131 | 132 | 133 | // ======================== Copy component data from GPU to CPU ================ 134 | void copy_component_data_GPU_to_CPU(int num_components, int num_dimensions) { 135 | CUDA_SAFE_CALL(cudaMemcpy(&temp_components, d_components, sizeof(components_t),cudaMemcpyDeviceToHost)); 136 | // copy all of the arrays from the structs 137 | CUDA_SAFE_CALL(cudaMemcpy(components.N, temp_components.N, sizeof(float)*num_components,cudaMemcpyDeviceToHost)); 138 | CUDA_SAFE_CALL(cudaMemcpy(components.pi, temp_components.pi, sizeof(float)*num_components,cudaMemcpyDeviceToHost)); 139 | CUDA_SAFE_CALL(cudaMemcpy(components.CP, temp_components.CP, sizeof(float)*num_components,cudaMemcpyDeviceToHost)); 140 | CUDA_SAFE_CALL(cudaMemcpy(components.constant, temp_components.constant, sizeof(float)*num_components,cudaMemcpyDeviceToHost)); 141 | CUDA_SAFE_CALL(cudaMemcpy(components.avgvar, temp_components.avgvar, sizeof(float)*num_components,cudaMemcpyDeviceToHost)); 142 | CUDA_SAFE_CALL(cudaMemcpy(components.means, temp_components.means, sizeof(float)*num_dimensions*num_components,cudaMemcpyDeviceToHost)); 143 | CUDA_SAFE_CALL(cudaMemcpy(components.R, temp_components.R, sizeof(float)*num_dimensions*num_dimensions*num_components,cudaMemcpyDeviceToHost)); 144 | CUDA_SAFE_CALL(cudaMemcpy(components.Rinv, temp_components.Rinv, sizeof(float)*num_dimensions*num_dimensions*num_components,cudaMemcpyDeviceToHost)); 145 | CUT_CHECK_ERROR("Copy components from GPU to CPU execution failed: "); 146 | } 147 | 148 | // ======================== Copy eval data GPU <==> CPU ================ 149 | void copy_evals_CPU_to_GPU(int num_events, int num_components) { 150 | CUDA_SAFE_CALL(cudaMemcpy( d_loglikelihoods, loglikelihoods, sizeof(float)*num_events,cudaMemcpyHostToDevice) ); 151 | CUDA_SAFE_CALL(cudaMemcpy( d_component_memberships, component_memberships, 
sizeof(float)*num_events*num_components,cudaMemcpyHostToDevice) ); 152 | CUT_CHECK_ERROR("Copy eval data from CPU to GPU execution failed: "); 153 | } 154 | 155 | void copy_evals_data_GPU_to_CPU(int num_events, int num_components){ 156 | CUDA_SAFE_CALL(cudaMemcpy(component_memberships, d_component_memberships, sizeof(float)*num_events*num_components, cudaMemcpyDeviceToHost)); 157 | CUDA_SAFE_CALL(cudaMemcpy(loglikelihoods, d_loglikelihoods, sizeof(float)*num_events, cudaMemcpyDeviceToHost)); 158 | // CUDA_SAFE_CALL(cudaMemcpy(likelihoods, d_likelihoods, sizeof(float)*num_events, cudaMemcpyDeviceToHost)); 159 | CUT_CHECK_ERROR("Copy eval data from GPU to CPU execution failed: "); 160 | } 161 | 162 | // ================== Event data dellocation on GPU ================= : 163 | void dealloc_events_on_GPU() { 164 | CUDA_SAFE_CALL(cudaFree(d_fcs_data_by_event)); 165 | CUDA_SAFE_CALL(cudaFree(d_fcs_data_by_dimension)); 166 | CUT_CHECK_ERROR("Dealloc events on GPU failed: "); 167 | } 168 | 169 | void dealloc_index_list_on_GPU() { 170 | CUDA_SAFE_CALL(cudaFree(d_index_list)); 171 | CUT_CHECK_ERROR("Dealloc index list on GPU failed: "); 172 | } 173 | 174 | // ==================== Cluster data deallocation on GPU ================= 175 | void dealloc_components_on_GPU() { 176 | CUDA_SAFE_CALL(cudaFree(temp_components.N)); 177 | CUDA_SAFE_CALL(cudaFree(temp_components.pi)); 178 | CUDA_SAFE_CALL(cudaFree(temp_components.CP)); 179 | CUDA_SAFE_CALL(cudaFree(temp_components.constant)); 180 | CUDA_SAFE_CALL(cudaFree(temp_components.avgvar)); 181 | CUDA_SAFE_CALL(cudaFree(temp_components.means)); 182 | CUDA_SAFE_CALL(cudaFree(temp_components.R)); 183 | CUDA_SAFE_CALL(cudaFree(temp_components.Rinv)); 184 | CUDA_SAFE_CALL(cudaFree(d_components)); 185 | CUT_CHECK_ERROR("Dealloc components on GPU failed: "); 186 | } 187 | 188 | // ==================== Eval data deallocation GPU ================= 189 | void dealloc_evals_on_GPU() { 190 | CUDA_SAFE_CALL(cudaFree(d_component_memberships)); 191 | CUDA_SAFE_CALL(cudaFree(d_loglikelihoods)); 192 | CUT_CHECK_ERROR("Dealloc eval data on GPU failed: "); 193 | } 194 | 195 | // ==================== Diagnostics ================= 196 | 197 | void print_components(int num_components, int num_dimensions){ 198 | copy_component_data_GPU_to_CPU(num_components,num_dimensions); 199 | printf("===============\n"); 200 | for(int m = 0; m < num_components; m++){ 201 | printf("%0.4f ", components.N[m]); 202 | } printf("\n"); 203 | for(int m = 0; m < num_components; m++){ 204 | printf("%0.4f ", components.pi[m]); 205 | } printf("\n"); 206 | for(int m = 0; m < num_components; m++){ 207 | printf("%0.4f ", components.CP[m]); 208 | } printf("\n"); 209 | for(int m = 0; m < num_components; m++){ 210 | printf("%0.4f ", components.constant[m]); 211 | } printf("\n"); 212 | for(int m = 0; m < num_components; m++){ 213 | printf("%0.4f ", components.avgvar[m]); 214 | } printf("\n"); 215 | for(int m = 0; m < num_components; m++){ 216 | for(int d = 0; d < num_dimensions; d++) 217 | printf("%0.4f ", components.means[m*num_dimensions+d]); 218 | printf("\n"); 219 | } 220 | for(int m = 0; m < num_components; m++){ 221 | for(int d = 0; d < num_dimensions; d++) 222 | for(int d2 = 0; d2 < num_dimensions; d2++) 223 | printf("%0.4f ", components.R[m*num_dimensions*num_dimensions+d*num_dimensions+d2]); 224 | printf("\n"); 225 | } 226 | 227 | for(int m = 0; m < num_components; m++){ 228 | for(int d = 0; d < num_dimensions; d++) 229 | for(int d2 = 0; d2 < num_dimensions; d2++) 230 | printf("%0.4f ", 
components.Rinv[m*num_dimensions*num_dimensions+d*num_dimensions+d2]); 231 | printf("\n"); 232 | } 233 | printf("===============\n"); 234 | } 235 | 236 | -------------------------------------------------------------------------------- /templates/em_cuda_launch_decl.mako: -------------------------------------------------------------------------------- 1 | <% 2 | tempbuff_type_name = 'unsigned int' if supports_float32_atomic_add == '0' else 'float' 3 | %> 4 | 5 | void seed_components_launch${'_'+'_'.join(param_val_list)}(float* d_fcs_data_by_event, components_t* d_components, int num_dimensions, int original_num_components, int num_events); 6 | //void seed_components_launch(float* d_fcs_data_by_event, components_t* d_components, int num_dimensions, int original_num_components, int num_events); 7 | void constants_kernel_launch${'_'+'_'.join(param_val_list)}(components_t* d_components, int original_num_components, int num_dimensions); 8 | void estep1_launch${'_'+'_'.join(param_val_list)}(float* d_fcs_data_by_dimension, components_t* d_components, float* component_memberships, int num_dimensions, int num_components, int num_events, float* d_loglikelihoods); 9 | void estep2_launch${'_'+'_'.join(param_val_list)}(float* d_fcs_data_by_dimension, components_t* d_components, float* component_memberships, int num_dimensions, int num_components, int num_events, float* d_likelihoods); 10 | void mstep_N_launch${'_'+'_'.join(param_val_list)}(float* d_fcs_data_by_event, components_t* d_components, float* component_memberships, int num_dimensions, int num_components, int num_events); 11 | void mstep_N_launch_idx${'_'+'_'.join(param_val_list)}(float* d_fcs_data_by_event, int* d_index_list, int num_indices,components_t* d_components, float* component_memberships, int num_dimensions, int num_components, int num_events); 12 | void mstep_means_launch${'_'+'_'.join(param_val_list)}(float* d_fcs_data_by_dimension, components_t* d_components, float* component_memberships, int num_dimensions, int num_components, int num_events); 13 | void mstep_means_launch_idx${'_'+'_'.join(param_val_list)}(float* d_fcs_data_by_dimension, int* d_index_list, int num_indices, components_t* d_components, float* component_memberships, int num_dimensions, int num_components, int num_events); 14 | void mstep_covar_launch${'_'+'_'.join(param_val_list)}(float* d_fcs_data_by_dimension, float* d_fcs_data_by_event, components_t* d_components, float* component_memberships, int num_dimensions, int num_components, int num_events, ${tempbuff_type_name}* temp_buffer_2b); 15 | void mstep_covar_launch_idx${'_'+'_'.join(param_val_list)}(float* d_fcs_data_by_dimension, float* d_fcs_data_by_event,int* d_index_list, int num_indices, components_t* d_components, float* component_memberships, int num_dimensions, int num_components, int num_events, ${tempbuff_type_name}* temp_buffer_2b); 16 | void compute_average_variance_launch${'_'+'_'.join(param_val_list)}(float* d_fcs_data_by_event, components_t* d_components, int num_dimensions, int num_components, int num_events); 17 | -------------------------------------------------------------------------------- /templates/em_cuda_seed_components.mako: -------------------------------------------------------------------------------- 1 | void em_cuda_seed_components${'_'+'_'.join(param_val_list)} ( 2 | int num_components, 3 | int num_dimensions, 4 | int num_events ) 5 | { 6 | seed_components_launch${'_'+'_'.join(param_val_list)}(d_fcs_data_by_event, d_components, num_dimensions, num_components, num_events); 7 | 
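
Throughout these templates, ${'_'+'_'.join(param_val_list)} splices the specializer's variant parameters into every symbol, so each rendered combination of template parameters gets a distinct C name and multiple variants can be compiled side by side in one module. For a hypothetical param_val_list of ['diag', '1'], the function being defined here would render roughly as:

void em_cuda_seed_components_diag_1 (
    int num_components,
    int num_dimensions,
    int num_events )
{
    seed_components_launch_diag_1(d_fcs_data_by_event, d_components,
                                  num_dimensions, num_components, num_events);
    cudaThreadSynchronize();
}
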
cudaThreadSynchronize(); 8 | } 9 | -------------------------------------------------------------------------------- /templates/em_cuda_train.mako: -------------------------------------------------------------------------------- 1 | <% 2 | tempbuff_type_name = 'unsigned int' if supports_float32_atomic_add == '0' else 'float' 3 | %> 4 | 5 | boost::python::tuple em_cuda_train${'_'+'_'.join(param_val_list)} ( 6 | int num_components, 7 | int num_dimensions, 8 | int num_events, 9 | int min_iters, 10 | int max_iters) 11 | { 12 | 13 | // ================= Temp buffer for codevar 2b ================ 14 | ${tempbuff_type_name} *temp_buffer_2b = NULL; 15 | %if covar_version_name.upper() in ['2B','V2B','_V2B']: 16 | //scratch space to clear out components->R 17 | ${tempbuff_type_name} *zeroR_2b = (${tempbuff_type_name}*) malloc(sizeof(${tempbuff_type_name})*num_dimensions*num_dimensions*num_components); 18 | for(int i = 0; i epsilon)) { 62 | old_likelihood = likelihood; 63 | 64 | estep1_launch${'_'+'_'.join(param_val_list)}(d_fcs_data_by_dimension,d_components, d_component_memberships, num_dimensions,num_components,num_events,d_loglikelihoods); 65 | estep2_launch${'_'+'_'.join(param_val_list)}(d_fcs_data_by_dimension,d_components, d_component_memberships, num_dimensions,num_components,num_events,d_likelihoods); 66 | cudaThreadSynchronize(); 67 | CUT_CHECK_ERROR("E-step Kernel execution failed"); 68 | //copy_evals_data_GPU_to_CPU(num_events, num_components); 69 | //print_evals(component_memberships, loglikelihoods, num_events, num_components); 70 | 71 | // Copy the likelihood totals from each block, sum them up to get a total 72 | CUDA_SAFE_CALL(cudaMemcpy(likelihoods,d_likelihoods,sizeof(float)*${num_blocks_estep},cudaMemcpyDeviceToHost)); 73 | likelihood = 0.0; 74 | for(int i=0;i<${num_blocks_estep};i++) { 75 | likelihood += likelihoods[i]; 76 | } 77 | // This kernel computes a new N, pi isn't updated until compute_constants though 78 | mstep_N_launch${'_'+'_'.join(param_val_list)}(d_fcs_data_by_event,d_components, d_component_memberships, num_dimensions,num_components,num_events); 79 | cudaThreadSynchronize(); 80 | 81 | // This kernel computes new means 82 | mstep_means_launch${'_'+'_'.join(param_val_list)}(d_fcs_data_by_dimension,d_components, d_component_memberships, num_dimensions,num_components,num_events); 83 | cudaThreadSynchronize(); 84 | 85 | 86 | %if covar_version_name.upper() in ['2B','V2B','_V2B']: 87 | CUDA_SAFE_CALL(cudaMemcpy(temp_buffer_2b, zeroR_2b, sizeof(${tempbuff_type_name})*num_dimensions*num_dimensions*num_components, cudaMemcpyHostToDevice) ); 88 | %endif 89 | 90 | // Covariance is symmetric, so we only need to compute N*(N+1)/2 matrix elements per component 91 | mstep_covar_launch${'_'+'_'.join(param_val_list)}(d_fcs_data_by_dimension,d_fcs_data_by_event,d_components,d_component_memberships,num_dimensions,num_components,num_events,temp_buffer_2b); 92 | cudaThreadSynchronize(); 93 | 94 | CUT_CHECK_ERROR("M-step Kernel execution failed: "); 95 | 96 | // Inverts the R matrices, computes the constant, normalizes component probabilities 97 | constants_kernel_launch${'_'+'_'.join(param_val_list)}(d_components,num_components,num_dimensions); 98 | cudaThreadSynchronize(); 99 | CUT_CHECK_ERROR("Constants Kernel execution failed: "); 100 | 101 | // change = (likelihood - old_likelihood)/fabs(old_likelihood); 102 | change = likelihood - old_likelihood; 103 | 104 | iters++; 105 | 106 | //copy_component_data_GPU_to_CPU(num_components, num_dimensions); 107 | //print_components(&components, 
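
The temp_buffer_2b machinery in em_cuda_train exists because older GPUs lack a float atomicAdd: when supports_float32_atomic_add is '0', the 2B covariance kernel's scratch buffer switches to unsigned int, and values presumably round-trip through the ToFixedPoint/ToFloatPoint pair from the device helpers (scale 1000, i.e. three decimal digits); the kernels themselves live in em_cuda_kernels.mako, outside this excerpt. A host-side sketch of that quantization contract:

#include <cfloat>
#include <climits>
#include <cstdio>

#define SCALE_VAL 1000.0f  // same scale as the device helpers

static int   ToFixed(float f) { return f == FLT_MAX ? INT_MAX : (int)(SCALE_VAL * f); }
static float ToFloat(int i)   { return i == INT_MAX ? FLT_MAX : (float)i / SCALE_VAL; }

int main() {
    int acc = 0;                   // on the device: integer atomicAdd target
    acc += ToFixed(1.2345f);
    acc += ToFixed(2.0001f);
    printf("%f\n", ToFloat(acc));  // ~3.234, quantized to 1/SCALE_VAL
    return 0;
}
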
num_components, num_dimensions); 108 | }//EM Loop 109 | 110 | 111 | //regroup = E step 112 | // Compute new component membership probabilities for all the events 113 | estep1_launch${'_'+'_'.join(param_val_list)}(d_fcs_data_by_dimension,d_components,d_component_memberships, num_dimensions,num_components,num_events,d_loglikelihoods); 114 | estep2_launch${'_'+'_'.join(param_val_list)}(d_fcs_data_by_dimension,d_components,d_component_memberships, num_dimensions,num_components,num_events,d_likelihoods); 115 | cudaThreadSynchronize(); 116 | CUT_CHECK_ERROR("E-step Kernel execution failed: "); 117 | 118 | CUDA_SAFE_CALL(cudaMemcpy(likelihoods,d_likelihoods,sizeof(float)*${num_blocks_estep},cudaMemcpyDeviceToHost)); 119 | likelihood = 0.0; 120 | for(int i=0;i<${num_blocks_estep};i++) { 121 | likelihood += likelihoods[i]; 122 | } 123 | 124 | cudaThreadSynchronize(); 125 | 126 | //================================ EM DONE ============================== 127 | 128 | copy_component_data_GPU_to_CPU(num_components, num_dimensions); 129 | copy_evals_data_GPU_to_CPU(num_events, num_components); 130 | 131 | %if covar_version_name.upper() in ['2B','V2B','_V2B']: 132 | free(zeroR_2b); 133 | CUDA_SAFE_CALL(cudaFree(temp_buffer_2b)); 134 | %endif 135 | 136 | return boost::python::make_tuple(likelihood, iters); 137 | } 138 | 139 | 140 | -------------------------------------------------------------------------------- /templates/em_tbb_eval.mako: -------------------------------------------------------------------------------- 1 | void em_tbb_eval${'_'+'_'.join(param_val_list)} ( 2 | int num_components, 3 | int num_dimensions, 4 | int num_events) 5 | { 6 | //TODO: Is this necessary, or can we assume the values are still set? 7 | // Computes the R matrix inverses, and the gaussian constant 8 | constants${'_'+'_'.join(param_val_list)}(&components,num_components,num_dimensions); 9 | estep1${'_'+'_'.join(param_val_list)}(fcs_data_by_dimension,&components,component_memberships,num_dimensions,num_components,num_events,loglikelihoods); 10 | } 11 | -------------------------------------------------------------------------------- /templates/em_tbb_helper_funcs.mako: -------------------------------------------------------------------------------- 1 | #define PI 3.1415926535897931 2 | #define COVARIANCE_DYNAMIC_RANGE 1E6 3 | #define MINVALUEFORMINUSLOG -1000.0f 4 | 5 | void mvtmeans(float* data_by_event, int num_dimensions, int num_events, float* means) { 6 | for(int d=0; d < num_dimensions; d++) { 7 | means[d] = 0.0; 8 | for(int n=0; n < num_events; n++) { 9 | means[d] += data_by_event[n*num_dimensions+d]; 10 | } 11 | means[d] /= (float) num_events; 12 | } 13 | } 14 | 15 | float log_add(float log_a, float log_b) { 16 | if(log_a < log_b) { 17 | float tmp = log_a; 18 | log_a = log_b; 19 | log_b = tmp; 20 | } 21 | //setting MIN...LOG so small, I don't even need to look 22 | return (((log_b - log_a) <= MINVALUEFORMINUSLOG) ? 
log_a : 23 | log_a + (float)(logf(1.0 + (double)(expf((double)(log_b - log_a)))))); 24 | } 25 | 26 | void normalize_pi(components_t* components, int num_components) { 27 | float total = 0; 28 | for(int i=0; ipi[i]; 30 | } 31 | 32 | for(int m=0; m < num_components; m++){ 33 | components->pi[m] /= total; 34 | } 35 | } 36 | 37 | static int 38 | ludcmp(float *a,int n,int *indx,float *d); 39 | 40 | static void 41 | lubksb(float *a,int n,int *indx,float *b); 42 | 43 | /* 44 | * Inverts a square matrix (stored as a 1D float array) 45 | * 46 | * actualsize - the dimension of the matrix 47 | * 48 | * written by Mike Dinolfo 12/98 49 | * version 1.0 50 | */ 51 | void invert_cpu(float* data, int actualsize, float* log_determinant) { 52 | int maxsize = actualsize; 53 | int n = actualsize; 54 | 55 | *log_determinant = 0.0f; 56 | // sanity check 57 | if (actualsize == 1) { 58 | *log_determinant = logf(data[0]); 59 | data[0] = 1.0 / data[0]; 60 | } else { 61 | 62 | for (int i=1; i < actualsize; i++) data[i] /= data[0]; // normalize row 0 63 | for (int i=1; i < actualsize; i++) { 64 | for (int j=i; j < actualsize; j++) { // do a column of L 65 | float sum = 0.0f; 66 | for (int k = 0; k < i; k++) 67 | sum += data[j*maxsize+k] * data[k*maxsize+i]; 68 | data[j*maxsize+i] -= sum; 69 | } 70 | if (i == actualsize-1) continue; 71 | for (int j=i+1; j < actualsize; j++) { // do a row of U 72 | float sum = 0.0f; 73 | for (int k = 0; k < i; k++) 74 | sum += data[i*maxsize+k]*data[k*maxsize+j]; 75 | data[i*maxsize+j] = 76 | (data[i*maxsize+j]-sum) / data[i*maxsize+i]; 77 | } 78 | } 79 | 80 | for(int i=0; ij)?i:j); k < actualsize; k++ ) 106 | sum += ((j==k)?1.0:data[j*maxsize+k])*data[k*maxsize+i]; 107 | data[j*maxsize+i] = sum; 108 | } 109 | } 110 | } 111 | 112 | 113 | /* 114 | * Another matrix inversion function 115 | * This was modified from the 'component' application by Charles A. 
Bouman 116 | */ 117 | int invert_matrix(float* a, int n, float* determinant) { 118 | int i,j,f,g; 119 | 120 | float* y = (float*) malloc(sizeof(float)*n*n); 121 | float* col = (float*) malloc(sizeof(float)*n); 122 | int* indx = (int*) malloc(sizeof(int)*n); 123 | /* 124 | printf("\n\nR matrix before LU decomposition:\n"); 125 | for(i=0; i big) 198 | big=temp; 199 | if (big == 0.0) 200 | return 0; /* Singular matrix */ 201 | vv[i]=1.0/big; 202 | } 203 | 204 | 205 | for (j=0;j= 0.0 || fabs(dum-big) < 1e-3) 237 | { 238 | big=dum; 239 | imax=i; 240 | //printf("imax: %d\n",imax); 241 | } 242 | } 243 | 244 | if (j != imax) 245 | { 246 | for (k=0;k=0)&&(a[j*n+j]-TINY) ) a[j*n+j]= -TINY; 272 | 273 | if (j != n-1) 274 | { 275 | dum=1.0/(a[j*n+j]); 276 | for (i=j+1;i= 0) 299 | for (j=ii;j=0;i--) 306 | { 307 | sum=b[i]; 308 | for (j=i+1;j& r ) const { 15 | for(int i=r.begin(); i != r.end(); i++) { 16 | int row = (i) / num_dimensions; 17 | int col = (i) % num_dimensions; 18 | components->R[row*num_dimensions+col] = 0.0f; 19 | for(int j=0; j < num_events; j++) { 20 | if(row==col) { 21 | components->R[row*num_dimensions+col] += (fcs_data[j*num_dimensions + row])*(fcs_data[j*num_dimensions + row]); 22 | } 23 | } 24 | if(row==col) { 25 | components->R[row*num_dimensions+col] /= (float) (num_events -1); 26 | components->R[row*num_dimensions+col] -= ((float)(num_events)*means[row]*means[row]) / (float)(num_events-1); 27 | components->R[row*num_dimensions+col] /= (float)num_components; 28 | } 29 | } 30 | } 31 | }; 32 | 33 | void seed_covars${'_'+'_'.join(param_val_list)}(components_t* components, float* fcs_data, float* means, int num_dimensions, int num_events, float* avgvar, int num_components) { 34 | parallel_for(blocked_range(0, num_dimensions*num_dimensions), 35 | TBB_seed_covars${'_'+'_'.join(param_val_list)}( components, fcs_data, means, num_dimensions, num_events, avgvar, num_components) ); 36 | } 37 | 38 | class TBB_average_variance${'_'+'_'.join(param_val_list)} { 39 | float* fcs_data; 40 | float* means; 41 | int num_dimensions; 42 | int num_events; 43 | float* avgvar; 44 | public: 45 | float total; 46 | 47 | TBB_average_variance${'_'+'_'.join(param_val_list)} (TBB_average_variance${'_'+'_'.join(param_val_list)}& x, split) : 48 | fcs_data(x.fcs_data), means(x.means), num_dimensions(x.num_dimensions), num_events(x.num_events), avgvar(x.avgvar), total(0.0f) { } 49 | 50 | TBB_average_variance${'_'+'_'.join(param_val_list)} (float* _fcs_data, float* _means, int _num_dimensions, int _num_events, float* _avgvar) : 51 | fcs_data(_fcs_data), means(_means), num_dimensions(_num_dimensions), num_events(_num_events), avgvar(_avgvar), total(0.0f) { } 52 | 53 | void join( const TBB_average_variance${'_'+'_'.join(param_val_list)}& y ) {total += y.total;} 54 | 55 | void operator()( const blocked_range& r ) { 56 | // Compute average variance for each dimension 57 | for(int i = r.begin(); i != r.end(); ++i) { 58 | float variance = 0.0f; 59 | for(int j=0; j < num_events; j++) { 60 | variance += fcs_data[j*num_dimensions + i]*fcs_data[j*num_dimensions + i]; 61 | } 62 | variance /= (float) num_events; 63 | variance -= means[i]*means[i]; 64 | total += variance; 65 | } 66 | } 67 | }; 68 | 69 | void average_variance${'_'+'_'.join(param_val_list)}(float* fcs_data, float* means, int num_dimensions, int num_events, float* avgvar) { 70 | TBB_average_variance${'_'+'_'.join(param_val_list)} numerator(fcs_data, means, num_dimensions, num_events, avgvar); 71 | parallel_reduce( blocked_range(0,num_dimensions), numerator ); 72 | 
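
TBB_average_variance above follows TBB's imperative reduction protocol: the splitting constructor hands each worker a zeroed accumulator, operator() folds a subrange into total, and join() merges the partial totals once parallel_reduce returns. The same reduction in TBB's functional form, as an equivalent sketch (assumes the TBB headers; total_variance is a hypothetical name):

#include <tbb/blocked_range.h>
#include <tbb/parallel_reduce.h>

float total_variance(const float* fcs_data, const float* means,
                     int num_dimensions, int num_events) {
    return tbb::parallel_reduce(
        tbb::blocked_range<int>(0, num_dimensions), 0.0f,
        [=](const tbb::blocked_range<int>& r, float acc) {
            for (int i = r.begin(); i != r.end(); ++i) {
                float variance = 0.0f;
                for (int j = 0; j < num_events; j++)
                    variance += fcs_data[j * num_dimensions + i]
                              * fcs_data[j * num_dimensions + i];
                acc += variance / (float)num_events - means[i] * means[i];
            }
            return acc;
        },
        [](float a, float b) { return a + b; });  // the join step
}
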
--------------------------------------------------------------------------------
/templates/em_tbb_kernels.mako:
--------------------------------------------------------------------------------
1 | class TBB_seed_covars${'_'+'_'.join(param_val_list)} {
2 |     components_t* components;
3 |     float* fcs_data;
4 |     float* means;
5 |     int num_dimensions;
6 |     int num_events;
7 |     float* avgvar;
8 |     int num_components;
9 | public:
10 |     TBB_seed_covars${'_'+'_'.join(param_val_list)}(components_t* _components, float* _fcs_data, float* _means, int _num_dimensions, int _num_events, float* _avgvar, int _num_components):
11 |         components(_components), fcs_data(_fcs_data), means(_means), num_dimensions(_num_dimensions), num_events(_num_events), avgvar(_avgvar), num_components(_num_components) { }
12 | 
13 | 
14 |     void operator() ( const blocked_range<size_t>& r ) const {
15 |         for(int i=r.begin(); i != r.end(); i++) {
16 |             int row = (i) / num_dimensions;
17 |             int col = (i) % num_dimensions;
18 |             components->R[row*num_dimensions+col] = 0.0f;
19 |             for(int j=0; j < num_events; j++) {
20 |                 if(row==col) {
21 |                     components->R[row*num_dimensions+col] += (fcs_data[j*num_dimensions + row])*(fcs_data[j*num_dimensions + row]);
22 |                 }
23 |             }
24 |             if(row==col) {
25 |                 components->R[row*num_dimensions+col] /= (float) (num_events -1);
26 |                 components->R[row*num_dimensions+col] -= ((float)(num_events)*means[row]*means[row]) / (float)(num_events-1);
27 |                 components->R[row*num_dimensions+col] /= (float)num_components;
28 |             }
29 |         }
30 |     }
31 | };
32 | 
33 | void seed_covars${'_'+'_'.join(param_val_list)}(components_t* components, float* fcs_data, float* means, int num_dimensions, int num_events, float* avgvar, int num_components) {
34 |     parallel_for(blocked_range<size_t>(0, num_dimensions*num_dimensions),
35 |         TBB_seed_covars${'_'+'_'.join(param_val_list)}( components, fcs_data, means, num_dimensions, num_events, avgvar, num_components) );
36 | }
37 | 
38 | class TBB_average_variance${'_'+'_'.join(param_val_list)} {
39 |     float* fcs_data;
40 |     float* means;
41 |     int num_dimensions;
42 |     int num_events;
43 |     float* avgvar;
44 | public:
45 |     float total;
46 | 
47 |     TBB_average_variance${'_'+'_'.join(param_val_list)} (TBB_average_variance${'_'+'_'.join(param_val_list)}& x, split) :
48 |         fcs_data(x.fcs_data), means(x.means), num_dimensions(x.num_dimensions), num_events(x.num_events), avgvar(x.avgvar), total(0.0f) { }
49 | 
50 |     TBB_average_variance${'_'+'_'.join(param_val_list)} (float* _fcs_data, float* _means, int _num_dimensions, int _num_events, float* _avgvar) :
51 |         fcs_data(_fcs_data), means(_means), num_dimensions(_num_dimensions), num_events(_num_events), avgvar(_avgvar), total(0.0f) { }
52 | 
53 |     void join( const TBB_average_variance${'_'+'_'.join(param_val_list)}& y ) {total += y.total;}
54 | 
55 |     void operator()( const blocked_range<size_t>& r ) {
56 |         // Compute average variance for each dimension
57 |         for(int i = r.begin(); i != r.end(); ++i) {
58 |             float variance = 0.0f;
59 |             for(int j=0; j < num_events; j++) {
60 |                 variance += fcs_data[j*num_dimensions + i]*fcs_data[j*num_dimensions + i];
61 |             }
62 |             variance /= (float) num_events;
63 |             variance -= means[i]*means[i];
64 |             total += variance;
65 |         }
66 |     }
67 | };
68 | 
69 | void average_variance${'_'+'_'.join(param_val_list)}(float* fcs_data, float* means, int num_dimensions, int num_events, float* avgvar) {
70 |     TBB_average_variance${'_'+'_'.join(param_val_list)} numerator(fcs_data, means, num_dimensions, num_events, avgvar);
71 |     parallel_reduce( blocked_range<size_t>(0,num_dimensions), numerator );
72 |     *avgvar = numerator.total / (float) num_dimensions;
73 | }
74 | 
75 | 
76 | void constants${'_'+'_'.join(param_val_list)}(components_t* components, int M, int D) {
77 |     float log_determinant;
78 |     float* matrix = (float*) malloc(sizeof(float)*D*D);
79 | 
80 |     //float sum = 0.0;
81 |     for(int m=0; m < M; m++) {
82 |         // Invert covariance matrix
83 |         memcpy(matrix,&(components->R[m*D*D]),sizeof(float)*D*D);
84 |         invert_cpu(matrix,D,&log_determinant);
85 |         memcpy(&(components->Rinv[m*D*D]),matrix,sizeof(float)*D*D);
86 | 
87 |         // Compute constant
88 |         components->constant[m] = -D*0.5f*logf(2*PI) - 0.5f*log_determinant;
89 |         components->CP[m] = components->constant[m]*2.0;
90 |     }
91 |     normalize_pi(components, M);
92 |     free(matrix);
93 | }
94 | 
95 | void seed_components${'_'+'_'.join(param_val_list)}(float *data_by_event, components_t* components, int num_dimensions, int num_components, int num_events) {
96 |     float* means = (float*) malloc(sizeof(float)*num_dimensions);
97 |     float avgvar;
98 | 
99 |     // Compute means
100 |     mvtmeans(data_by_event, num_dimensions, num_events, means);
101 | 
102 |     // Compute the average variance
103 |     seed_covars${'_'+'_'.join(param_val_list)}(components, data_by_event, means, num_dimensions, num_events, &avgvar, num_components);
104 |     average_variance${'_'+'_'.join(param_val_list)}(data_by_event, means, num_dimensions, num_events, &avgvar);
105 |     float seed;
106 |     if(num_components > 1) {
107 |         seed = (num_events)/(num_components);
108 |     } else {
109 |         seed = 0.0f;
110 |     }
111 | 
112 |     memcpy(components->means, means, sizeof(float)*num_dimensions);
113 | 
114 |     for(int c=1; c < num_components; c++) {
115 |         memcpy(&components->means[c*num_dimensions], &data_by_event[((int)(c*seed))*num_dimensions], sizeof(float)*num_dimensions);
116 | 
117 |         for(int i=0; i < num_dimensions*num_dimensions; i++) {
118 |             components->R[c*num_dimensions*num_dimensions+i] = components->R[i];
119 |             components->Rinv[c*num_dimensions*num_dimensions+i] = 0.0f;
120 |         }
121 |     }
122 | 
123 |     //compute pi, N
124 |     for(int c=0; c < num_components; c++) {
125 |         components->pi[c] = 1.0f/((float)num_components);
126 |         components->N[c] = ((float) num_events) / ((float)num_components);
127 |         components->avgvar[c] = avgvar / COVARIANCE_DYNAMIC_RANGE;
128 |     }
129 | 
130 |     free(means);
131 | }
132 | 
133 | void compute_average_variance${'_'+'_'.join(param_val_list)}( float* fcs_data, components_t* components, int num_dimensions, int num_components, int num_events)
134 | {
135 |     float* means = (float*) malloc(sizeof(float)*num_dimensions);
136 |     float avgvar;
137 | 
138 |     // Compute the means
139 |     mvtmeans(fcs_data, num_dimensions, num_events, means);
140 | 
141 |     average_variance${'_'+'_'.join(param_val_list)}(fcs_data, means, num_dimensions, num_events, &avgvar);
142 | 
143 |     for(int c=0; c < num_components; c++) {
144 |         components->avgvar[c] = avgvar / COVARIANCE_DYNAMIC_RANGE;
145 |     }
146 | }
147 | 
148 | class TBB_estep1${'_'+'_'.join(param_val_list)} {
149 |     float* data;
150 |     components_t* components;
151 |     float* component_memberships;
152 |     int D;
153 |     int M;
154 |     int N;
155 |     float* loglikelihoods;
156 | public:
157 |     TBB_estep1${'_'+'_'.join(param_val_list)}(float* _data, components_t* _components, float* _component_memberships, int _D, int _M, int _N, float* _loglikelihoods):
158 |         data(_data), components(_components), component_memberships(_component_memberships), D(_D), M(_M), N(_N), loglikelihoods(_loglikelihoods) { }
159 | 
160 |     void operator() ( const blocked_range<size_t>& r ) const {
161 |         for(int m = r.begin(); m != r.end(); ++m) {
162 |             // Compute likelihood for every data point in each component
163 |             float component_pi = components->pi[m];
164 |             float component_constant = components->constant[m];
165 |             float* means = &(components->means[m*D]);
166 |             float* Rinv = &(components->Rinv[m*D*D]);
167 |             for(int n=0; n < N; n++) {
168 |                 float like = 0.0;
169 | %if cvtype == 'diag':
170 |                 for(int i=0; i < D; i++) {
171 |                     like += (data[i*N+n]-means[i])*(data[i*N+n]-means[i])*Rinv[i*D+i];
172 |                 }
173 | %else:
174 |                 for(int i=0; i < D; i++) {
175 |                     for(int j=0; j < D; j++) {
176 |                         like += (data[i*N+n]-means[i])*(data[j*N+n]-means[j])*Rinv[i*D+j];
177 |                     }
178 |                 }
179 | %endif
180 |                 component_memberships[m*N+n] = (component_pi > 0.0f) ? -0.5*like + component_constant + logf(component_pi) : MINVALUEFORMINUSLOG;
181 |             }
182 |         }
183 |     }
184 | };
185 | 
186 | 
187 | void estep1${'_'+'_'.join(param_val_list)}(float* data, components_t* components, float* component_memberships, int D, int M, int N, float* loglikelihoods) {
188 |     parallel_for(blocked_range<size_t>(0, M),
189 |         TBB_estep1${'_'+'_'.join(param_val_list)}(data, components, component_memberships, D, M, N, loglikelihoods));
190 |     //estep1 log_add()
191 |     for(int n=0; n < N; n++) {
192 |         float finalloglike = MINVALUEFORMINUSLOG;
193 |         for(int m=0; m < M; m++) {
194 |             finalloglike = log_add(finalloglike, component_memberships[m*N+n]);
195 |         }
196 |         loglikelihoods[n] = finalloglike;
197 |     }
198 | }
199 | 
200 | float estep2_events${'_'+'_'.join(param_val_list)}(components_t* components, float* component_memberships, int M, int n, int N) {
201 |     // Finding maximum likelihood for this data point
202 |     float temp = 0.0f;
203 |     float thread_likelihood = 0.0f;
204 |     float max_likelihood;
205 |     float denominator_sum = 0.0f;
206 | 
207 |     //max_likelihood = __sec_reduce_max(component_memberships[n:M:N]);
208 |     max_likelihood = component_memberships[n];
209 |     for(int m = 1; m < M; m++)
210 |         max_likelihood =
211 |             fmaxf(max_likelihood,component_memberships[m*N+n]);
212 | 
213 |     // Computes sum of all likelihoods for this event
214 |     for(int m=0; m < M; m++) {
215 |         temp = expf(component_memberships[m*N+n] - max_likelihood);
216 |         denominator_sum += temp;
217 |     }
218 |     temp = max_likelihood + logf(denominator_sum);
219 |     thread_likelihood += temp;
220 | 
221 |     // Divide by denominator to get each membership
222 |     for(int m=0; m < M; m++) {
223 |         component_memberships[m*N+n] = expf(component_memberships[m*N+n] - temp);
224 |     }
225 |     //or component_memberships[n:M:N] = exp(component_memberships[n:M:N] - denominator_sum);
226 | 
227 |     return thread_likelihood;
228 | }
229 | 
230 | class TBB_estep2${'_'+'_'.join(param_val_list)} {
231 |     float* data;
232 |     components_t* components;
233 |     float* component_memberships;
234 |     int D;
235 |     int M;
236 |     int N;
237 |     float* likelihood;
238 | public:
239 |     float total;
240 | 
241 |     TBB_estep2${'_'+'_'.join(param_val_list)} (TBB_estep2${'_'+'_'.join(param_val_list)}& x, split) : data(x.data), components(x.components), component_memberships(x.component_memberships), D(x.D), M(x.M), N(x.N), likelihood(x.likelihood), total(0.0f) { }
242 | 
243 |     TBB_estep2${'_'+'_'.join(param_val_list)} (float* _data, components_t* _components, float* _component_memberships, int _D, int _M, int _N, float* _likelihood) : data(_data), components(_components), component_memberships(_component_memberships), D(_D), M(_M), N(_N), likelihood(_likelihood), total(0.0f) { }
244 | 
245 |     void join( const TBB_estep2${'_'+'_'.join(param_val_list)}& y) {total += y.total;}
246 | 
247 |     void operator()( const blocked_range<size_t>& r ) {
248 |         for(int n = r.begin(); n != r.end(); ++n) {
249 |             total += estep2_events${'_'+'_'.join(param_val_list)}(components, component_memberships, M, n, N);
250 |         }
251 |     }
252 | };
253 | 
254 | void estep2${'_'+'_'.join(param_val_list)}(float* data, components_t* components, float* component_memberships, int D, int M, int N, float* likelihood) {
255 |     TBB_estep2${'_'+'_'.join(param_val_list)} e2(data, components, component_memberships, D, M, N, likelihood);
256 |     parallel_reduce( blocked_range<size_t>(0, N), e2);
257 |     *likelihood = e2.total;
258 | }
259 | 
260 | class TBB_mstep_mean${'_'+'_'.join(param_val_list)} {
261 |     float* data;
262 |     components_t* components;
263 |     float* component_memberships;
264 |     int D;
265 |     int M;
266 |     int N;
267 | public:
268 |     TBB_mstep_mean${'_'+'_'.join(param_val_list)}(float* _data, components_t* _components, float* _component_memberships, int _D, int _M, int _N): data(_data), components(_components), component_memberships(_component_memberships), D(_D), M(_M), N(_N) { }
269 | 
270 |     void operator() ( const blocked_range<size_t>& r ) const {
271 |         for(int m=r.begin(); m != r.end(); m++) {
272 |             for(int d=0; d < D; d++) {
273 |                 components->means[m*D+d] = 0.0;
274 |                 for(int n=0; n < N; n++) {
275 |                     components->means[m*D+d] += data[d*N+n]*component_memberships[m*N+n];
276 |                 }
277 |                 components->means[m*D+d] /= components->N[m];
278 |             }
279 |         }
280 |     }
281 | };
282 | 
283 | void mstep_mean${'_'+'_'.join(param_val_list)}(float* data, components_t* components, float* component_memberships, int D, int M, int N) {
284 |     parallel_for(blocked_range<size_t>(0, M),
285 |         TBB_mstep_mean${'_'+'_'.join(param_val_list)}( data, components, component_memberships, D, M, N));
286 | }
287 | 
288 | class TBB_mstep_mean_idx${'_'+'_'.join(param_val_list)} {
289 |     float* data_by_dimension;
290 |     int* indices;
291 |     int num_indices;
292 |     components_t* components;
293 |     float* component_memberships;
294 |     int D;
295 |     int M;
296 |     int N;
297 | public:
298 |     TBB_mstep_mean_idx${'_'+'_'.join(param_val_list)}(float* _data_by_dimension, int* _indices, int _num_indices, components_t* _components, float* _component_memberships, int _D, int _M, int _N): data_by_dimension(_data_by_dimension), indices(_indices), num_indices(_num_indices), components(_components), component_memberships(_component_memberships), D(_D), M(_M), N(_N) { }
299 | 
300 |     void operator() ( const blocked_range<size_t>& r ) const {
301 |         for(int m=r.begin(); m != r.end(); m++) {
302 |             for(int d=0; d < D; d++) {
303 |                 components->means[m*D+d] = 0.0;
304 |                 for(int index = 0; index < num_indices; index++) {
305 |                     int n = indices[index];
306 |                     components->means[m*D+d] += data_by_dimension[d*N+n]*component_memberships[m*N+n];
307 |                 }
308 |                 components->means[m*D+d] /= components->N[m];
309 |             }
310 |         }
311 |     }
312 | };
313 | 
314 | void mstep_mean_idx${'_'+'_'.join(param_val_list)}(float* data_by_dimension, int* indices, int num_indices, components_t* components, float* component_memberships, int D, int M, int N) {
315 |     parallel_for(blocked_range<size_t>(0, M),
316 |         TBB_mstep_mean_idx${'_'+'_'.join(param_val_list)}( data_by_dimension, indices, num_indices, components, component_memberships, D, M, N));
317 | }
318 | 
319 | class TBB_mstep_n${'_'+'_'.join(param_val_list)} {
320 |     float* data;
321 |     components_t* components;
322 |     float* component_memberships;
323 |     int D;
324 |     int M;
325 |     int N;
326 | public:
327 |     TBB_mstep_n${'_'+'_'.join(param_val_list)}(float* _data, components_t* _components, float* _component_memberships, int _D, int _M, int _N): data(_data), components(_components), component_memberships(_component_memberships), D(_D), M(_M), N(_N) { }
328 | 
329 |     void operator() ( const blocked_range<size_t>& r ) const {
330 |         for(int m=r.begin(); m != r.end(); m++) {
331 |             components->N[m] = 0.0;
332 |             for(int n=0; n < N; n++) {
333 |                 components->N[m] += component_memberships[m*N+n];
334 |             }
335 |             components->pi[m] = components->N[m];
336 |         }
337 |     }
338 | };
339 | 
340 | void mstep_n${'_'+'_'.join(param_val_list)}(float* data, components_t* components, float* component_memberships, int D, int M, int N) {
341 |     parallel_for(blocked_range<size_t>(0, M),
342 |         TBB_mstep_n${'_'+'_'.join(param_val_list)}( data, components, component_memberships, D, M, N));
343 | }
344 | 
345 | class TBB_mstep_n_idx${'_'+'_'.join(param_val_list)} {
346 |     float* data_by_dimension;
347 |     int* indices;
348 |     int num_indices;
349 |     components_t* components;
350 |     float* component_memberships;
351 |     int D;
352 |     int M;
353 |     int N;
354 | public:
355 |     TBB_mstep_n_idx${'_'+'_'.join(param_val_list)}(float* _data_by_dimension, int* _indices, int _num_indices, components_t* _components, float* _component_memberships, int _D, int _M, int _N): data_by_dimension(_data_by_dimension), indices(_indices), num_indices(_num_indices), components(_components), component_memberships(_component_memberships), D(_D), M(_M), N(_N) { }
356 | 
357 |     void operator() ( const blocked_range<size_t>& r ) const {
358 |         for(int m=r.begin(); m != r.end(); m++) {
359 |             components->N[m] = 0.0;
360 |             for(int index=0; index < num_indices; index++) {
361 |                 int n = indices[index];
362 |                 components->N[m] += component_memberships[m*N+n];
363 |             }
364 |             components->pi[m] = components->N[m];
365 |         }
366 |     }
367 | };
368 | 
369 | void mstep_n_idx${'_'+'_'.join(param_val_list)}(float* data_by_dimension, int* indices, int num_indices, components_t* components, float* component_memberships, int D, int M, int N) {
370 |     parallel_for(blocked_range<size_t>(0, M),
371 |         TBB_mstep_n_idx${'_'+'_'.join(param_val_list)}( data_by_dimension, indices, num_indices, components, component_memberships, D, M, N));
372 | }
373 | 
374 | class TBB_mstep_covar${'_'+'_'.join(param_val_list)} {
375 |     float* data;
376 |     components_t* components;
377 |     float* component_memberships;
378 |     int D;
379 |     int M;
380 |     int N;
381 | public:
382 |     TBB_mstep_covar${'_'+'_'.join(param_val_list)}(float* _data, components_t* _components, float* _component_memberships, int _D, int _M, int _N): data(_data), components(_components), component_memberships(_component_memberships), D(_D), M(_M), N(_N) { }
383 | 
384 |     void operator() ( const blocked_range<size_t>& r ) const {
385 |         for(int m=r.begin(); m != r.end(); m++) {
386 |             float* means = &(components->means[m*D]);
387 |             for(int i=0; i < D; i++) {
388 |                 for(int j=0; j <= i; j++) {
389 | %if cvtype == 'diag':
390 |                     if(i != j) {
391 |                         components->R[m*D*D+i*D+j] = 0.0f;
392 |                         components->R[m*D*D+j*D+i] = 0.0f;
393 |                         continue;
394 |                     }
395 | %endif
396 |                     float sum = 0.0;
397 |                     for(int n=0; n < N; n++) {
398 |                         sum += (data[i*N+n]-means[i])*(data[j*N+n]-means[j])*component_memberships[m*N+n];
399 |                     }
400 | 
401 |                     if(components->N[m] >= 1.0f) {
402 |                         components->R[m*D*D+i*D+j] = sum / components->N[m];
403 |                         components->R[m*D*D+j*D+i] = sum / components->N[m];
404 |                     } else {
405 |                         components->R[m*D*D+i*D+j] = 0.0f;
406 |                         components->R[m*D*D+j*D+i] = 0.0f;
407 |                     }
408 |                     if(i == j) {
409 |                         components->R[m*D*D+j*D+i] += components->avgvar[m];
410 |                     }
411 |                 }
412 |             }
413 |         }
414 |     }
415 | };
416 | 
417 | void mstep_covar${'_'+'_'.join(param_val_list)}(float* data, components_t* components, float* component_memberships, int D, int M, int N) {
418 |     parallel_for(blocked_range<size_t>(0, M),
419 |         TBB_mstep_covar${'_'+'_'.join(param_val_list)}( data, components, component_memberships, D, M, N));
420 | }
421 | 
422 | class TBB_mstep_covar_idx${'_'+'_'.join(param_val_list)} {
423 |     float* data_by_dimension;
424 |     int* indices;
425 |     int num_indices;
426 |     components_t* components;
427 |     float* component_memberships;
428 |     int D;
429 |     int M;
430 |     int N;
431 | public:
432 |     TBB_mstep_covar_idx${'_'+'_'.join(param_val_list)}(float* _data_by_dimension, int* _indices, int _num_indices, components_t* _components, float* _component_memberships, int _D, int _M, int _N): data_by_dimension(_data_by_dimension), indices(_indices), num_indices(_num_indices), components(_components), component_memberships(_component_memberships), D(_D), M(_M), N(_N) { }
433 | 
434 |     void operator() ( const blocked_range<size_t>& r ) const {
435 |         for(int m=r.begin(); m != r.end(); m++) {
436 |             float* means = &(components->means[m*D]);
437 |             for(int i=0; i < D; i++) {
438 |                 for(int j=0; j <= i; j++) {
439 | %if cvtype == 'diag':
440 |                     if(i != j) {
441 |                         components->R[m*D*D+i*D+j] = 0.0f;
442 |                         components->R[m*D*D+j*D+i] = 0.0f;
443 |                         continue;
444 |                     }
445 | %endif
446 |                     float sum = 0.0;
447 |                     for(int index=0; index < num_indices; index++) {
448 |                         int n = indices[index];
449 |                         sum += (data_by_dimension[i*N+n]-means[i])*(data_by_dimension[j*N+n]-means[j])*component_memberships[m*N+n];
450 |                     }
451 | 
452 |                     if(components->N[m] >= 1.0f) {
453 |                         components->R[m*D*D+i*D+j] = sum / components->N[m];
454 |                         components->R[m*D*D+j*D+i] = sum / components->N[m];
455 |                     } else {
456 |                         components->R[m*D*D+i*D+j] = 0.0f;
457 |                         components->R[m*D*D+j*D+i] = 0.0f;
458 |                     }
459 |                     if(i == j) {
460 |                         components->R[m*D*D+j*D+i] += components->avgvar[m];
461 |                     }
462 |                 }
463 |             }
464 |         }
465 |     }
466 | };
467 | 
468 | void mstep_covar_idx${'_'+'_'.join(param_val_list)}(float* data_by_dimension, float* data_by_event, int* indices, int num_indices, components_t* components, float* component_memberships, int D, int M, int N) {
469 |     parallel_for(blocked_range<size_t>(0, M),
470 |         TBB_mstep_covar_idx${'_'+'_'.join(param_val_list)}( data_by_dimension, indices, num_indices, components, component_memberships, D, M, N));
471 | }
472 | 
473 | 
--------------------------------------------------------------------------------
/templates/em_tbb_seed_components.mako:
--------------------------------------------------------------------------------
1 | void em_tbb_seed_components${'_'+'_'.join(param_val_list)} (
2 |     int num_components,
3 |     int num_dimensions,
4 |     int num_events )
5 | {
6 |     seed_components${'_'+'_'.join(param_val_list)}(fcs_data_by_event, &components, num_dimensions, num_components, num_events);
7 | }
8 | 
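A note that is not part of the repository: every `${'_'+'_'.join(param_val_list)}` expansion in these templates stamps out one specialized C++ variant per tuning-parameter tuple, and ASP drives the rendering internally. As a rough illustration only (the parameter values below are hypothetical), a template can also be rendered standalone with Mako:

# Illustrative only: rendering one TBB template variant by hand.
from mako.template import Template

tmpl = Template(filename='templates/em_tbb_train.mako')
print tmpl.render(param_val_list=['diag', '1'])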
--------------------------------------------------------------------------------
/templates/em_tbb_train.mako:
--------------------------------------------------------------------------------
1 | 
2 | boost::python::tuple em_tbb_train${'_'+'_'.join(param_val_list)} (
3 |     int num_components,
4 |     int num_dimensions,
5 |     int num_events,
6 |     int min_iters,
7 |     int max_iters)
8 | {
9 | 
10 |     // Computes the R matrix inverses, and the gaussian constant
11 |     constants${'_'+'_'.join(param_val_list)}(&components,num_components,num_dimensions);
12 |     // Compute average variance based on the data
13 |     compute_average_variance${'_'+'_'.join(param_val_list)}(fcs_data_by_event, &components, num_dimensions, num_components, num_events);
14 | 
15 |     // Calculate an epsilon value
16 |     //int ndata_points = num_events*num_dimensions;
17 |     float epsilon = (1+num_dimensions+0.5*(num_dimensions+1)*num_dimensions)*log((float)num_events*num_dimensions)*0.0001;
18 |     int iters;
19 |     float likelihood = -100000;
20 |     float old_likelihood = likelihood * 10;
21 | 
22 |     float change = epsilon*2;
23 | 
24 |     iters = 0;
25 |     // This is the iterative loop for the EM algorithm.
26 |     // It re-estimates parameters, re-computes constants, and then regroups the events
27 |     // These steps keep repeating until the change in likelihood is less than some epsilon
28 |     while(iters < min_iters || (fabs(change) > epsilon && iters < max_iters)) {
29 |         old_likelihood = likelihood;
30 | 
31 |         estep1${'_'+'_'.join(param_val_list)}(fcs_data_by_dimension,&components,component_memberships,num_dimensions,num_components,num_events,loglikelihoods);
32 |         estep2${'_'+'_'.join(param_val_list)}(fcs_data_by_dimension,&components,component_memberships,num_dimensions,num_components,num_events,&likelihood);
33 | 
34 |         // This kernel computes a new N, pi isn't updated until compute_constants though
35 |         mstep_n${'_'+'_'.join(param_val_list)}(fcs_data_by_dimension,&components,component_memberships,num_dimensions,num_components,num_events);
36 |         mstep_mean${'_'+'_'.join(param_val_list)}(fcs_data_by_dimension,&components,component_memberships,num_dimensions,num_components,num_events);
37 |         mstep_covar${'_'+'_'.join(param_val_list)}(fcs_data_by_dimension,&components,component_memberships,num_dimensions,num_components,num_events);
38 | 
39 | 
40 |         // Inverts the R matrices, computes the constant, normalizes cluster probabilities
41 |         constants${'_'+'_'.join(param_val_list)}(&components,num_components,num_dimensions);
42 |         change = likelihood - old_likelihood;
43 |         iters++;
44 |     }
45 | 
46 |     estep1${'_'+'_'.join(param_val_list)}(fcs_data_by_dimension,&components,component_memberships,num_dimensions,num_components,num_events,loglikelihoods);
47 |     estep2${'_'+'_'.join(param_val_list)}(fcs_data_by_dimension,&components,component_memberships,num_dimensions,num_components,num_events,&likelihood);
48 | 
49 |     return boost::python::make_tuple(likelihood, iters);
50 | }
51 | 
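Since the trainer ends with boost::python::make_tuple(likelihood, iters), the Python side receives a plain 2-tuple. A hedged sketch of what a direct call into one compiled variant would look like; the module handle and the variant suffix here are assumptions, since the real dispatch is done inside the GMM specializer:

# Hypothetical direct call; in practice the specializer picks the variant.
likelihood, iters = mod.em_tbb_train_diag_1(num_components, num_dimensions,
                                            num_events, min_iters, max_iters)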
--------------------------------------------------------------------------------
/tests/em_convert_from_pickle_dump_to_csv.py:
--------------------------------------------------------------------------------
1 | import asp.jit.asp_module as asp_module
2 | import numpy as np
3 | from em import *
4 | import pickle
5 | import sys
6 | 
7 | param_type_map = {
8 |     'num_blocks_estep': ('cardinal','variant'),
9 |     'num_threads_estep': ('cardinal','variant'),
10 |     'num_threads_mstep': ('cardinal','variant'),
11 |     'num_event_blocks': ('cardinal','variant'),
12 |     'max_num_dimensions': ('cardinal','variant'),
13 |     'max_num_components': ('cardinal','variant'),
14 |     'max_num_dimensions_covar_v3': ('cardinal','variant'),
15 |     'max_num_components_covar_v3': ('cardinal','variant'),
16 |     'diag_only': ('binary','variant'),
17 |     'max_iters': ('cardinal','variant'),
18 |     'min_iters': ('cardinal','variant'),
19 |     'covar_version_name': ('nominal','variant'),
20 |     'supports_32b_floating_point_atomics': ('nominal','machine'),
21 |     'max_xy_grid_dim': ('cardinal','machine'),
22 |     'max_threads_per_block': ('cardinal','machine'),
23 |     'max_shared_memory_capacity_per_SM': ('cardinal','machine')
24 | }
25 | 
26 | if __name__ == '__main__':
27 |     ifile_name = sys.argv[1]
28 |     ofile_name = sys.argv[2]
29 |     func_name = sys.argv[3]
30 |     device_id = sys.argv[4]
31 |     gmm = GMM(1,1)
32 |     mod = gmm.get_asp_mod()
33 |     mod.restore_method_timings(func_name,ifile_name)
34 |     var_names = mod.compiled_methods[func_name].v_id_list
35 |     param_names = mod.compiled_methods[func_name].param_names
36 |     var_times = mod.compiled_methods[func_name].database.variant_times
37 |     f = file(ofile_name, 'a')
38 |     f.write("Heading, Function Name, Device Name, Input Params,,,Variant Params"+","*len(param_names)+"Time\n")
39 |     f.write("Name,function,device,M,D,N,%s,Time\n" % ','.join(param_names))
40 |     f.write("Type,nominal,nominal,cardinal,cardinal,cardinal,%s,real\n" %
41 |             ','.join([param_type_map.get(n,('unknown','unknown'))[0] for n in param_names]))
42 |     f.write("Prefix,problem,machine,problem,problem,problem,%s,performance\n" %
43 |             ','.join([param_type_map.get(n,('unknown','unknown'))[1] for n in param_names]))
44 |     for size, times in var_times.items():
45 |         for name in var_names:
46 |             time = times[name]
47 |             f.write(",%s,%s,%s,%s,%s\n" % ( func_name,
48 |                                             device_id,
49 |                                             ','.join([str(p) for p in size[1:]]),
50 |                                             ','.join(name.split('_')[1:]),
51 |                                             time ) )
52 |     f.close()
53 | 
54 | 
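A usage note that is not part of the repository: the script reads four positional arguments, so an invocation follows the pattern sketched below. The file names are placeholders; only the argument order is taken from the sys.argv reads above.

# Placeholder invocation:
#   python tests/em_convert_from_pickle_dump_to_csv.py timings.pkl timings.csv train gpu0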
--------------------------------------------------------------------------------
/tests/gmm_test.py:
--------------------------------------------------------------------------------
1 | import unittest2 as unittest
2 | import copy
3 | import numpy as np
4 | from gmm_specializer.gmm import GMM, compute_distance_BIC
5 | 
6 | class BasicTests(unittest.TestCase):
7 |     def test_init(self):
8 |         gmm = GMM(3, 2, cvtype='diag')
9 |         self.assertIsNotNone(gmm)
10 | 
11 | class SyntheticDataTests(unittest.TestCase):
12 |     def setUp(self):
13 |         self.D = 2
14 |         self.N = 600
15 |         self.M = 3
16 |         np.random.seed(0)
17 |         C = np.array([[0., -0.7], [3.5, .7]])
18 |         C1 = np.array([[-0.4, 1.7], [0.3, .7]])
19 |         Y = np.r_[
20 |             np.dot(np.random.randn(self.N/3, 2), C1),
21 |             np.dot(np.random.randn(self.N/3, 2), C),
22 |             np.random.randn(self.N/3, 2) + np.array([3, 3]),
23 |         ]
24 |         self.X = Y.astype(np.float32)
25 | 
26 |     def test_pure_python(self):
27 |         gmm = GMM(self.M, self.D, cvtype='diag')
28 |         means, covars = gmm.train_using_python(self.X)
29 |         Y = gmm.predict_using_python(self.X)
30 |         self.assertTrue(len(set(Y)) > 1)
31 | 
32 |     def test_training_once(self):
33 |         gmm0 = GMM(self.M, self.D, cvtype='diag')
34 |         likelihood0 = gmm0.train(self.X)
35 |         means0 = gmm0.components.means.flatten()
36 |         covars0 = gmm0.components.covars.flatten()
37 | 
38 |         gmm1 = GMM(self.M, self.D, cvtype='diag')
39 |         likelihood1 = gmm1.train(self.X)
40 |         means1 = gmm1.components.means.flatten()
41 |         covars1 = gmm1.components.covars.flatten()
42 | 
43 |         self.assertAlmostEqual(likelihood0, likelihood1, delta=1)
44 |         for a,b in zip(means0, means1): self.assertAlmostEqual(a,b,places=2)
45 |         for a,b in zip(covars0, covars1): self.assertAlmostEqual(a,b,places=2)
46 | 
47 |     def test_prediction_once(self):
48 |         gmm0 = GMM(self.M, self.D, cvtype='diag')
49 |         likelihood0 = gmm0.train(self.X)
50 |         Y0 = gmm0.predict(self.X)
51 | 
52 |         gmm1 = GMM(self.M, self.D, cvtype='diag')
53 |         likelihood1 = gmm1.train(self.X)
54 |         Y1 = gmm1.predict(self.X)
55 | 
56 |         for a,b in zip(Y0, Y1): self.assertAlmostEqual(a,b)
57 |         self.assertTrue(len(set(Y0)) > 1)
58 | 
59 |     def test_training_repeat(self):
60 |         gmm0 = GMM(self.M, self.D, cvtype='diag')
61 |         likelihood0 = gmm0.train(self.X)
62 |         likelihood0 = gmm0.train(self.X)
63 |         likelihood0 = gmm0.train(self.X)
64 |         likelihood0 = gmm0.train(self.X)
65 |         likelihood0 = gmm0.train(self.X)
66 |         means0 = gmm0.components.means.flatten()
67 |         covars0 = gmm0.components.covars.flatten()
68 | 
69 |         gmm1 = GMM(self.M, self.D, cvtype='diag')
70 |         likelihood1 = gmm1.train(self.X)
71 |         likelihood1 = gmm1.train(self.X)
72 |         likelihood1 = gmm1.train(self.X)
73 |         likelihood1 = gmm1.train(self.X)
74 |         likelihood1 = gmm1.train(self.X)
75 |         means1 = gmm1.components.means.flatten()
76 |         covars1 = gmm1.components.covars.flatten()
77 | 
78 |         self.assertAlmostEqual(likelihood0, likelihood1, delta=1)
79 |         for a,b in zip(means0, means1): self.assertAlmostEqual(a,b,places=2)
80 |         for a,b in zip(covars0, covars1): self.assertAlmostEqual(a,b,places=2)
81 | 
82 |     def test_prediction_full(self):
83 |         gmm0 = GMM(self.M, self.D, cvtype='full')
84 |         likelihood0 = gmm0.train(self.X)
85 |         Y0 = gmm0.predict(self.X)
86 | 
87 |         gmm1 = GMM(self.M, self.D, cvtype='full')
88 |         likelihood1 = gmm1.train(self.X)
89 |         Y1 = gmm1.predict(self.X)
90 | 
91 |         for a,b in zip(Y0, Y1): self.assertAlmostEqual(a,b)
92 |         self.assertTrue(len(set(Y0)) > 1)
93 | 
94 | class SpeechDataTests(unittest.TestCase):
95 |     def setUp(self):
96 |         self.X = np.ndfromtxt('./tests/speech_data.csv', delimiter=',', dtype=np.float32)
97 |         self.N = self.X.shape[0]
98 |         self.D = self.X.shape[1]
99 |         self.M = 5
100 |         self.init_num_clusters = 16
101 | 
102 |     def do_bic_agglomeration(self, gmm_list):
103 |         # Get the events, divide them into an initial k clusters and train each GMM on a cluster
104 |         per_cluster = self.N/self.init_num_clusters
105 |         init_training = zip(gmm_list,np.vsplit(self.X, range(per_cluster, self.N, per_cluster)))
106 |         for g, x in init_training:
107 |             g.train(x)
108 | 
109 |         # Perform hierarchical agglomeration based on BIC scores
110 |         best_BIC_score = 1.0
111 |         while (best_BIC_score > 0 and len(gmm_list) > 1):
112 |             num_clusters = len(gmm_list)
113 |             # Resegment data based on likelihood scoring
114 |             likelihoods = gmm_list[0].score(self.X)
115 |             for g in gmm_list[1:]:
116 |                 likelihoods = np.column_stack((likelihoods, g.score(self.X)))
117 |             most_likely = likelihoods.argmax(axis=1)
118 |             # Across 2.5 secs of observations, vote on which cluster they should be associated with
119 |             iter_training = {}
120 |             for i in range(250, self.N, 250):
121 |                 votes = np.zeros(num_clusters)
122 |                 for j in range(i-250, i):
123 |                     votes[most_likely[j]] += 1
124 |                 iter_training.setdefault(gmm_list[votes.argmax()],[]).append(self.X[i-250:i,:])
125 |             votes = np.zeros(num_clusters)
126 |             for j in range((self.N/250)*250, self.N):
127 |                 votes[most_likely[j]] += 1
128 |             iter_training.setdefault(gmm_list[votes.argmax()],[]).append(self.X[(self.N/250)*250:self.N,:])
129 |             # Retrain the GMMs on the clusters for which they were voted most likely and
130 |             # make a list of candidates for merging
131 |             iter_bic_list = []
132 |             for g, data_list in iter_training.iteritems():
133 |                 cluster_data = data_list[0]
134 |                 for d in data_list[1:]:
135 |                     cluster_data = np.concatenate((cluster_data, d))
136 |                 cluster_data = np.ascontiguousarray(cluster_data)
137 |                 g.train(cluster_data)
138 |                 iter_bic_list.append((g,cluster_data))
139 | 
140 |             # Keep any GMMs that lost all votes in candidate list for merging
141 |             for g in gmm_list:
142 |                 if g not in iter_training.keys():
143 |                     iter_bic_list.append((g,None))
144 | 
145 |             # Score all pairs of GMMs using BIC
146 |             best_merged_gmm = None
147 |             best_BIC_score = 0.0
148 |             merged_tuple = None
149 |             for gmm1idx in range(len(iter_bic_list)):
150 |                 for gmm2idx in range(gmm1idx+1, len(iter_bic_list)):
151 |                     g1, d1 = iter_bic_list[gmm1idx]
152 |                     g2, d2 = iter_bic_list[gmm2idx]
153 |                     score = 0.0
154 |                     if d1 is not None or d2 is not None:
155 |                         if d1 is not None and d2 is not None:
156 |                             new_gmm, score = compute_distance_BIC(g1, g2, np.ascontiguousarray(np.concatenate((d1, d2))))
157 |                         elif d1 is not None:
158 |                             new_gmm, score = compute_distance_BIC(g1, g2, d1)
159 |                         else:
160 |                             new_gmm, score = compute_distance_BIC(g1, g2, d2)
161 |                     if score > best_BIC_score:
162 |                         best_merged_gmm = new_gmm
163 |                         merged_tuple = (g1, g2)
164 |                         best_BIC_score = score
165 | 
166 |             # Merge the winning candidate pair if it's desirable to do so
167 |             if best_BIC_score > 0.0:
168 |                 gmm_list.remove(merged_tuple[0])
169 |                 gmm_list.remove(merged_tuple[1])
170 |                 gmm_list.append(best_merged_gmm)
171 | 
172 |         return [ g.M for g in gmm_list]
173 | 
174 |     def test_training_once(self):
175 |         gmm0 = GMM(self.M, self.D, cvtype='diag')
176 |         likelihood0 = gmm0.train(self.X)
177 |         means0 = gmm0.components.means.flatten()
178 |         covars0 = gmm0.components.covars.flatten()
179 | 
180 |         gmm1 = GMM(self.M, self.D, cvtype='diag')
181 |         likelihood1 = gmm1.train(self.X)
182 |         means1 = gmm1.components.means.flatten()
183 |         covars1 = gmm1.components.covars.flatten()
184 | 
185 |         self.assertAlmostEqual(likelihood0, likelihood1, delta=1)
186 |         for a,b in zip(means0, means1): self.assertAlmostEqual(a,b,places=2)
187 |         for a,b in zip(covars0, covars1): self.assertAlmostEqual(a,b,places=2)
188 | 
189 |     def test_prediction_once(self):
190 |         gmm0 = GMM(self.M, self.D, cvtype='diag')
191 |         likelihood0 = gmm0.train(self.X)
192 |         Y0 = gmm0.predict(self.X)
193 | 
194 |         gmm1 = GMM(self.M, self.D, cvtype='diag')
195 |         likelihood1 = gmm1.train(self.X)
196 |         Y1 = gmm1.predict(self.X)
197 | 
198 |         for a,b in zip(Y0, Y1): self.assertAlmostEqual(a,b)
199 |         self.assertTrue(len(set(Y0)) > 1)
200 | 
201 |     #TODO: Sometimes generates mysterious m-step cuda launch failures
202 |     #def test_bic_agglomeration_diag(self):
203 |     #    gmm_list = [GMM(self.M, self.D, cvtype='diag') for i in range(self.init_num_clusters)]
204 |     #    ms = self.do_bic_agglomeration(gmm_list)
205 |     #    self.assertItemsEqual(ms, [5, 10, 65])
206 |     #
207 |     #def test_bic_agglomeration_full(self):
208 |     #    gmm_list = [GMM(self.M, self.D, cvtype='full') for i in range(self.init_num_clusters)]
209 |     #    ms = self.do_bic_agglomeration(gmm_list)
210 |     #    self.assertItemsEqual(ms, [5, 5, 5, 10, 15])
211 | 
212 | if __name__ == '__main__':
213 |     unittest.main()
214 | 
--------------------------------------------------------------------------------
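Taken together, the tests above exercise the public API end to end. A minimal standalone run, using only calls that appear in tests/gmm_test.py (the data here is random and illustrative):

# Minimal usage sketch of the GMM specializer.
import numpy as np
from gmm_specializer.gmm import GMM

X = np.random.randn(600, 2).astype(np.float32)  # N=600 events, D=2 dimensions
gmm = GMM(3, 2, cvtype='diag')                  # M=3 components, diagonal covariance
likelihood = gmm.train(X)                       # EM training; returns the final likelihood
labels = gmm.predict(X)                         # most likely component for each event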