├── .gitignore
├── .travis.yml
├── CMakeLists.txt
├── COPYING
├── COPYING.LESSER
├── FindEigen3.cmake
├── README.md
├── doc
│   └── Doxyfile
├── include
│   ├── distributions.h
│   ├── libcluster.h
│   └── probutils.h
├── python
│   ├── CMakeLists.txt
│   ├── FindNumpy.cmake
│   ├── libclusterpy.cpp
│   ├── libclusterpy.h
│   └── testapi.py
├── src
│   ├── cluster.cpp
│   ├── comutils.cpp
│   ├── comutils.h
│   ├── distributions.cpp
│   ├── mcluster.cpp
│   ├── probutils.cpp
│   └── scluster.cpp
└── test
    ├── CMakeLists.txt
    ├── cluster_test.cpp
    ├── mcluster_test.cpp
    ├── scluster_test.cpp
    ├── scott25.dat
    └── testdata.h
/.gitignore:
--------------------------------------------------------------------------------
1 | # ignore list for git status etc.
2 | *.mex*
3 | *.user
4 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: cpp
2 | dist: trusty
3 | sudo: required
4 |
5 | addons:
6 | apt:
7 | packages:
8 | - cmake
9 | - python3
10 | - python3-dev
11 | - libeigen3-dev
12 | - libboost-all-dev
13 | - libboost-python-dev
14 | - python3-numpy
15 |
16 | install:
17 | - cd /usr/lib/x86_64-linux-gnu/
18 | - sudo ln -s libboost_python-py34.so libboost_python3.so
19 | - cd $TRAVIS_BUILD_DIR
20 | - mkdir build
21 | - cd build
22 | - cmake -DBUILD_PYTHON_INTERFACE=ON -DBUILD_USE_PYTHON3=ON ..
23 | - make
24 | - sudo make install
25 |
26 | script:
27 | - cd $TRAVIS_BUILD_DIR/build
28 | - ./cluster_test
29 | - ./scluster_test
30 | - ./mcluster_test
31 | - sudo ldconfig
32 | - cd ../python
33 | - python3 testapi.py
34 |
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | project(cluster)
2 | cmake_minimum_required(VERSION 2.6)
3 |
4 |
5 | #--------------------------------#
6 | # Includes #
7 | #--------------------------------#
8 |
9 | find_package(Boost REQUIRED)
10 | include_directories(${Boost_INCLUDE_DIRS})
11 | include(${PROJECT_SOURCE_DIR}/FindEigen3.cmake REQUIRED)
12 | include_directories(${EIGEN_INCLUDE_DIRS})
13 | include(FindOpenMP)
14 |
15 |
16 | #--------------------------------#
17 | # Enforce an out-of-source build #
18 | #--------------------------------#
19 |
20 | string(COMPARE EQUAL "${PROJECT_SOURCE_DIR}" "${PROJECT_BINARY_DIR}" INSOURCE)
21 | if(INSOURCE)
22 | message(FATAL_ERROR "This project requires an out of source build.")
23 | endif(INSOURCE)
24 |
25 |
26 | #--------------------------------#
27 | # Compiler environment Setup #
28 | #--------------------------------#
29 |
30 | # Some compilation options (changeable from ccmake)
31 | option(BUILD_EXHAUST_SPLIT "Use the exhaustive cluster split heuristic?" off)
32 | option(BUILD_PYTHON_INTERFACE "Build the python interface?" off)
33 | option(BUILD_USE_PYTHON3 "Use python3 instead of python 2?" on)
34 |
35 | # Locations for source code
36 | set(LIB_SOURCE_DIR ${PROJECT_SOURCE_DIR}/src)
37 | set(LIB_INCLUDE_DIR ${PROJECT_SOURCE_DIR}/include)
38 | set(TEST_SOURCE_DIR ${PROJECT_SOURCE_DIR}/test)
39 | set(PYTHON_SOURCE_DIR ${PROJECT_SOURCE_DIR}/python)
40 |
41 | # Locations for binary files
42 | set(LIBRARY_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/lib)
43 | set(EXECUTABLE_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/build)
44 |
45 | # Automatically or from command line set build type
46 | if(NOT CMAKE_BUILD_TYPE)
47 | set(CMAKE_BUILD_TYPE Release CACHE STRING
48 | "Build type options are: None Debug Release RelWithDebInfo MinSizeRel."
49 | FORCE
50 | )
51 | endif(NOT CMAKE_BUILD_TYPE)
52 |
53 | # If we want to use the greedy splitting heuristic, define it here
54 | if(BUILD_EXHAUST_SPLIT)
55 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DEXHAUST_SPLIT")
56 | endif(BUILD_EXHAUST_SPLIT)
57 |
58 | # Python needs row major matrices (for convenience)
59 | if(BUILD_PYTHON_INTERFACE)
60 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DEIGEN_DEFAULT_TO_ROW_MAJOR")
61 | endif(BUILD_PYTHON_INTERFACE)
62 |
63 | # Search for OpenMP support for multi-threading
64 | if(OPENMP_FOUND)
65 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
66 | set(CMAKE_EXE_LINKER_FLAGS
67 | "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}"
68 | )
69 | # Disable Eigen's parallelisation (this will get in the way of mine)
70 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DEIGEN_DONT_PARALLELIZE")
71 | endif(OPENMP_FOUND)
72 |
73 |
74 | #--------------------------------#
75 | # Library Build Instructions #
76 | #--------------------------------#
77 |
78 | # Make sure we include library headers in compile
79 | include_directories(${LIB_INCLUDE_DIR})
80 |
81 | # Library build instructions
82 | add_library(${PROJECT_NAME} SHARED
83 | ${LIB_INCLUDE_DIR}/libcluster.h
84 | ${LIB_INCLUDE_DIR}/probutils.h
85 | ${LIB_INCLUDE_DIR}/distributions.h
86 | ${LIB_SOURCE_DIR}/distributions.cpp
87 | ${LIB_SOURCE_DIR}/comutils.h
88 | ${LIB_SOURCE_DIR}/comutils.cpp
89 | ${LIB_SOURCE_DIR}/cluster.cpp
90 | ${LIB_SOURCE_DIR}/scluster.cpp
91 | ${LIB_SOURCE_DIR}/mcluster.cpp
92 | ${LIB_SOURCE_DIR}/probutils.cpp
93 | )
94 |
95 | add_definitions("-Wall")
96 |
97 |
98 | #--------------------------------#
99 | # Library Install Instructions #
100 | #--------------------------------#
101 |
102 | if(NOT CMAKE_INSTALL_PREFIX)
103 | set(CMAKE_INSTALL_PREFIX "/usr/local" )
104 | endif(NOT CMAKE_INSTALL_PREFIX)
105 |
106 | install(TARGETS ${PROJECT_NAME} DESTINATION lib)
107 | install(FILES
108 | ${LIB_INCLUDE_DIR}/libcluster.h
109 | ${LIB_INCLUDE_DIR}/probutils.h
110 | ${LIB_INCLUDE_DIR}/distributions.h
111 | DESTINATION include/libcluster
112 | )
113 |
114 |
115 | #--------------------------------#
116 | # Subdirectories to recurse to #
117 | #--------------------------------#
118 |
119 | subdirs(test python)
120 |
--------------------------------------------------------------------------------
/COPYING.LESSER:
--------------------------------------------------------------------------------
1 | GNU LESSER GENERAL PUBLIC LICENSE
2 | Version 3, 29 June 2007
3 |
4 | Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 |
8 |
9 | This version of the GNU Lesser General Public License incorporates
10 | the terms and conditions of version 3 of the GNU General Public
11 | License, supplemented by the additional permissions listed below.
12 |
13 | 0. Additional Definitions.
14 |
15 | As used herein, "this License" refers to version 3 of the GNU Lesser
16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU
17 | General Public License.
18 |
19 | "The Library" refers to a covered work governed by this License,
20 | other than an Application or a Combined Work as defined below.
21 |
22 | An "Application" is any work that makes use of an interface provided
23 | by the Library, but which is not otherwise based on the Library.
24 | Defining a subclass of a class defined by the Library is deemed a mode
25 | of using an interface provided by the Library.
26 |
27 | A "Combined Work" is a work produced by combining or linking an
28 | Application with the Library. The particular version of the Library
29 | with which the Combined Work was made is also called the "Linked
30 | Version".
31 |
32 | The "Minimal Corresponding Source" for a Combined Work means the
33 | Corresponding Source for the Combined Work, excluding any source code
34 | for portions of the Combined Work that, considered in isolation, are
35 | based on the Application, and not on the Linked Version.
36 |
37 | The "Corresponding Application Code" for a Combined Work means the
38 | object code and/or source code for the Application, including any data
39 | and utility programs needed for reproducing the Combined Work from the
40 | Application, but excluding the System Libraries of the Combined Work.
41 |
42 | 1. Exception to Section 3 of the GNU GPL.
43 |
44 | You may convey a covered work under sections 3 and 4 of this License
45 | without being bound by section 3 of the GNU GPL.
46 |
47 | 2. Conveying Modified Versions.
48 |
49 | If you modify a copy of the Library, and, in your modifications, a
50 | facility refers to a function or data to be supplied by an Application
51 | that uses the facility (other than as an argument passed when the
52 | facility is invoked), then you may convey a copy of the modified
53 | version:
54 |
55 | a) under this License, provided that you make a good faith effort to
56 | ensure that, in the event an Application does not supply the
57 | function or data, the facility still operates, and performs
58 | whatever part of its purpose remains meaningful, or
59 |
60 | b) under the GNU GPL, with none of the additional permissions of
61 | this License applicable to that copy.
62 |
63 | 3. Object Code Incorporating Material from Library Header Files.
64 |
65 | The object code form of an Application may incorporate material from
66 | a header file that is part of the Library. You may convey such object
67 | code under terms of your choice, provided that, if the incorporated
68 | material is not limited to numerical parameters, data structure
69 | layouts and accessors, or small macros, inline functions and templates
70 | (ten or fewer lines in length), you do both of the following:
71 |
72 | a) Give prominent notice with each copy of the object code that the
73 | Library is used in it and that the Library and its use are
74 | covered by this License.
75 |
76 | b) Accompany the object code with a copy of the GNU GPL and this license
77 | document.
78 |
79 | 4. Combined Works.
80 |
81 | You may convey a Combined Work under terms of your choice that,
82 | taken together, effectively do not restrict modification of the
83 | portions of the Library contained in the Combined Work and reverse
84 | engineering for debugging such modifications, if you also do each of
85 | the following:
86 |
87 | a) Give prominent notice with each copy of the Combined Work that
88 | the Library is used in it and that the Library and its use are
89 | covered by this License.
90 |
91 | b) Accompany the Combined Work with a copy of the GNU GPL and this license
92 | document.
93 |
94 | c) For a Combined Work that displays copyright notices during
95 | execution, include the copyright notice for the Library among
96 | these notices, as well as a reference directing the user to the
97 | copies of the GNU GPL and this license document.
98 |
99 | d) Do one of the following:
100 |
101 | 0) Convey the Minimal Corresponding Source under the terms of this
102 | License, and the Corresponding Application Code in a form
103 | suitable for, and under terms that permit, the user to
104 | recombine or relink the Application with a modified version of
105 | the Linked Version to produce a modified Combined Work, in the
106 | manner specified by section 6 of the GNU GPL for conveying
107 | Corresponding Source.
108 |
109 | 1) Use a suitable shared library mechanism for linking with the
110 | Library. A suitable mechanism is one that (a) uses at run time
111 | a copy of the Library already present on the user's computer
112 | system, and (b) will operate properly with a modified version
113 | of the Library that is interface-compatible with the Linked
114 | Version.
115 |
116 | e) Provide Installation Information, but only if you would otherwise
117 | be required to provide such information under section 6 of the
118 | GNU GPL, and only to the extent that such information is
119 | necessary to install and execute a modified version of the
120 | Combined Work produced by recombining or relinking the
121 | Application with a modified version of the Linked Version. (If
122 | you use option 4d0, the Installation Information must accompany
123 | the Minimal Corresponding Source and Corresponding Application
124 | Code. If you use option 4d1, you must provide the Installation
125 | Information in the manner specified by section 6 of the GNU GPL
126 | for conveying Corresponding Source.)
127 |
128 | 5. Combined Libraries.
129 |
130 | You may place library facilities that are a work based on the
131 | Library side by side in a single library together with other library
132 | facilities that are not Applications and are not covered by this
133 | License, and convey such a combined library under terms of your
134 | choice, if you do both of the following:
135 |
136 | a) Accompany the combined library with a copy of the same work based
137 | on the Library, uncombined with any other library facilities,
138 | conveyed under the terms of this License.
139 |
140 | b) Give prominent notice with the combined library that part of it
141 | is a work based on the Library, and explaining where to find the
142 | accompanying uncombined form of the same work.
143 |
144 | 6. Revised Versions of the GNU Lesser General Public License.
145 |
146 | The Free Software Foundation may publish revised and/or new versions
147 | of the GNU Lesser General Public License from time to time. Such new
148 | versions will be similar in spirit to the present version, but may
149 | differ in detail to address new problems or concerns.
150 |
151 | Each version is given a distinguishing version number. If the
152 | Library as you received it specifies that a certain numbered version
153 | of the GNU Lesser General Public License "or any later version"
154 | applies to it, you have the option of following the terms and
155 | conditions either of that published version or of any later version
156 | published by the Free Software Foundation. If the Library as you
157 | received it does not specify a version number of the GNU Lesser
158 | General Public License, you may choose any version of the GNU Lesser
159 | General Public License ever published by the Free Software Foundation.
160 |
161 | If the Library as you received it specifies that a proxy can decide
162 | whether future versions of the GNU Lesser General Public License shall
163 | apply, that proxy's public statement of acceptance of any version is
164 | permanent authorization for you to choose that version for the
165 | Library.
166 |
--------------------------------------------------------------------------------
/FindEigen3.cmake:
--------------------------------------------------------------------------------
1 | # Make sure that we can find Eigen
2 | # This creates the following variables:
3 | # - EIGEN_INCLUDE_DIRS where to find the library
4 | # - EIGEN_FOUND TRUE if found, FALSE otherwise
5 |
6 | find_path(
7 | EIGEN_INCLUDE_DIRS Eigen
8 | /usr/local/eigen3
9 | /usr/local/include/eigen3
10 | /usr/include/eigen3
11 | )
12 |
13 | # Check found Eigen
14 | if(EIGEN_INCLUDE_DIRS)
15 | set(EIGEN_FOUND TRUE)
16 | message(STATUS "Found Eigen: ${EIGEN_INCLUDE_DIRS}")
17 | else(EIGEN_INCLUDE_DIRS)
18 | if(EIGEN_FIND_REQUIRED)
19 | set(EIGEN_FOUND FALSE)
20 | message(FATAL_ERROR "Eigen not found")
21 | endif(EIGEN_FIND_REQUIRED)
22 | endif(EIGEN_INCLUDE_DIRS)
23 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Libcluster
2 | ==========
3 |
4 | [![Build Status](https://travis-ci.org/dsteinberg/libcluster.svg?branch=master)](https://travis-ci.org/dsteinberg/libcluster)
5 |
6 | ***Author***:
7 | [Daniel Steinberg](http://dsteinberg.github.io/)
8 |
9 | ***License***:
10 | LGPL v3 (See COPYING and COPYING.LESSER)
11 |
12 | ***Overview***:
13 |
14 | This library implements the following algorithms with variational Bayes
15 | learning procedures and efficient cluster splitting heuristics:
16 |
17 | * The Variational Dirichlet Process (VDP) [1, 2, 6]
18 | * The Bayesian Gaussian Mixture Model [3 - 6]
19 | * The Grouped Mixtures Clustering (GMC) model [6]
20 | * The Symmetric Grouped Mixtures Clustering (S-GMC) model [4 - 6]. This is
21 | referred to as Gaussian latent Dirichlet allocation (G-LDA) in [4, 5].
22 | * Simultaneous Clustering Model (SCM) for Multinomial Documents, and Gaussian
23 | Observations [5, 6].
24 | * Multiple-source Clustering Model (MCM) for simultaneously clustering two
25 |   types of observations: one per image/document, and multiple per
26 |   image/document (e.g. segments/words) [4 - 6].
27 | * And more clustering algorithms based on diagonal Gaussian, and
28 | Exponential distributions.
29 |
30 | And also,
31 | * Various functions for evaluating means, standard deviations, covariances,
32 |   principal eigenvalues etc. of data.
33 | * Extensible template interfaces for creating new algorithms within the
34 | variational Bayes framework.
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 | An example of using the MCM to simultaneously cluster images and objects within
43 | images for unsupervised scene understanding. See [4 - 6] for more information.
44 |
45 | * * *
46 |
47 |
48 | TABLE OF CONTENTS
49 | -----------------
50 |
51 | * [Dependencies](#dependencies)
52 |
53 | * [Install Instructions](#install-instructions)
54 |
55 | * [C++ Interface](#c-interface)
56 |
57 | * [Python Interface](#python-interface)
58 |
59 | * [General Usability Tips](#general-usability-tips)
60 |
61 | * [References and Citing](#references-and-citing)
62 |
63 |
64 | * * *
65 |
66 |
67 | DEPENDENCIES
68 | ------------
69 |
70 | - Eigen version 3.0 or greater
71 | - Boost version 1.4.x or greater and devel packages (special math functions)
72 | - OpenMP, which comes by default with most compilers (you may need a special
73 |   version of [LLVM](http://openmp.llvm.org/)).
74 | - CMake
75 |
76 | For the python interface:
77 |
78 | - Python 2 or 3
79 | - Boost python and boost python devel packages (make sure you have version 2
80 | or 3 for the relevant version of python)
81 | - Numpy (tested with v1.7)
82 |
83 |
84 | INSTALL INSTRUCTIONS
85 | --------------------
86 |
87 | *For Linux and OS X -- I've never tried to build on Windows.*
88 |
89 | To build libcluster:
90 |
91 | 1. Make sure you have CMake installed, and Eigen and Boost preferably in the
92 | usual locations:
93 |
94 | /usr/local/include/eigen3/ or /usr/include/eigen3
95 | /usr/local/include/boost or /usr/include/boost
96 |
97 | 2. Make a build directory where you checked out the source if it does not
98 | already exist, then change into this directory,
99 |
100 | cd {where you checked out the source}
101 | mkdir build
102 | cd build
103 |
104 | 3. To build libcluster, run the following from the build directory:
105 |
106 | cmake ..
107 | make
108 | sudo make install
109 |
110 | This installs:
111 |
112 | libcluster.h /usr/local/include
113 | distributions.h /usr/local/include
114 | probutils.h /usr/local/include
115 | libcluster.* /usr/local/lib (* this is either .dylib or .so)
116 |
117 | 4. Use the doxyfile in {where you checked out the source}/doc to make the
118 | documentation with doxygen:
119 |
120 | doxygen Doxyfile
121 |
122 | **NOTE**: There are a few options you can change using ccmake (or the cmake
123 | gui), including:
124 |
125 | - `BUILD_EXHAUST_SPLIT` (toggle `ON` or `OFF`, default `OFF`) This uses the
126 | exhaustive cluster split heuristic [1, 2] instead of the greedy heuristic [4,
127 | 5] for all algorithms but the SCM and MCM. The greedy heuristic is MUCH
128 | faster, but does give different results. I have yet to determine whether it
129 | is actually worse than the exhaustive method (if it is, it is not by much).
130 | The SCM and MCM only use the greedy split heuristic at this stage.
131 |
132 | - `BUILD_PYTHON_INTERFACE` (toggle `ON` or `OFF`, default `OFF`) Build the
133 | python interface. This requires boost python, and also uses row-major storage
134 | to be compatible with python.
135 |
136 | - `BUILD_USE_PYTHON3` (toggle `ON` or `OFF`, default `ON`) Use python 3 or 2 to
137 | build the python interface. Make sure you have the relevant python and boost
138 | python libraries installed!
139 |
140 | - `CMAKE_INSTALL_PREFIX` (default `/usr/local`) The default prefix for
141 | installing the library and binaries.
142 |
143 | - `EIGEN_INCLUDE_DIRS` (default `/usr/include/eigen3`) Where to look for the
144 | Eigen matrix library.
145 |
146 | **NOTE**: On Linux you may have to run `sudo ldconfig` before the system can
147 | find libcluster.so (or just reboot).
148 |
149 | **NOTE**: On Red-Hat based systems, `/usr/local/lib` is not checked unless
150 | added to `/etc/ld.so.conf`! This may lead to "cannot find libcluster.so"
151 | errors.
152 |
153 |
154 | C++ INTERFACE
155 | -------------
156 |
157 | All of the interfaces to this library are documented in `include/libcluster.h`.
158 | There are far too many algorithms to go into here, and I *strongly* recommend
159 | looking at the `test/` directory for example usage, specifically,
160 |
161 | * `cluster_test.cpp` for the group mixture models (GMC etc)
162 | * `scluster_test.cpp` for the SCM
163 | * `mcluster_test.cpp` for the MCM
164 |
165 | Here is an example for regular mixture models, such as the BGMM, which simply
166 | clusters some test data and prints the resulting posterior parameters to the
167 | terminal,
168 |
169 | ```C++
170 |
171 | #include "libcluster.h"
172 | #include "distributions.h"
173 | #include "testdata.h"
174 |
175 |
176 | //
177 | // Namespaces
178 | //
179 |
180 | using namespace std;
181 | using namespace Eigen;
182 | using namespace libcluster;
183 | using namespace distributions;
184 |
185 |
186 | //
187 | // Functions
188 | //
189 |
190 | // Main
191 | int main()
192 | {
193 |
194 | // Populate test data from testdata.h
195 | MatrixXd Xcat;
196 | vMatrixXd X;
197 | makeXdata(Xcat, X);
198 |
199 | // Set up the inputs for the BGMM
200 | Dirichlet weights;
201 | vector<GaussWish> clusters;
202 | MatrixXd qZ;
203 |
204 | // Learn the BGMM
205 | double F = learnBGMM(Xcat, qZ, weights, clusters, PRIORVAL, true);
206 |
207 | // Print the posterior parameters
208 | cout << endl << "Cluster Weights:" << endl;
209 | cout << weights.Elogweight().exp().transpose() << endl;
210 |
211 | cout << endl << "Cluster means:" << endl;
212 | for (vector<GaussWish>::iterator k=clusters.begin(); k < clusters.end(); ++k)
213 | cout << k->getmean() << endl;
214 |
215 | cout << endl << "Cluster covariances:" << endl;
216 | for (vector<GaussWish>::iterator k=clusters.begin(); k < clusters.end(); ++k)
217 | cout << k->getcov() << endl << endl;
218 |
219 | return 0;
220 | }
221 |
222 | ```
223 |
224 | Note that `distributions.h` has also been included. In fact, all of the
225 | algorithms in `libcluster.h` are just wrappers over a few key functions in
226 | `cluster.cpp`, `scluster.cpp` and `mcluster.cpp` that can take in *arbitrary*
227 | distributions as inputs, and so more algorithms potentially exist than are
228 | enumerated in `libcluster.h`. If you want to create different algorithms, or
229 | define more cluster distributions (like categorical), have a look at inheriting
230 | the `WeightDist` and `ClusterDist` base classes in `distributions.h`. Depending
231 | on the distributions you use, you may also have to come up with a way to
232 | 'split' clusters. Otherwise you can create an algorithm with a random initial
233 | set of clusters, like the MCM at the top level, which variational Bayes will
234 | then prune.
235 |
236 | There are also some generally useful functions included in `probutils.h` when
237 | dealing with mixture models (such as the log-sum-exp trick).
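   |
   | As a quick illustration of that trick (a numpy sketch for intuition only,
   | not the library's C++ implementation in `probutils::logsumexp`):
   |
   | ```python
   | import numpy as np
   |
   | def logsumexp(X):
   |     """log(sum(exp(X))) along rows of an NxK array, avoiding overflow."""
   |     mx = X.max(axis=1, keepdims=True)  # factor out the row maxima
   |     return (mx + np.log(np.exp(X - mx).sum(axis=1, keepdims=True))).ravel()
   | ```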
238 |
239 |
240 | PYTHON INTERFACE
241 | ----------------
242 |
243 | ### Installation
244 |
245 | Easy, follow the normal build instructions up to step (4) (if you haven't
246 | already), then from the build directory:
247 |
248 | cmake ..
249 | ccmake .
250 |
251 | Make sure `BUILD_PYTHON_INTERFACE` is `ON`
252 |
253 | make
254 | sudo make install
255 |
256 | This installs all the same files as step (3), as well as `libclusterpy.so` to
257 | your python staging directory, so it should be on your python path. I.e. just
258 | run
259 |
260 | ```python
261 | import libclusterpy
262 | ```
263 |
264 | **Troubleshooting**:
265 |
266 | On Fedora 20/21 I have to append `/usr/local/lib` to the file `/etc/ld.so.conf`
267 | to make python find the compiled shared object.
268 |
269 |
270 | ### Usage
271 |
272 | Import the library as
273 |
274 | ```python
275 | import numpy as np
276 | import libclusterpy as lc
277 | ```
278 |
279 | Then for the mixture models, assuming `X` is a numpy array where `X.shape` is
280 | `(N, D)` -- `N` being the number of samples, and `D` being the dimension of
281 | each sample,
282 |
283 | f, qZ, w, mu, cov = lc.learnBGMM(X)
284 |
285 | where `f` is the final free energy value, `qZ` is a distribution over all of
286 | the cluster labels where `qZ.shape` is `(N, K)` and `K` is the number of
287 | clusters (each row of `qZ` sums to 1). Then `w`, `mu` and `cov` are the
288 | expected posterior cluster parameters (see the documentation for details).
289 | Alternatively, tuning the `prior` argument changes the number of clusters found,
290 |
291 | f, qZ, w, mu, cov = lc.learnBGMM(X, prior=0.1)
292 |
293 | This interface is common to all of the simple mixture models (i.e. VDP, BGMM
294 | etc).
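   |
   | A minimal end-to-end sketch (the data and values here are made up for
   | illustration):
   |
   | ```python
   | import numpy as np
   | import libclusterpy as lc
   |
   | # Two well-separated Gaussian blobs, N = 200, D = 2
   | X = np.vstack((np.random.randn(100, 2), np.random.randn(100, 2) + 5.0))
   |
   | f, qZ, w, mu, cov = lc.learnBGMM(X)
   |
   | print(qZ.shape)            # (200, K), each row sums to 1
   | print(qZ.argmax(axis=1))   # hard cluster assignments
   | ```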
295 |
296 | For the group mixture models (GMC, SGMC etc) `X` is a *list* of arrays of size
297 | `(Nj, D)` (indexed by j), one for each group/album, `X = [X_1, X_2, ...]`. The
298 | returned `qZ` and `w` are also lists of arrays, one for each group, e.g.,
299 |
300 | f, qZ, w, mu, cov = lc.learnSGMC(X)
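   |
   | For example (made-up data, continuing the imports above):
   |
   | ```python
   | # Two groups/albums with different sample counts, same dimension D = 3
   | X = [np.random.randn(50, 3), np.random.randn(80, 3) + 2.0]
   | f, qZ, w, mu, cov = lc.learnSGMC(X)
   | assert len(qZ) == 2 and qZ[0].shape[0] == 50  # one qZ array per group
   | ```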
301 |
302 | The SCM again has a similar interface to the above models, but now `X` is a
303 | *list of lists of arrays*, `X = [[X_11, X_12, ...], [X_21, X_22, ...], ...]`.
304 | This is specifically for modelling situations where each `X_ij` is a matrix of
305 | the features of, for example, the `N_ij` segments in image `i` of album `j`.
306 |
307 | f, qY, qZ, wi, wij, mu, cov = lc.learnSCM(X)
308 |
309 | Where `qY` is a list of arrays of top-level/image cluster probabilities, `qZ`
310 | is a list of lists of arrays of bottom-level/segment cluster probabilities.
311 | `wi` are the mixture weights (list of arrays) corresponding to the `qY` labels,
312 | and `wij` are the weights (list of lists of arrays) corresponding to the `qZ`
313 | labels. This has two optional prior inputs, and a cluster truncation level
314 | (max number of clusters) for the top-level/image clusters,
315 |
316 | f, qY, qZ, wi, wij, mu, cov = lc.learnSCM(X, trunc=10, dirprior=1,
317 | gausprior=0.1)
318 |
319 | Where `dirprior` refers to the top-level cluster prior, and `gausprior` the
320 | bottom-level.
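   |
   | To make the nesting concrete, a made-up example of the list-of-lists input:
   |
   | ```python
   | # 2 albums; the first has 3 images, the second 4; each image ij holds an
   | # (N_ij x D) array of segment features
   | X = [[np.random.randn(30, 3) for _ in range(3)],
   |      [np.random.randn(25, 3) for _ in range(4)]]
   | f, qY, qZ, wi, wij, mu, cov = lc.learnSCM(X)
   | ```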
321 |
322 | Finally, the MCM has a similar interface to the SCM, but with an extra input,
323 | `W` which is of the same format as the `X` in the GMC-style models, i.e. it is
324 | a list of arrays of top-level or image features, `W = [W_1, W_2, ...]`. The
325 | usage is,
326 |
327 | f, qY, qZ, wi, wij, mu_t, mu_k, cov_t, cov_k = lc.learnMCM(W, X)
328 |
329 | Here `mu_t` and `cov_t` are the top-level posterior cluster parameters -- these
330 | are both lists of `T` cluster parameters (`T` being the number of clusters
331 | found). Similarly, `mu_k` and `cov_k` are lists of `K` bottom-level posterior
332 | cluster parameters. Like the SCM, this has a number of optional inputs,
333 |
334 |
335 | f, qY, qZ, wi, wij, mu_t, mu_k, cov_t, cov_k = lc.learnMCM(W, X, trunc=10,
336 | gausprior_t=1,
337 | gausprior_k=0.1)
338 |
339 | Where `gausprior_t` refers to the top-level cluster prior, and `gausprior_k`
340 | the bottom-level.
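   |
   | Continuing the made-up example above, `W` supplies one image-feature array
   | per album, with one row per image in that album:
   |
   | ```python
   | # Album 1 has 3 images, album 2 has 4; image features here are D_w = 5
   | W = [np.random.randn(3, 5), np.random.randn(4, 5)]
   | f, qY, qZ, wi, wij, mu_t, mu_k, cov_t, cov_k = lc.learnMCM(W, X)
   | ```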
341 |
342 | Look at the `libclusterpy` docstrings for more help on usage, and the
343 | `testapi.py` script in the `python` directory for more usage examples.
344 |
345 | **NOTE** if you get the following message when importing libclusterpy:
346 |
347 | ImportError: /lib64/libboost_python.so.1.54.0: undefined symbol: PyClass_Type
348 |
349 | Make sure you have `boost-python3` installed!
350 |
351 |
352 | GENERAL USABILITY TIPS
353 | ----------------------
354 |
355 | When verbose mode is activated you will get output that looks something like
356 | this:
357 |
358 | Learning MODEL X...
359 | --------<=>
360 | ---<==>
361 | --------x<=>
362 | --------------<====>
363 | ----<*>
364 | ---<>
365 | Finished!
366 | Number of clusters = 4
367 | Free Energy = 41225
368 |
369 | What this means:
370 |
371 | * `-` iteration of Variational Bayes (VBE and VBM step)
372 | * `<` cluster splitting has started (model selection)
373 | * `=` found a valid candidate split
374 | * `>` chosen candidate split and testing for inclusion into model
375 | * `x` clusters have been deleted because they became devoid of observations
376 | * `*` clusters (image/document clusters) that are empty have been removed.
377 |
378 | For best clustering results, I have found the following tips may help:
379 |
380 | 1. If clustering runs REALLY slowly then it may be because of hyper-threading.
381 |    OpenMP will by default use as many cores as are available to it, including
382 |    virtual hyper-threading cores. Unfortunately this may cause large
383 |    slow-downs, so try restricting these functions to no more than the number
384 |    of PHYSICAL cores on your machine (see the first sketch after these tips).
385 |
386 | 2. Garbage in = garbage out. Make sure your assumptions about the data are
387 | reasonable for the type of cluster distribution you use. For instance, if
388 | your observations do not resemble a mixture of Gaussians in feature space,
389 | then it may not be appropriate to use Gaussian clusters.
390 |
391 | 3. For Gaussian clusters: standardising or whitening your data may help, i.e.
392 |
393 | if X is an NxD matrix of observations you wish to cluster, you may get
394 |    better results if you use a standardised version of it, X_s,
395 |
396 | X_s = C * ( X - mean(X) ) / std(X)
397 |
398 | where `C` is some constant (optional) and the mean and std are for each
399 | column of X.
400 |
401 | You may obtain even better results by using PCA or ZCA whitening on X
402 |    (assuming ZERO MEAN data), using python syntax (second sketch after these tips):
403 |
404 | [U, S, V] = svd(cov(X))
405 | X_w = X.dot(U).dot(diag(1. / sqrt(diag(S)))) # PCA Whitening
406 |
407 | Such that
408 |
409 | cov(X_w) = I_D.
410 |
411 | Also, to get some automatic scaling you can multiply the prior by the
412 |    PRINCIPAL eigenvalue of `cov(X)` (or `cov(X_s)`, `cov(X_w)`).
413 |
414 | **NOTE**: If you use diagonal covariance Gaussians I STRONGLY recommend PCA
415 | or ZCA whitening your data first, otherwise you may end up with hundreds of
416 | clusters!
417 |
418 | 4. For Exponential clusters: Your observations have to be in the range [0,
419 |    inf). The clustering solution may also be sensitive to the prior. I find
420 |    that a prior value of approximately the magnitude of your data, or more,
421 |    usually leads to better convergence.
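   |
   | Regarding tip 1, one way to cap the OpenMP thread count is via the
   | environment before the library is loaded; e.g. for the python interface
   | (the value 4 is an arbitrary example, use your physical core count):
   |
   | ```python
   | import os
   | os.environ["OMP_NUM_THREADS"] = "4"  # set BEFORE importing libclusterpy
   |
   | import libclusterpy as lc
   | ```
   |
   | The python wrappers also accept an explicit thread-count argument (see
   | `nthreads` in `python/libclusterpy.cpp`).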
422 |
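   |
   | And for tip 3, a runnable numpy version of the standardising and PCA
   | whitening recipes (with `C = 1`, and assuming zero-mean data for whitening):
   |
   | ```python
   | import numpy as np
   |
   | def standardise(X):
   |     """X_s = (X - mean(X)) / std(X), applied column-wise (N-1 normalised)."""
   |     return (X - X.mean(axis=0)) / X.std(axis=0, ddof=1)
   |
   | def pca_whiten(X):
   |     """PCA-whiten zero-mean X so that cov(X_w) is (approximately) I_D."""
   |     U, S, V = np.linalg.svd(np.cov(X, rowvar=False))
   |     return X.dot(U).dot(np.diag(1.0 / np.sqrt(S)))  # X_w
   | ```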
423 |
424 | * * *
425 |
426 |
427 | REFERENCES AND CITING
428 | ---------------------
429 |
430 | **[1]** K. Kurihara, M. Welling, and N. Vlassis. Accelerated variational
431 | Dirichlet process mixtures, Advances in Neural Information Processing Systems,
432 | vol. 19, p. 761, 2007.
433 |
434 | **[2]** D. M. Steinberg, A. Friedman, O. Pizarro, and S. B. Williams. A
435 | Bayesian nonparametric approach to clustering data from underwater robotic
436 | surveys. In International Symposium on Robotics Research, Flagstaff, AZ, Aug.
437 | 2011.
438 |
439 | **[3]** C. M. Bishop. Pattern Recognition and Machine Learning. Cambridge, UK:
440 | Springer Science+Business Media, 2006.
441 |
442 | **[4]** D. M. Steinberg, O. Pizarro, S. B. Williams. Synergistic Clustering of
443 | Image and Segment Descriptors for Unsupervised Scene Understanding, In
444 | International Conference on Computer Vision (ICCV). IEEE, Sydney, NSW, 2013.
445 |
446 | **[5]** D. M. Steinberg, O. Pizarro, S. B. Williams. Hierarchical Bayesian
447 | Models for Unsupervised Scene Understanding. Journal of Computer Vision and
448 | Image Understanding (CVIU). Elsevier, 2014.
449 |
450 | **[6]** D. M. Steinberg, An Unsupervised Approach to Modelling Visual Data, PhD
451 | Thesis, 2013.
452 |
453 | Please consider citing the following if you use this code:
454 |
455 | * VDP: [2, 4, 6]
456 | * BGMM: [5, 6]
457 | * GMC: [6]
458 | * SGMC/GLDA: [4, 5, 6]
459 | * SCM: [5, 6]
460 | * MCM: [4, 5, 6]
461 |
462 | You can find these on my [homepage](http://dsteinberg.github.io/).
463 | Thank you!
464 |
--------------------------------------------------------------------------------
/include/distributions.h:
--------------------------------------------------------------------------------
1 | /*
2 | * libcluster -- A collection of hierarchical Bayesian clustering algorithms.
3 | * Copyright (C) 2013 Daniel M. Steinberg (daniel.m.steinberg@gmail.com)
4 | *
5 | * This file is part of libcluster.
6 | *
7 | * libcluster is free software: you can redistribute it and/or modify it under
8 | * the terms of the GNU Lesser General Public License as published by the Free
9 | * Software Foundation, either version 3 of the License, or (at your option)
10 | * any later version.
11 | *
12 | * libcluster is distributed in the hope that it will be useful, but WITHOUT
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
15 | * for more details.
16 | *
17 | * You should have received a copy of the GNU Lesser General Public License
18 | * along with libcluster. If not, see .
19 | */
20 |
21 | #ifndef DISTRIBUTIONS_H
22 | #define DISTRIBUTIONS_H
23 |
24 | #include <Eigen/Dense>
25 | #include <vector>
26 | #include <utility>
27 |
28 | //TODO: make all protected variables private and accessed by protected functions
29 | // to improve encapsulation??
30 |
31 | /*! Namespace that implements weight and cluster distributions. */
32 | namespace distributions
33 | {
34 |
35 | //
36 | // Namespace 'symbolic' constants
37 | //
38 |
39 | const double BETAPRIOR = 1.0; //!< beta prior value (Gaussians)
40 | const double NUPRIOR = 1.0; //!< nu prior value (diagonal Gaussians)
41 | const double ALPHA1PRIOR = 1.0; //!< alpha1 prior value (All weight dists)
42 | const double ALPHA2PRIOR = 1.0; //!< alpha2 prior value (SB & Gdir)
43 | const double APRIOR = 1.0; //!< a prior value (Exponential)
44 |
45 |
46 | //
47 | // Useful Typedefs
48 | //
49 |
50 | typedef Eigen::Array<bool, Eigen::Dynamic, 1> ArrayXb; //!< Boolean Array
51 |
52 |
53 | //
54 | // Weight Parameter Distribution classes
55 | //
56 |
57 | /*! \brief To make a new weight class that will work with the algorithm
58 | * templates, your class must have this as the minimum interface.
59 | */
60 | class WeightDist
61 | {
62 | public:
63 |
64 | // WeightDist(), required inherited constructor template
65 |
66 | /*! \brief Update the distribution.
67 | * \param Nk an array of observations counts.
68 | */
69 | virtual void update (const Eigen::ArrayXd& Nk) = 0;
70 |
71 | /*! \brief Evaluate the expectation of the log label weights in the mixtures.
72 | * \returns An array of likelihoods for the labels given the weights
73 | */
74 | virtual const Eigen::ArrayXd& Elogweight () const = 0;
75 |
76 | /*! \brief Get the number of observations contributing to each weight.
77 | * \returns An array of the number of observations contributing to each weight.
78 | */
79 | const Eigen::ArrayXd& getNk () const { return this->Nk; }
80 |
81 | /*! \brief Get the free energy contribution of these weights.
82 | * \returns the free energy contribution of these weights
83 | */
84 | virtual double fenergy () const = 0;
85 |
86 | /*! \brief virtual destructor.
87 | */
88 | virtual ~WeightDist() {}
89 |
90 | protected:
91 |
92 | /*! \brief Default constructor to set an empty observation array.
93 | */
94 | WeightDist () : Nk(Eigen::ArrayXd::Zero(1)) {}
95 |
96 | Eigen::ArrayXd Nk; //!< Number of observations making up the weights.
97 | };
98 |
99 |
100 | /*!
101 | * \brief Stick-Breaking (Dirichlet Process) parameter distribution.
102 | */
103 | class StickBreak : public WeightDist
104 | {
105 | public:
106 |
107 | StickBreak ();
108 |
109 | StickBreak (const double concentration);
110 |
111 | void update (const Eigen::ArrayXd& Nk);
112 |
113 | const Eigen::ArrayXd& Elogweight () const { return this->E_logpi; }
114 |
115 | double fenergy () const;
116 |
117 | virtual ~StickBreak () {}
118 |
119 | protected:
120 |
121 | // Prior hyperparameters, expectations etc
122 | double alpha1_p; //!< First prior param \f$ Beta(\alpha_1,\alpha_2) \f$
123 | double alpha2_p; //!< Second prior param \f$ Beta(\alpha_1,\alpha_2) \f$
124 | double F_p; //!< Free energy component dependent on priors only
125 |
126 | // Posterior hyperparameters and expectations
127 | Eigen::ArrayXd alpha1; //!< First posterior param corresp to \f$ \alpha_1 \f$
128 | Eigen::ArrayXd alpha2; //!< Second posterior param corresp to \f$ \alpha_2 \f$
129 | Eigen::ArrayXd E_logv; //!< Stick breaking log expectation
130 | Eigen::ArrayXd E_lognv; //!< Inverse stick breaking log expectation
131 | Eigen::ArrayXd E_logpi; //!< Expected log weights
132 |
133 | // Order tracker
134 | std::vector< std::pair<int, double> > ordvec; //!< For order specific updates
135 |
136 | private:
137 |
138 | // Do some prior free energy calcs
139 | void priorfcalc (void);
140 | };
141 |
142 |
143 | /*!
144 | * \brief Generalised Dirichlet parameter distribution (truncated stick
145 | * breaking).
146 | */
147 | class GDirichlet : public StickBreak
148 | {
149 | public:
150 |
151 | void update (const Eigen::ArrayXd& Nk);
152 |
153 | double fenergy () const;
154 |
155 | virtual ~GDirichlet () {}
156 |
157 | };
158 |
159 |
160 | /*!
161 | * \brief Dirichlet parameter distribution.
162 | */
163 | class Dirichlet : public WeightDist
164 | {
165 | public:
166 |
167 | Dirichlet ();
168 |
169 | Dirichlet (const double alpha);
170 |
171 | void update (const Eigen::ArrayXd& Nk);
172 |
173 | const Eigen::ArrayXd& Elogweight () const { return this->E_logpi; }
174 |
175 | double fenergy () const;
176 |
177 | virtual ~Dirichlet () {}
178 |
179 | private:
180 |
181 | // Prior hyperparameters, expectations etc
182 | double alpha_p; // Symmetric Dirichlet prior \f$ Dir(\alpha) \f$
183 | double F_p; // Free energy component dependent on priors only
184 |
185 | // Posterior hyperparameters and expectations
186 | Eigen::ArrayXd alpha; // Posterior param corresp to \f$ \alpha \f$
187 | Eigen::ArrayXd E_logpi; // Expected log weights
188 |
189 | };
190 |
191 |
192 | //
193 | // Cluster Parameter Distribution classes
194 | //
195 |
196 | /*! \brief To make a new cluster distribution class that will work with the
197 | * algorithm templates your class must have this as the minimum
198 | * interface.
199 | */
200 | class ClusterDist
201 | {
202 | public:
203 |
204 | /*! \brief Add observations to the cluster without updating the parameters
205 | * (i.e. add to the sufficient statistics)
206 | * \param qZk the observation indicators for this cluster, corresponding to
207 | * X.
208 | * \param X the observations [obs x dims], to add to this cluster according
209 | * to qZk.
210 | */
211 | virtual void addobs (
212 | const Eigen::VectorXd& qZk,
213 | const Eigen::MatrixXd& X
214 | ) = 0;
215 |
216 | /*! \brief Update the cluster parameters from the observations added from
217 | * addobs().
218 | */
219 | virtual void update () = 0;
220 |
221 | /*! \brief Clear all the parameters and observation accumulations from
222 | * addobs().
223 | */
224 | virtual void clearobs () = 0;
225 |
226 | /*! \brief Evaluate the log marginal likelihood of the observations.
227 | * \param X a matrix of observations, [obs x dims].
228 | * \returns An array of likelihoods for the observations given this dist.
229 | */
230 | virtual Eigen::VectorXd Eloglike (const Eigen::MatrixXd& X) const = 0;
231 |
232 | /*! \brief Get the free energy contribution of these cluster parameters.
233 | * \returns the free energy contribution of these cluster parameters.
234 | */
235 | virtual double fenergy () const = 0;
236 |
237 | /*! \brief Propose a split for the observations given these cluster parameters
238 | * \param X a matrix of observations, [obs x dims], to split.
239 | * \returns a binary array of split assignments.
240 | * \note this needs to consistently split observations between multiple
241 | * subsequent calls, but can change after each update().
242 | */
243 | virtual ArrayXb splitobs (const Eigen::MatrixXd& X) const = 0;
244 |
245 | /*! \brief Return the number of observations belonging to this cluster.
246 | * \returns the number of observations belonging to this cluster.
247 | */
248 | double getN () const { return this->N; }
249 |
250 | /*! \brief Return the cluster prior value.
251 | * \returns the cluster prior value.
252 | */
253 | double getprior () const { return this->prior; }
254 |
255 | /*! \brief virtual destructor.
256 | */
257 | virtual ~ClusterDist() {}
258 |
259 | protected:
260 |
261 | /*! \brief Constructor that must be called to set the prior and cluster
262 | * dimensionality.
263 | * \param prior the cluster prior.
264 | * \param D the dimensionality of this cluster.
265 | */
266 | ClusterDist (const double prior, const unsigned int D)
267 | : D(D), prior(prior), N(0) {}
268 |
269 | unsigned int D; //!< Dimensionality
270 | double prior; //!< Cluster prior
271 | double N; //!< Number of observations making up this cluster.
272 |
273 | };
274 |
275 |
276 | /*!
277 | * \brief Gaussian-Wishart parameter distribution for full Gaussian clusters.
278 | */
279 | class GaussWish : public ClusterDist
280 | {
281 | public:
282 |
283 | /*! \brief Make a Gaussian-Wishart prior.
284 | *
285 | * \param clustwidth makes the covariance prior \f$ clustwidth \times D
286 | * \times \mathbf{I}_D \f$.
287 | * \param D is the dimensionality of the data
288 | */
289 | GaussWish (const double clustwidth, const unsigned int D);
290 |
291 | void addobs (const Eigen::VectorXd& qZk, const Eigen::MatrixXd& X);
292 |
293 | void update ();
294 |
295 | void clearobs ();
296 |
297 | Eigen::VectorXd Eloglike (const Eigen::MatrixXd& X) const;
298 |
299 | ArrayXb splitobs (const Eigen::MatrixXd& X) const;
300 |
301 | double fenergy () const;
302 |
303 | /*! \brief Get the estimated cluster mean.
304 | * \returns the expected cluster mean.
305 | */
306 | const Eigen::RowVectorXd& getmean () const { return this->m; }
307 |
308 | /*! \brief Get the estimated cluster covariance.
309 | * \returns the expected cluster covariance.
310 | */
311 | Eigen::MatrixXd getcov () const { return this->iW/this->nu; }
312 |
313 | virtual ~GaussWish () {}
314 |
315 | private:
316 |
317 | // Prior hyperparameters etc
318 | double nu_p;
319 | double beta_p;
320 | Eigen::RowVectorXd m_p;
321 | Eigen::MatrixXd iW_p;
322 | double logdW_p;
323 | double F_p;
324 |
325 | // Posterior hyperparameters
326 | double nu; // nu, Lambda ~ Wishart(W, nu)
327 | double beta; // beta, mu ~ Normal(m, (beta*Lambda)^-1)
328 | Eigen::RowVectorXd m; // m, mu ~ Normal(m, (beta*Lambda)^-1)
329 | Eigen::MatrixXd iW; // Inverse W, Lambda ~ Wishart(W, nu)
330 | double logdW; // log(det(W))
331 |
332 | // Sufficient Statistics
333 | double N_s;
334 | Eigen::RowVectorXd x_s;
335 | Eigen::MatrixXd xx_s;
336 |
337 | };
338 |
339 |
340 | /*!
341 | * \brief Normal-Gamma parameter distribution for diagonal Gaussian clusters.
342 | */
343 | class NormGamma : public ClusterDist
344 | {
345 | public:
346 |
347 | /*! \brief Make a Normal-Gamma prior.
348 | *
349 | * \param clustwidth makes the covariance prior \f$ clustwidth \times
350 | * \mathbf{I}_D \f$.
351 | * \param D is the dimensionality of the data
352 | */
353 | NormGamma (const double clustwidth, const unsigned int D);
354 |
355 | void addobs (const Eigen::VectorXd& qZk, const Eigen::MatrixXd& X);
356 |
357 | void update ();
358 |
359 | void clearobs ();
360 |
361 | Eigen::VectorXd Eloglike (const Eigen::MatrixXd& X) const;
362 |
363 | ArrayXb splitobs (const Eigen::MatrixXd& X) const;
364 |
365 | double fenergy () const;
366 |
367 | /*! \brief Get the estimated cluster mean.
368 | * \returns the expected cluster mean.
369 | */
370 | const Eigen::RowVectorXd& getmean () const { return this->m; }
371 |
372 | /*! \brief Get the estimated cluster covariance.
373 | * \returns the expected cluster covariance (just the diagonal elements).
374 | */
375 | Eigen::RowVectorXd getcov () const { return this->L*this->nu; }
376 |
377 | virtual ~NormGamma () {}
378 |
379 | private:
380 |
381 | // Prior hyperparameters etc
382 | double nu_p;
383 | double beta_p;
384 | Eigen::RowVectorXd m_p;
385 | Eigen::RowVectorXd L_p;
386 | double logL_p;
387 |
388 | // Posterior hyperparameters
389 | double nu;
390 | double beta;
391 | Eigen::RowVectorXd m;
392 | Eigen::RowVectorXd L;
393 | double logL;
394 |
395 | // Sufficient Statistics
396 | double N_s;
397 | Eigen::RowVectorXd x_s;
398 | Eigen::RowVectorXd xx_s;
399 |
400 | };
401 |
402 |
403 | /*!
404 | * \brief Exponential-Gamma parameter distribution for Exponential clusters.
405 | */
406 | class ExpGamma : public ClusterDist
407 | {
408 | public:
409 |
410 | /*! \brief Make a Gamma prior.
411 | *
412 | * \param obsmag is the prior value for b in Gamma(a, b), which works well
413 | * when it is approximately the magnitude of the observation
414 | * dimensions, x_djn.
415 | * \param D is the dimensionality of the data
416 | */
417 | ExpGamma (const double obsmag, const unsigned int D);
418 |
419 | void addobs (const Eigen::VectorXd& qZk, const Eigen::MatrixXd& X);
420 |
421 | void update ();
422 |
423 | void clearobs ();
424 |
425 | Eigen::VectorXd Eloglike (const Eigen::MatrixXd& X) const;
426 |
427 | ArrayXb splitobs (const Eigen::MatrixXd& X) const;
428 |
429 | double fenergy () const;
430 |
431 | /*! \brief Get the estimated cluster rate parameter, i.e. Exp(E[lambda]),
432 | * where lambda is the rate parameter.
433 | * \returns the expected cluster rate parameter.
434 | */
435 | Eigen::RowVectorXd getrate () { return this->a*this->ib; }
436 |
437 | virtual ~ExpGamma () {}
438 |
439 | private:
440 |
441 | // Prior hyperparameters
442 | double a_p;
443 | double b_p;
444 |
445 | // Posterior hyperparameters etc
446 | double a;
447 | Eigen::RowVectorXd ib; // inverse b
448 | double logb;
449 |
450 | // Sufficient Statistics
451 | double N_s;
452 | Eigen::RowVectorXd x_s;
453 |
454 | };
455 |
456 |
457 | }
458 |
459 | #endif // DISTRIBUTIONS_H
460 |
--------------------------------------------------------------------------------
/include/probutils.h:
--------------------------------------------------------------------------------
1 | /*
2 | * libcluster -- A collection of hierarchical Bayesian clustering algorithms.
3 | * Copyright (C) 2013 Daniel M. Steinberg (daniel.m.steinberg@gmail.com)
4 | *
5 | * This file is part of libcluster.
6 | *
7 | * libcluster is free software: you can redistribute it and/or modify it under
8 | * the terms of the GNU Lesser General Public License as published by the Free
9 | * Software Foundation, either version 3 of the License, or (at your option)
10 | * any later version.
11 | *
12 | * libcluster is distributed in the hope that it will be useful, but WITHOUT
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
15 | * for more details.
16 | *
17 | * You should have received a copy of the GNU Lesser General Public License
18 | * along with libcluster. If not, see <http://www.gnu.org/licenses/>.
19 | */
20 |
21 | #ifndef PROBUTILS_H
22 | #define PROBUTILS_H
23 |
24 | #include <Eigen/Dense>
25 | #include <vector>
26 | #include <stdexcept>
27 |
28 |
29 | //
30 | // Namespaces
31 | //
32 |
33 | /*! \brief Namespace for various linear algebra tools useful for dealing with
34 | * Gaussians and log-probability expressions.
35 | *
36 | * \author Daniel Steinberg
37 | * Australian Centre for Field Robotics
38 | * The University of Sydney
39 | *
40 | * \date 15/02/2011
41 | */
42 | namespace probutils
43 | {
44 |
45 |
46 | //
47 | // Useful Functions
48 | //
49 |
50 | /*! \brief Calculate the column means of a matrix.
51 | *
52 | * \param X an NxD matrix.
53 | * \returns a 1xD row vector of the means of each column of X.
54 | */
55 | Eigen::RowVectorXd mean (const Eigen::MatrixXd& X);
56 |
57 |
58 | /*! \brief Calculate the column means of a vector of matrices (one mean for
59 | * all data in the matrices).
60 | *
61 | * \param X a vector of N_jxD matrices for j = 1:J.
62 | * \returns a 1xD row vector of the means of each column of X.
63 | * \throws std::invalid_argument if X has inconsistent D between elements.
64 | */
65 | Eigen::RowVectorXd mean (const std::vector<Eigen::MatrixXd>& X);
66 |
67 |
68 | /*! \brief Calculate the column standard deviations of a matrix, uses N - 1.
69 | *
70 | * \param X an NxD matrix.
71 | * \returns a 1xD row vector of the standard deviations of each column of X.
72 | */
73 | Eigen::RowVectorXd stdev (const Eigen::MatrixXd& X);
74 |
75 |
76 | /*! \brief Calculate the covariance of a matrix.
77 | *
78 | * If X is an NxD matrix, then this calculates:
79 | *
80 | * \f[ Cov(X) = \frac{1} {N-1} (X-E[X])^T (X-E[X]) \f]
81 | *
82 | * \param X is a NxD matrix to calculate the covariance of.
83 | * \returns a DxD covariance matrix.
84 | * \throws std::invalid_argument if X is 1xD or less (has one or less
85 | * observations).
86 | */
87 | Eigen::MatrixXd cov (const Eigen::MatrixXd& X);
88 |
89 |
90 | /*! \brief Calculate the covariance of a vector of matrices (one mean for
91 | * all data in the matrices).
92 | *
93 | * This calculates:
94 | *
95 | * \f[ Cov(X) = \frac{1} {\sum_j N_j-1} \sum_j (X_j-E[X])^T (X_j-E[X]) \f]
96 | *
97 | * \param X is a vector of N_jxD matrices for j = 1:J.
98 | * \returns a DxD covariance matrix.
99 | * \throws std::invalid_argument if any X_j has one or less observations.
100 | * \throws std::invalid_argument if X has inconsistent D between elements.
101 | */
102 | Eigen::MatrixXd cov (const std::vector<Eigen::MatrixXd>& X);
103 |
104 |
105 | /*! \brief Calculate the Mahalanobis distance, (x-mu)' * A^-1 * (x-mu), N
106 | * times.
107 | *
108 | * \param X an NxD matrix of samples/observations.
109 | * \param mu a 1xD vector of means.
110 | * \param A a DxD matrix of weights, A must be invertible.
111 | * \returns an Nx1 matrix of distances evaluated for each row of X.
112 | * \throws std::invalid_argument If X, mu and A do not have compatible
113 | * dimensionality, or if A is not PSD.
114 | */
115 | Eigen::VectorXd mahaldist (
116 | const Eigen::MatrixXd& X,
117 | const Eigen::RowVectorXd& mu,
118 | const Eigen::MatrixXd& A
119 | );
120 |
121 |
122 | /*! \brief Perform a log(sum(exp(X))) in a numerically stable fashion.
123 | *
124 | * \param X is a NxK matrix. We wish to sum along the rows (sum out K).
125 | * \returns an Nx1 vector where the log(sum(exp(X))) operation has been
126 | * performed along the rows.
127 | */
128 | Eigen::VectorXd logsumexp (const Eigen::MatrixXd& X);
129 |
130 |
131 | /*! \brief The eigen power method. Return the principal eigenvalue and
132 | * eigenvector.
133 | *
134 | * \param A is the square DxD matrix to decompose.
135 | * \param eigvec is the Dx1 principal eigenvector (mutable)
136 | * \returns the principal eigenvalue.
137 | * \throws std::invalid_argument if the matrix A is not square
138 | *
139 | */
140 | double eigpower (const Eigen::MatrixXd& A, Eigen::VectorXd& eigvec);
141 |
142 |
143 | /*! \brief Get the log of the determinant of a PSD matrix.
144 | *
145 | * \param A a DxD positive semi-definite matrix.
146 | * \returns log(det(A))
147 | * \throws std::invalid_argument if the matrix A is not square or if it is
148 | * not positive semidefinite.
149 | */
150 | double logdet (const Eigen::MatrixXd& A);
151 |
152 |
153 | /*! \brief Calculate digamma(X) for each element of X.
154 | *
155 | * \param X an NxM matrix
156 | * \returns an NxM matrix for which digamma(X) has been calculated for each
157 | * element
158 | */
159 | Eigen::MatrixXd mxdigamma (const Eigen::MatrixXd& X);
160 |
161 |
162 | /*! \brief Calculate log(gamma(X)) for each element of X.
163 | *
164 | * \param X an NxM matrix
165 | * \returns an NxM matrix for which log(gamma(X)) has been calculated for
166 | * each element
167 | */
168 | Eigen::MatrixXd mxlgamma (const Eigen::MatrixXd& X);
169 |
170 | }
171 |
172 | #endif // PROBUTILS_H
173 |
--------------------------------------------------------------------------------
/python/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | if(BUILD_PYTHON_INTERFACE)
2 |
3 | message(STATUS "Will build the python interface")
4 | if(BUILD_USE_PYTHON3)
5 | set(PYCMD "python3")
6 | message(STATUS "Will use python 3")
7 | else(BUILD_USE_PYTHON3)
8 | set(PYCMD "python2")
9 | message(STATUS "Will use python 2")
10 | endif(BUILD_USE_PYTHON3)
11 |
12 | # Python needs row major matrices (for convenience)
13 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DEIGEN_DEFAULT_TO_ROW_MAJOR")
14 |
15 |
16 | #--------------------------------#
17 | # Includes #
18 | #--------------------------------#
19 |
20 | if(BUILD_USE_PYTHON3)
21 | find_package(Boost COMPONENTS python3 REQUIRED)
22 | else(BUILD_USE_PYTHON3)
23 | find_package(Boost COMPONENTS python REQUIRED)
24 | endif(BUILD_USE_PYTHON3)
25 |
26 | include(${PYTHON_SOURCE_DIR}/FindNumpy.cmake REQUIRED)
27 | include_directories(${NUMPY_INCLUDE_DIR})
28 | find_package(PythonLibs REQUIRED)
29 | include_directories(${PYTHON_INCLUDE_DIRS})
30 |
31 |
32 | #--------------------------------#
33 | # Library Build Instructions #
34 | #--------------------------------#
35 |
36 | add_library(${PROJECT_NAME}py SHARED
37 | ${PYTHON_SOURCE_DIR}/libclusterpy.h
38 | ${PYTHON_SOURCE_DIR}/libclusterpy.cpp
39 | )
40 |
41 | if(BUILD_USE_PYTHON3)
42 | set(BOOST_PYTHON boost_python3)
43 | else(BUILD_USE_PYTHON3)
44 | set(BOOST_PYTHON boost_python)
45 | endif(BUILD_USE_PYTHON3)
46 |
47 | target_link_libraries(${PROJECT_NAME}py
48 | ${BOOST_PYTHON}
49 | ${PYTHON_LIBRARIES}
50 | ${Boost_LIBRARIES}
51 | ${PROJECT_NAME}
52 | )
53 |
54 |
55 | #--------------------------------#
56 | # Install Instructions #
57 | #--------------------------------#
58 |
59 | # Get python path
60 | execute_process(COMMAND ${PYCMD} -c
61 | "from distutils.sysconfig import get_python_lib; print(get_python_lib())"
62 | OUTPUT_VARIABLE PYTHON_SITE_PACKAGES OUTPUT_STRIP_TRAILING_WHITESPACE
63 | )
64 |
65 | # Install target
66 | install(TARGETS ${PROJECT_NAME}py DESTINATION ${PYTHON_SITE_PACKAGES})
67 |
68 | endif(BUILD_PYTHON_INTERFACE)
69 |
--------------------------------------------------------------------------------
/python/FindNumpy.cmake:
--------------------------------------------------------------------------------
1 | # - Find numpy
2 | # Find the native numpy includes
3 | # This module defines
4 | # NUMPY_INCLUDE_DIR, where to find numpy/arrayobject.h, etc.
5 | # NUMPY_FOUND, If false, do not try to use numpy headers.
6 |
7 | # This is (modified) from the avogadro project, http://avogadro.cc (GPL)
8 |
9 | if (NUMPY_INCLUDE_DIR)
10 | # in cache already
11 | set (NUMPY_FIND_QUIETLY TRUE)
12 | endif (NUMPY_INCLUDE_DIR)
13 |
14 | EXEC_PROGRAM ("${PYCMD}"
15 | ARGS "-c 'import numpy; print(numpy.get_include())'"
16 | OUTPUT_VARIABLE NUMPY_INCLUDE_DIR)
17 |
18 |
19 | if (NUMPY_INCLUDE_DIR MATCHES "Traceback")
20 | # Did not successfully include numpy
21 | set(NUMPY_FOUND FALSE)
22 | else (NUMPY_INCLUDE_DIR MATCHES "Traceback")
23 | # successful
24 | set (NUMPY_FOUND TRUE)
25 | set (NUMPY_INCLUDE_DIR ${NUMPY_INCLUDE_DIR} CACHE STRING "Numpy include path")
26 | endif (NUMPY_INCLUDE_DIR MATCHES "Traceback")
27 |
28 | if (NUMPY_FOUND)
29 | if (NOT NUMPY_FIND_QUIETLY)
30 | message (STATUS "Numpy headers found")
31 | endif (NOT NUMPY_FIND_QUIETLY)
32 | else (NUMPY_FOUND)
33 | if (NUMPY_FIND_REQUIRED)
34 | message (FATAL_ERROR "Numpy headers missing")
35 | endif (NUMPY_FIND_REQUIRED)
36 | endif (NUMPY_FOUND)
37 |
38 | MARK_AS_ADVANCED (NUMPY_INCLUDE_DIR)
39 |
--------------------------------------------------------------------------------
/python/libclusterpy.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * libcluster -- A collection of hierarchical Bayesian clustering algorithms.
3 | * Copyright (C) 2013 Daniel M. Steinberg (daniel.m.steinberg@gmail.com)
4 | *
5 | * This file is part of libcluster.
6 | *
7 | * libcluster is free software: you can redistribute it and/or modify it under
8 | * the terms of the GNU Lesser General Public License as published by the Free
9 | * Software Foundation, either version 3 of the License, or (at your option)
10 | * any later version.
11 | *
12 | * libcluster is distributed in the hope that it will be useful, but WITHOUT
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
15 | * for more details.
16 | *
17 | * You should have received a copy of the GNU Lesser General Public License
18 | * along with libcluster. If not, see <http://www.gnu.org/licenses/>.
19 | */
20 |
21 | #include <numpy/arrayobject.h> // for the PyArray_* API used below
22 | #include "distributions.h"
23 | #include "libclusterpy.h"
24 |
25 | //
26 | // Namespaces
27 | //
28 |
29 | using namespace std;
30 | using namespace Eigen;
31 | using namespace distributions;
32 | using namespace libcluster;
33 | using namespace boost::python;
34 | using namespace boost::python::api;
35 |
36 |
37 | //
38 | // Private Functions
39 | //
40 |
41 |
42 | // Convert (memory share) a numpy array to an Eigen MatrixXd
43 | MatrixXd numpy2MatrixXd (const object& X)
44 | {
45 | if (PyArray_Check(X.ptr()) == false)
46 | throw invalid_argument("PyObject is not an array!");
47 |
48 | // Cast PyObject* to PyArrayObject* now we know that it's valid
49 | PyArrayObject* Xptr = (PyArrayObject*) X.ptr();
50 |
51 | if (PyArray_ISFLOAT(Xptr) == false)
52 | throw invalid_argument("PyObject is not an array of floats/doubles!");
53 |
54 | return Map<MatrixXd> ((double*) PyArray_DATA(Xptr),
55 | PyArray_DIMS(Xptr)[0], PyArray_DIMS(Xptr)[1]);
56 | }
57 |
58 |
59 | // Convert (memory share) a list of numpy arrays to a vector of Eigen MatrixXd
60 | vMatrixXd lnumpy2vMatrixXd (const boost::python::list& X)
61 | {
62 |
63 | vMatrixXd X_;
64 |
65 | for (int i=0; i < len(X); ++i)
66 | X_.push_back(numpy2MatrixXd(X[i]));
67 |
68 | return X_;
69 | }
70 |
71 |
72 | // Convert (memory share) a list of lists of arrays to a vector of vectors of
73 | // matrices
74 | vvMatrixXd llnumpy2vvMatrixXd (const boost::python::list& X)
75 | {
76 |
77 | vvMatrixXd X_;
78 |
79 | for (int i=0; i < len(X); ++i)
80 | {
81 | vMatrixXd Xi_;
82 |
83 | // The compiler complains when trying to use lnumpy2vMatrixXd here, hence the explicit loop
84 | for (int j=0; j < len(X[i]); ++j)
85 | Xi_.push_back(numpy2MatrixXd(X[i][j]));
86 |
87 | X_.push_back(Xi_);
88 | }
89 |
90 | return X_;
91 | }
92 |
93 |
94 | // Get all the means from Gaussian clusters, Kx[1xD] matrices
95 | vMatrixXd getmean (const vector& clusters)
96 | {
97 | vMatrixXd means;
98 |
99 | for (size_t k=0; k < clusters.size(); ++k)
100 | means.push_back(clusters[k].getmean());
101 |
102 | return means;
103 | }
104 |
105 |
106 | // Get all of the covariances of Gaussian clusters, Kx[DxD] matrices
107 | vMatrixXd getcov (const vector<GaussWish>& clusters)
108 | {
109 | vMatrixXd covs;
110 |
111 | for (size_t k=0; k < clusters.size(); ++k)
112 | covs.push_back(clusters[k].getcov());
113 |
114 | return covs;
115 | }
116 |
117 |
118 | // Get the expected cluster weights in each of the groups
119 | template <class W>
120 | vector<ArrayXd> getweights (const vector<W>& weights)
121 | {
122 | vector<ArrayXd> rwgt;
123 | for (size_t k=0; k < weights.size(); ++k)
124 | rwgt.push_back(ArrayXd(weights[k].Elogweight().exp()));
125 |
126 | return rwgt;
127 | }
128 |
129 |
130 | //
131 | // Public Wrappers
132 | //
133 |
134 | // VDP
135 | tuple wrapperVDP (
136 | const object& X,
137 | const float clusterprior,
138 | const int maxclusters,
139 | const bool verbose,
140 | const int nthreads
141 | )
142 | {
143 | // Convert X
144 | const MatrixXd X_ = numpy2MatrixXd(X);
145 |
146 | // Pre-allocate some stuff
147 | MatrixXd qZ;
148 | StickBreak weights;
149 | vector<GaussWish> clusters;
150 |
151 | // Do the clustering
152 | double f = learnVDP(X_, qZ, weights, clusters, clusterprior, maxclusters,
153 | verbose, nthreads);
154 |
155 | // Return relevant objects
156 | return make_tuple(f, qZ, ArrayXd(weights.Elogweight().exp()),
157 | getmean(clusters), getcov(clusters));
158 | }
159 |
160 |
161 | // BGMM
162 | tuple wrapperBGMM (
163 | const object& X,
164 | const float clusterprior,
165 | const int maxclusters,
166 | const bool verbose,
167 | const int nthreads
168 | )
169 | {
170 | // Convert X
171 | const MatrixXd X_ = numpy2MatrixXd(X);
172 |
173 | // Pre-allocate some stuff
174 | MatrixXd qZ;
175 | Dirichlet weights;
176 | vector<GaussWish> clusters;
177 |
178 | // Do the clustering
179 | double f = learnBGMM(X_, qZ, weights, clusters, clusterprior, maxclusters,
180 | verbose, nthreads);
181 |
182 | // Return relevant objects
183 | return make_tuple(f, qZ, ArrayXd(weights.Elogweight().exp()),
184 | getmean(clusters), getcov(clusters));
185 | }
186 |
187 |
188 | // GMC
189 | tuple wrapperGMC (
190 | const boost::python::list &X,
191 | const float clusterprior,
192 | const int maxclusters,
193 | const bool sparse,
194 | const bool verbose,
195 | const int nthreads
196 | )
197 | {
198 | // Convert X
199 | const vMatrixXd X_ = lnumpy2vMatrixXd(X);
200 |
201 | // Pre-allocate some stuff
202 | vMatrixXd qZ;
203 | vector<GDirichlet> weights;
204 | vector<GaussWish> clusters;
205 |
206 | // Do the clustering
207 | double f = learnGMC(X_, qZ, weights, clusters, clusterprior, maxclusters,
208 | sparse, verbose, nthreads);
209 |
210 | // Return relevant objects
211 | return make_tuple(f, qZ, getweights(weights), getmean(clusters),
212 | getcov(clusters));
213 | }
214 |
215 |
216 | // SGMC
217 | tuple wrapperSGMC (
218 | const boost::python::list &X,
219 | const float clusterprior,
220 | const int maxclusters,
221 | const bool sparse,
222 | const bool verbose,
223 | const int nthreads
224 | )
225 | {
226 | // Convert X
227 | const vMatrixXd X_ = lnumpy2vMatrixXd(X);
228 |
229 | // Pre-allocate some stuff
230 | vMatrixXd qZ;
231 | vector<Dirichlet> weights;
232 | vector<GaussWish> clusters;
233 |
234 | // Do the clustering
235 | double f = learnSGMC(X_, qZ, weights, clusters, clusterprior, maxclusters,
236 | sparse, verbose, nthreads);
237 |
238 | // Return relevant objects
239 | return make_tuple(f, qZ, getweights(weights), getmean(clusters),
240 | getcov(clusters));
241 | }
242 |
243 |
244 | // SCM
245 | tuple wrapperSCM (
246 | const boost::python::list &X,
247 | const float dirprior,
248 | const float gausprior,
249 | const int trunc,
250 | const int maxclusters,
251 | const bool verbose,
252 | const int nthreads
253 | )
254 | {
255 | // Convert X
256 | const vvMatrixXd X_ = llnumpy2vvMatrixXd(X);
257 |
258 | // Pre-allocate some stuff
259 | vMatrixXd qY;
260 | vvMatrixXd qZ;
261 | vector<GDirichlet> weights_j;
262 | vector<Dirichlet> weights_t;
263 | vector<GaussWish> clusters;
264 |
265 | // Do the clustering
266 | double f = learnSCM(X_, qY, qZ, weights_j, weights_t, clusters, dirprior,
267 | gausprior, trunc, maxclusters, verbose, nthreads);
268 |
269 | // Return relevant objects
270 | return make_tuple(f, qY, qZ, getweights(weights_j),
271 | getweights(weights_t), getmean(clusters), getcov(clusters));
272 | }
273 |
274 |
275 | // MCM
276 | tuple wrapperMCM (
277 | const boost::python::list &W,
278 | const boost::python::list &X,
279 | const float gausprior_t,
280 | const float gausprior_k,
281 | const int trunc,
282 | const int maxclusters,
283 | const bool verbose,
284 | const int nthreads
285 | )
286 | {
287 | // Convert W and X
288 | const vMatrixXd W_ = lnumpy2vMatrixXd(W);
289 | const vvMatrixXd X_ = llnumpy2vvMatrixXd(X);
290 |
291 | // Pre-allocate some stuff
292 | vMatrixXd qY;
293 | vvMatrixXd qZ;
294 | vector<GDirichlet> weights_j;
295 | vector<Dirichlet> weights_t;
296 | vector<GaussWish> clusters_t;
297 | vector<GaussWish> clusters_k;
298 |
299 | // Do the clustering
300 | double f = learnMCM(W_, X_, qY, qZ, weights_j, weights_t, clusters_t,
301 | clusters_k, gausprior_t, gausprior_k, trunc, maxclusters,
302 | verbose, nthreads);
303 |
304 | // Return relevant objects
305 | return make_tuple(f, qY, qZ, getweights(weights_j),
306 | getweights(weights_t), getmean(clusters_t),
307 | getmean(clusters_k), getcov(clusters_t), getcov(clusters_k));
308 | }
309 |
--------------------------------------------------------------------------------
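A note on the wrappers above: each one converts plain Python containers of
numpy arrays into the Eigen containers libcluster expects. The sketch below is
not part of the repository -- all names and sizes are invented -- it just
illustrates the container shapes each family of wrappers accepts.

```python
import numpy as np

J, I_j, N, D = 2, 3, 100, 4  # groups, documents per group, samples, dimensions

# wrapperVDP / wrapperBGMM: a single (N, D) float array.
X_flat = np.random.randn(N, D)

# wrapperGMC / wrapperSGMC: a list of J arrays, one (N_j, D) array per group.
X_groups = [np.random.randn(N, D) for _ in range(J)]

# wrapperSCM / wrapperMCM: a list of J lists, each of I_j 'document' arrays.
X_nested = [[np.random.randn(N, D) for _ in range(I_j)] for _ in range(J)]
```
--------------------------------------------------------------------------------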
/python/libclusterpy.h:
--------------------------------------------------------------------------------
1 | /*
2 | * libcluster -- A collection of hierarchical Bayesian clustering algorithms.
3 | * Copyright (C) 2013 Daniel M. Steinberg (daniel.m.steinberg@gmail.com)
4 | *
5 | * This file is part of libcluster.
6 | *
7 | * libcluster is free software: you can redistribute it and/or modify it under
8 | * the terms of the GNU Lesser General Public License as published by the Free
9 | * Software Foundation, either version 3 of the License, or (at your option)
10 | * any later version.
11 | *
12 | * libcluster is distributed in the hope that it will be useful, but WITHOUT
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
15 | * for more details.
16 | *
17 | * You should have received a copy of the GNU Lesser General Public License
18 |  * along with libcluster. If not, see <http://www.gnu.org/licenses/>.
19 | */
20 |
21 | #ifndef LIBCLUSTERPY_H
22 | #define LIBCLUSTERPY_H
23 |
24 | #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION // Disable numpy APIs deprecated as of v1.7
25 |
26 | #include <Python.h>
27 | #include <boost/python.hpp>
28 | #include <numpy/arrayobject.h>
29 | #include "libcluster.h"
30 |
31 |
32 | //
33 | // To-python type converters
34 | //
35 |
36 | // Eigen::MatrixXd/ArrayXd (double) to numpy array ([[...]])
37 | template <class M>
38 | struct eigen2numpy
39 | {
40 | static PyObject* convert (const M& X)
41 | {
42 | npy_intp arsize[] = {X.rows(), X.cols()};
43 | M* X_ = new M(X); // Copy to persistent array
44 | PyObject* Xp = PyArray_SimpleNewFromData(2, arsize, NPY_DOUBLE, X_->data());
45 |
46 | if (Xp == NULL)
47 | throw std::runtime_error("Cannot convert Eigen matrix to Numpy array!");
48 |
49 | return Xp;
50 | }
51 | };
52 |
53 |
54 | // std::vector to python list [...].
55 | template <class C>
56 | struct vector2list
57 | {
58 | static PyObject* convert (const std::vector<C>& X)
59 | {
60 | boost::python::list* Xp = new boost::python::list();
61 |
62 | for (size_t i = 0; i < X.size(); ++i)
63 | Xp->append(X[i]);
64 |
65 | return Xp->ptr();
66 | }
67 | };
68 |
69 |
70 | //
71 | // Wrappers
72 | //
73 |
74 | // VDP
75 | boost::python::tuple wrapperVDP (
76 | const boost::python::api::object& X,
77 | const float clusterprior,
78 | const int maxclusters,
79 | const bool verbose,
80 | const int nthreads
81 | );
82 |
83 |
84 | // BGMM
85 | boost::python::tuple wrapperBGMM (
86 | const boost::python::api::object& X,
87 | const float clusterprior,
88 | const int maxclusters,
89 | const bool verbose,
90 | const int nthreads
91 | );
92 |
93 |
94 | // GMC
95 | boost::python::tuple wrapperGMC (
96 | const boost::python::list& X,
97 | const float clusterprior,
98 | const int maxclusters,
99 | const bool sparse,
100 | const bool verbose,
101 | const int nthreads
102 | );
103 |
104 |
105 | // SGMC
106 | boost::python::tuple wrapperSGMC (
107 | const boost::python::list& X,
108 | const float clusterprior,
109 | const int maxclusters,
110 | const bool sparse,
111 | const bool verbose,
112 | const int nthreads
113 | );
114 |
115 |
116 | // SCM
117 | boost::python::tuple wrapperSCM (
118 | const boost::python::list& X,
119 | const float dirprior,
120 | const float gausprior,
121 | const int trunc,
122 | const int maxclusters,
123 | const bool verbose,
124 | const int nthreads
125 | );
126 |
127 |
128 | // MCM
129 | boost::python::tuple wrapperMCM (
130 | const boost::python::list& W,
131 | const boost::python::list& X,
132 | const float gausprior_t,
133 | const float gausprior_k,
134 | const int trunc,
135 | const int maxclusters,
136 | const bool verbose,
137 | const int nthreads
138 | );
139 |
140 |
141 | //
142 | // Hack for python 2/3 numpy import differences: under Python 3 the import_array() macro returns NULL on failure, so init_numpy() must return a pointer type; under Python 2 it returns void
143 | //
144 |
145 | #if PY_MAJOR_VERSION >= 3
146 | int*
147 | #else
148 | void
149 | #endif
150 | init_numpy()
151 | {
152 | import_array();
153 | #if PY_MAJOR_VERSION >= 3
154 | return NULL;
155 | #endif
156 | }
157 |
158 |
159 | //
160 | // Module definition
161 | //
162 |
163 | BOOST_PYTHON_MODULE (libclusterpy)
164 | {
165 | using namespace boost::python;
166 |
167 | // This will enable user-defined docstrings and python signatures,
168 | // while disabling the C++ signatures
169 | docstring_options local_docstring_options(true, true, false);
170 |
171 |
172 | // set the docstring of the current module scope
173 | const std::string moddoc =
174 | "A collection of structured Bayesian clustering algorithms.\n\n"
175 | "This library contains implementations of a number of variational\n"
176 | "Bayesian clustering algorithms such as the Bayesian Gaussian Mixture\n"
177 | "model of [1], and the Variational Dirichlet process of [2]. Also \n"
178 | "implemented is a latent Dirichlet allocation-like model with a \n"
179 | "Gaussian observation model (GMC [4], SGMC/G-LDA [3, 4, 5]), and more\n"
180 | "highly structured models -- see the SCM and MCM functions [3, 4, 5].\n\n"
181 | "Author: Daniel Steinberg\n"
182 | "\tAustralian Centre for Field Robotics,\n"
183 | "\tThe University of Sydney.\n\n"
184 | "Date: 11/03/2013\n\n"
185 | "License: GPL v3 or later, See LICENSE.\n\n"
186 | " [1] C. M. Bishop, Pattern Recognition and Machine Learning. Cambridge,\n"
187 | "\tUK: Springer Science+Business Media, 2006.\n"
188 | " [2] K. Kurihara, M. Welling, and N. Vlassis, Accelerated variational\n"
189 | "\tDirichlet process mixtures, Advances in Neural Information Processing\n"
190 | "\tSystems, vol. 19, p. 761, 2007.\n"
191 | " [3] D. M. Steinberg, O. Pizarro, S. B. Williams, Synergistic Clustering\n"
192 | "\tof Image and Segment Descriptors for Unsupervised Scene Understanding.\n"
193 | "\tIn International Conference on Computer Vision (ICCV). IEEE, Sydney,\n"
194 | "\tNSW, 2013.\n"
195 | " [4] D. M. Steinberg, O. Pizarro, S. B. Williams. Hierarchical\n"
196 | "\tBayesian Models for Unsupervised Scene Understanding. Journal of\n"
197 | "\tComputer Vision and Image Understanding (CVIU). Elsevier, 2014.\n"
198 | " [5] D. M. Steinberg, An Unsupervised Approach to Modelling Visual Data,\n"
199 | "\tPhD Thesis, 2013.\n"
200 | " [6] D. M. Steinberg, A. Friedman, O. Pizarro, and S. B. Williams.\n"
201 | "\tA Bayesian nonparametric approach to clustering data from underwater\n"
202 | "\trobotic surveys. In International Symposium on Robotics Research,\n"
203 | "\tFlagstaff, AZ, Aug. 2011.";
204 | scope().attr("__doc__") = moddoc;
205 |
206 |
207 | // To-python converters
208 | init_numpy();
209 | to_python_converter< Eigen::ArrayXd, eigen2numpy<Eigen::ArrayXd> >();
210 | to_python_converter< Eigen::MatrixXd, eigen2numpy<Eigen::MatrixXd> >();
211 | to_python_converter< std::vector<Eigen::ArrayXd>,
212 | vector2list<Eigen::ArrayXd> >();
213 | to_python_converter< std::vector<Eigen::MatrixXd>,
214 | vector2list<Eigen::MatrixXd> >();
215 | to_python_converter< std::vector< std::vector<Eigen::MatrixXd> >,
216 | vector2list< std::vector<Eigen::MatrixXd> > >();
217 |
218 |
219 | // Common documentation strings -- arguments
220 | const std::string comargs = "\nArguments:\n";
221 | const std::string Xarg =
222 | "\tX: array shape(N,D) the data to be clustered, N are the number of \n"
223 | "\t\tsamples, D the number of dimensions.\n";
224 | const std::string vXarg =
225 | "\tX: list[array shape(N_j,D),...] of len = J which is the data to be\n"
226 | "\t\tclustered, N_j are the number of samples of each group (or list \n"
227 | "\t\telement) j of data, D the number of dimensions.\n";
228 | const std::string vvXarg =
229 | "\tX: list[list[array shape(N_j,D_b),...]] where the outer list is of\n"
230 | "\t\tlen = J, and each inner list is of len = I_j. This is the\n"
231 | "\t\t(bottom-level) data to be clustered, N_ji are the number of samples\n"
232 | "\t\tof each 'document/image' (ji) within each group (j) of data. D_b is\n"
233 | "\t\tthe number of dimensions.\n";
234 | const std::string truncarg =
235 | "\ttrunc: the maximum number of top-level clusters to find. This is the \n"
236 | "\t\ttruncation level, and usually fewer top-level clusters than this will\n"
237 | "\t\tbe returned.\n";
238 | const std::string maxclustersarg =
239 | "\tmaxclusters: the maximum number of bottom level clusters to search \n"
240 | "\t\tfor, -1 (default) means no upper bound.\n";
241 | const std::string priorarg =
242 | "\tprior: the prior width of the Gaussian clusters.\n";
243 | const std::string priorkarg =
244 | "\tgausprior_k: the prior width of the bottom-level Gaussian clusters.\n";
245 | const std::string sparsearg =
246 | "\tsparse: do sparse updates? I.e. only update the clusters that have\n"
247 | "\t\tmore than one observation.\n";
248 | const std::string verbarg =
249 | "\tverbose: output clustering status?\n";
250 | const std::string threadarg =
251 | "\tthreads: the number of threads to use.\n";
252 |
253 | // Common documentation strings -- returns
254 | const std::string comrets = "\nReturns:\n";
255 | const std::string fret =
256 | "\tf: float, the free energy learning objective value.\n";
257 | const std::string qZret =
258 | "\tqZ: array shape(N,K), the probability of the observations belonging to\n"
259 | "\t\teach cluster, where K is the number of discovered clusters.\n";
260 | const std::string vqZret =
261 | "\tqZ: list[array shape(N_j,K),...] of len = J, the probability of the\n"
262 | "\t\tobservations in group j belonging to each cluster. Here K is the\n"
263 | "\t\tnumber of discovered clusters.\n";
264 | const std::string vvqZret =
265 | "\tqZ: list[list[array shape(N_j,K),...]] with the outer list of len = J,\n"
266 | "\t\tand each inner list of len = I_j. This is the probability of the\n"
267 | "\t\tbottom-level observations belonging to each cluster. Here K is the\n"
268 | "\t\tnumber of discovered bottom-level clusters.\n";
269 | const std::string vqYret =
270 | "\tqY: list[array shape(N_j,T),...] of len = J, the probability of the\n"
271 | "\t\t'documents' in group j belonging to each top-level cluster. Here T\n"
272 | "\t\tis the number of discovered top-level clusters.\n";
273 | const std::string wret =
274 | "\tw: array shape(K,1), the (expected) Gaussian mixture weights.\n";
275 | const std::string vwret =
276 | "\tw_j: list[array shape(K,1),...] of len = J, the (expected) Gaussian\n"
277 | "\t\tmixture weights of each group, j.\n";
278 | const std::string vwjret =
279 | "\tw_j: list[array shape(T,1),...] of len = J, the (expected) top-level\n"
280 | "\t\tcluster weights of each group, j.\n";
281 | const std::string vwtret =
282 | "\tw_t: list[array shape(K,1),...] of len = T, the (expected) Gaussian\n"
283 | "\t\tmixture weights of each bottom-level cluster within each of the T\n"
284 | "\t\ttop-level clusters.\n";
285 | const std::string muret =
286 | "\tmu: array shape(K,D), the (expected) Gaussian mixture means.\n";
287 | const std::string covret =
288 | "\tcov: list[array shape(D,D),...] of len = K, the (expected) Gaussian\n"
289 | "\t\t mixture covariances.\n";
290 | const std::string mukret =
291 | "\tmu_k: array shape(K,D_b), the (expected) bottom-level Gaussian mixture\n"
292 | "\t\tmeans.\n";
293 | const std::string covkret =
294 | "\tcov_k: list[array shape(D_b,D_b),...] of len = K, the (expected)\n"
295 | "\t\tbottom-level Gaussian mixture covariances.\n";
296 |
297 |
298 | // VDP
299 | const std::string vdpdoc =
300 | "The Variational Dirichlet Process (VDP) of [2].\n\n"
301 | "The VDP is similar to a regular Bayesian GMM, but places a Dirichlet\n"
302 | "process prior over the mixture weights. This is also used in [6].\n"
303 | + comargs + Xarg + priorarg + maxclustersarg + verbarg + threadarg
304 | + comrets + fret + qZret + wret + muret + covret;
305 |
306 | def ("learnVDP", wrapperVDP,
307 | (
308 | arg("X"),
309 | arg("prior") = libcluster::PRIORVAL,
310 | arg("maxclusters") = -1,
311 | arg("verbose") = false,
312 | arg("threads") = omp_get_max_threads()
313 | ),
314 | vdpdoc.c_str()
315 | );
316 |
317 |
318 | // BGMM
319 | const std::string bgmmdoc =
320 | "The Bayesian Gaussian mixture model (BGMM) described in [1].\n\n"
321 | "This BGMM is similar to a GMM learned with EM, but it places a\n"
322 | "Dirichlet prior over the mixture weights, and Gaussian-Wishart priors\n"
323 | "over the Gaussian clusters. This implementation is similar to [1] but\n"
324 | "also employs the cluster splitting heuristics discussed in [2-5].\n"
325 | + comargs + Xarg + priorarg + maxclustersarg + verbarg + threadarg
326 | + comrets + fret + qZret + wret + muret + covret;
327 |
328 | def ("learnBGMM", wrapperBGMM,
329 | (
330 | arg("X"),
331 | arg("prior") = libcluster::PRIORVAL,
332 | arg("maxclusters") = -1,
333 | arg("verbose") = false,
334 | arg("threads") = omp_get_max_threads()
335 | ),
336 | bgmmdoc.c_str()
337 | );
338 |
339 |
340 | // GMC
341 | const std::string gmcdoc =
342 | "The Grouped Mixtures Clustering (GMC) algorithm.\n\n"
343 | "This function uses the Grouped Mixtures Clustering model [5] to cluster\n"
344 | "multiple datasets simultaneously with cluster sharing between datasets.\n"
345 | "It uses a Generalised Dirichlet prior over the group mixture weights, and\n"
346 | "a Gaussian-Wishart prior over the cluster parameters. This algorithm is\n"
347 | "similar to a one-level Hierarchical Dirichlet process with Gaussian\n"
348 | "observations.\n"
349 | + comargs + vXarg + priorarg + maxclustersarg+ sparsearg + verbarg
350 | + threadarg
351 | + comrets + fret + vqZret + vwret + muret + covret;
352 |
353 | def ("learnGMC", wrapperGMC,
354 | (
355 | arg("X"),
356 | arg("prior") = libcluster::PRIORVAL,
357 | arg("maxclusters") = -1,
358 | arg("sparse") = false,
359 | arg("verbose") = false,
360 | arg("threads") = omp_get_max_threads()
361 | ),
362 | gmcdoc.c_str()
363 | );
364 |
365 |
366 | // SGMC
367 | const std::string sgmcdoc =
368 | "The Symmetric Grouped Mixtures Clustering (S-GMC) algorithm.\n\n"
369 | "This function uses the Symmetric Grouped Mixtures Clustering model [5]\n"
370 | "to cluster multiple datasets simultaneously with cluster sharing between\n"
371 | "datasets. It uses a symmetric Dirichlet prior over the group mixture\n"
372 | "weights, and a Gaussian-Wishart prior over the cluster parameters. This\n"
373 | "algorithm is similar to latent Dirichlet allocation with Gaussian\n"
374 | "observations.\n\n"
375 | "It is also referred to as Gaussian Latent Dirichlet Allocation (G-LDA)\n"
376 | "in [3, 4].\n"
377 | + comargs + vXarg + priorarg + maxclustersarg + sparsearg + verbarg
378 | + threadarg
379 | + comrets + fret + vqZret + vwret + muret + covret;
380 |
381 | def ("learnSGMC", wrapperSGMC,
382 | (
383 | arg("X"),
384 | arg("prior") = libcluster::PRIORVAL,
385 | arg("maxclusters") = -1,
386 | arg("sparse") = false,
387 | arg("verbose") = false,
388 | arg("threads") = omp_get_max_threads()
389 | ),
390 | sgmcdoc.c_str()
391 | );
392 |
393 |
394 | // SCM
395 | const std::string dpriorarg =
396 | "\tdirprior: The top-level Dirichlet prior. This affects the number of\n"
397 | "\t\tclusters found. This may need to be turned up high to have an effect.\n";
398 |
399 | const std::string scmdoc =
400 | "The Simultaneous Clustering Model (SCM).\n\n"
401 | "This function implements the Simultaneous Clustering Model algorithm as\n"
402 | "specified by [4, 5]. The SCM uses a Generalised Dirichlet prior on the\n"
403 | "group mixture weights, a Dirichlet prior on the top-level clusters and\n"
404 | "Gaussian bottom-level cluster distributions for observations (with\n"
405 | "Gaussian-Wishart priors).\n"
406 | + comargs + vvXarg + dpriorarg + priorkarg + truncarg + maxclustersarg
407 | + verbarg + threadarg
408 | + comrets + fret + vqYret + vvqZret + vwjret + vwtret + mukret + covkret;
409 |
410 | def ("learnSCM", wrapperSCM,
411 | (
412 | arg("X"),
413 | arg("dirprior") = libcluster::PRIORVAL,
414 | arg("gausprior") = libcluster::PRIORVAL,
415 | arg("trunc") = libcluster::TRUNC,
416 | arg("maxclusters") = -1,
417 | arg("verbose") = false,
418 | arg("threads") = omp_get_max_threads()
419 | ),
420 | scmdoc.c_str()
421 | );
422 |
423 |
424 | // MCM
425 | const std::string vWarg =
426 | "\tW: list[array shape(I_j,D_t),...] of len = J which is the top-level\n"
427 | "\t\t ('document') data to be clustered, I_j are the number of documents\n"
428 | "\t\tin each group (or list element) j of data, D_t the number of\n"
429 | "\t\tdimensions.\n";
430 | const std::string priortarg =
431 | "\tgausprior_t: the prior width of the top-level Gaussian clusters.\n";
432 | const std::string mutret =
433 | "\tmu_t: array shape(T,D_t), the (expected) top-level Gaussian mixture\n"
434 | "\t\tmeans.\n";
435 | const std::string covtret =
436 | "\tcov_t: list[array shape(D_t,D_t),...] of len = T, the (expected)\n"
437 | "\t\ttop-level Gaussian mixture covariances.\n";
438 |
439 | const std::string mcmdoc =
440 | "The Multiple-source Clustering Model (MCM).\n\n"
441 | "This function implements the Multiple-source Clustering Model algorithm\n"
442 | "as specified by [3-5]. This model jointly clusters both 'document'\n"
443 | "level observations, and 'word' observations. The MCM uses a Generalised\n"
444 | "Dirichlet prior on the group mixture weights, Multinomial-Gaussian \n"
445 | "top-level (document) clusters, and Gaussian bottom-level (word) cluster\n"
446 | "distributions.\n"
447 | + comargs + vWarg + vvXarg + priortarg + priorkarg + truncarg
448 | + maxclustersarg + verbarg + threadarg
449 | + comrets + fret + vqYret + vvqZret + vwjret + vwtret + mutret + mukret
450 | + covtret + covkret;
451 |
452 | def ("learnMCM", wrapperMCM,
453 | (
454 | arg("W"),
455 | arg("X"),
456 | arg("gausprior_t") = libcluster::PRIORVAL,
457 | arg("gausprior_k") = libcluster::PRIORVAL,
458 | arg("trunc") = libcluster::TRUNC,
459 | arg("maxclusters") = -1,
460 | arg("verbose") = false,
461 | arg("threads") = omp_get_max_threads()
462 | ),
463 | mcmdoc.c_str()
464 | );
465 |
466 | }
467 |
468 | #endif
469 |
--------------------------------------------------------------------------------
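A minimal usage sketch of the module defined above, mirroring the calls in
testapi.py. The data is synthetic and the keyword names follow the arg()
declarations in the module definition; defaults are assumed for everything
else.

```python
import numpy as np
import libclusterpy as lc

# Single-dataset models: VDP and BGMM take one (N, D) array.
X = np.random.randn(500, 2)
f, qZ, w, mu, cov = lc.learnVDP(X, verbose=True)

# Grouped models: GMC and SGMC take a list of arrays, one per group.
Xj = [np.random.randn(200, 2) for _ in range(3)]
f, qZ, w, mu, cov = lc.learnGMC(Xj, sparse=True, verbose=True)
```
--------------------------------------------------------------------------------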
/python/testapi.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 |
3 | # libcluster -- A collection of hierarchical Bayesian clustering algorithms.
4 | # Copyright (C) 2013 Daniel M. Steinberg (daniel.m.steinberg@gmail.com)
5 | #
6 | # This file is part of libcluster.
7 | #
8 | # libcluster is free software: you can redistribute it and/or modify it under
9 | # the terms of the GNU Lesser General Public License as published by the Free
10 | # Software Foundation, either version 3 of the License, or (at your option)
11 | # any later version.
12 | #
13 | # libcluster is distributed in the hope that it will be useful, but WITHOUT
14 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 | # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
16 | # for more details.
17 | #
18 | # You should have received a copy of the GNU Lesser General Public License
19 | # along with libcluster. If not, see <http://www.gnu.org/licenses/>.
20 |
21 | """ Script to make sure libcluster runs properly using the python API.
22 |
23 | Author: Daniel Steinberg
24 | Date: 13/10/2013
25 |
26 | """
27 |
28 | import numpy as np
29 | import libclusterpy as lc
30 |
31 |
32 | # Top-level cluster parameters -- globals.
33 | means = np.array([[0, 0], [5, 5], [-5, -5]])
34 | sigma = [np.eye(2)] * 3
35 | beta = np.array([[1.0 / 3, 1.0 / 3, 1.0 / 3],
36 | [1.0 / 2, 1.0 / 4, 1.0 / 4],
37 | [1.0 / 4, 1.0 / 4, 1.0 / 2]])
38 |
39 |
40 | def testmixtures():
41 | """ The test function. """
42 |
43 | print("Testing mixtures ------------------\n")
44 |
45 | # Create points from clusters
46 | W = gengmm(10000)
47 |
48 | # Test VDP
49 | print("------------ Test VDP -------------")
50 | f, qZ, w, mu, cov = lc.learnVDP(W, verbose=True)
51 | print("")
52 | printgmm(w, mu, cov)
53 |
54 | # Test BGMM
55 | print("------------ Test BGMM ------------")
56 | f, qZ, w, mu, cov = lc.learnBGMM(W, verbose=True)
57 | print("")
58 | printgmm(w, mu, cov)
59 |
60 |
61 | def testgroupmix():
62 |
63 | print("Testing group mixtures ------------\n")
64 |
65 | # Create points from clusters
66 | J = 4 # Groups
67 | W = [gengmm(2000) for j in range(J)]
68 |
69 | # Test GMC
70 | print("------------ Test GMC -------------")
71 | f, qZ, w, mu, cov = lc.learnGMC(W, verbose=True)
72 | print("")
73 | printgmm(w, mu, cov)
74 |
75 | # Test SGMC
76 | print("------------ Test SGMC ------------")
77 | f, qZ, w, mu, cov = lc.learnSGMC(W, verbose=True)
78 | print("")
79 | printgmm(w, mu, cov)
80 |
81 |
82 | def testmultmix():
83 | """ Test the models that cluster at multiple levels. Just using J=1. """
84 |
85 | # Generate top-level clusters
86 | I = 200
87 | Ni = 100
88 | betas, Y = gensetweights(I)
89 |
90 | # Create points from clusters
91 | W = np.zeros((I, means.shape[1]))
92 | X = []
93 | for i in range(I):
94 | W[i, :] = np.random.multivariate_normal(means[Y[i]], sigma[Y[i]], 1)
95 | X.append(gengmm(Ni, betas[i, :]))
96 |
97 | # Test SCM
98 | print("------------ Test SCM -------------")
99 | f, qY, qZ, wi, ws, mu, cov = lc.learnSCM([X], trunc=30, verbose=True)
100 | print("")
101 | printgmm(ws, mu, cov)
102 |
103 | # Test MCM
104 | print("------------ Test MCM -------------")
105 | f, qY, qZ, wi, ws, mui, mus, covi, covs = lc.learnMCM([W], [X], trunc=30,
106 | verbose=True)
107 | print("\nTop level mixtures:")
108 | printgmm(wi, mui, covi)
109 | print("Bottom level mixtures:")
110 | printgmm(ws, mus, covs)
111 |
112 |
113 | def gengmm(N, weights=None):
114 | """ Make a random GMM with N observations. """
115 |
116 | K = len(sigma)
117 | pi = np.random.rand(K) if weights is None else weights
118 | pi /= pi.sum()
119 | Nk = np.round(pi * N)
120 | Nk[-1] = N - Nk[0:-1].sum()
121 |
122 | X = [np.random.multivariate_normal(means[k, :], sigma[k], int(Nk[k]))
123 | for k in range(K)]
124 |
125 | return np.concatenate(X)
126 |
127 |
128 | def gensetweights(I):
129 | """ Generate sets of similar weights. """
130 |
131 | T = beta.shape[0]
132 | pi = np.random.rand(T)
133 | pi /= pi.sum()
134 | Nt = np.round(pi * I)
135 | Nt[-1] = I - Nt[0:-1].sum()
136 |
137 | betas = []
138 | Y = []
139 | for t in range(T):
140 | Y += int(Nt[t]) * [t]
141 | betas.append(int(Nt[t]) * [beta[t, :]])
142 |
143 | return np.concatenate(betas), Y
144 |
145 |
146 | def printgmm(W, Mu, Cov):
147 | """ Print the parameters of a GMM. """
148 |
149 | Wnp = np.array(W)
150 |
151 | for i, (mu, cov) in enumerate(zip(Mu, Cov)):
152 |
153 | print("Mixture {0}:".format(i))
154 | if Wnp.ndim == 2:
155 | print(" weight --\n{0}".format(Wnp[i, :]))
156 | elif Wnp.ndim == 3:
157 | print(" group weights --\n{0}".format(Wnp[:, i, :]))
158 | print(" mean --\n{0}\n cov --\n{1}\n".format(mu, cov))
159 |
160 |
161 | if __name__ == "__main__":
162 | testmixtures()
163 | testgroupmix()
164 | testmultmix()
165 |
--------------------------------------------------------------------------------
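The tests above only print the mixture parameters; the soft assignments qZ
they unpack are often post-processed further. A short sketch (standard numpy,
nothing libcluster-specific) of recovering hard labels from qZ:

```python
import numpy as np
import libclusterpy as lc

X = np.random.randn(1000, 2)
f, qZ, w, mu, cov = lc.learnVDP(X)

labels = qZ.argmax(axis=1)    # hard assignment: most probable cluster per row
counts = np.bincount(labels)  # observations per discovered cluster
```
--------------------------------------------------------------------------------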
/src/cluster.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * libcluster -- A collection of hierarchical Bayesian clustering algorithms.
3 | * Copyright (C) 2013 Daniel M. Steinberg (daniel.m.steinberg@gmail.com)
4 | *
5 | * This file is part of libcluster.
6 | *
7 | * libcluster is free software: you can redistribute it and/or modify it under
8 | * the terms of the GNU Lesser General Public License as published by the Free
9 | * Software Foundation, either version 3 of the License, or (at your option)
10 | * any later version.
11 | *
12 | * libcluster is distributed in the hope that it will be useful, but WITHOUT
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
15 | * for more details.
16 | *
17 | * You should have received a copy of the GNU Lesser General Public License
18 |  * along with libcluster. If not, see <http://www.gnu.org/licenses/>.
19 | */
20 |
21 | // TODO:
22 | // - sparse updates sometimes create positive free energy steps.
23 |
24 | #include <limits>
25 | #include "libcluster.h"
26 | #include "probutils.h"
27 | #include "distributions.h"
28 | #include "comutils.h"
29 |
30 |
31 | //
32 | // Namespaces
33 | //
34 |
35 | using namespace std;
36 | using namespace Eigen;
37 | using namespace probutils;
38 | using namespace distributions;
39 | using namespace comutils;
40 | using namespace libcluster;
41 |
42 |
43 | //
44 | // Variational Bayes Private Functions
45 | //
46 |
47 |
48 | /* Update the group and model sufficient statistics based on assignments qZj.
49 | *
50 | * mutable: the clusters (add sufficient stats).
51 |  * returns: the number of observations in each cluster for this group.
52 | */
53 | template <class C> ArrayXd updateSS (
54 | const MatrixXd& Xj, // Observations in group j
55 | const MatrixXd& qZj, // Observations to group mixture assignments
56 | vector<C>& clusters, // Cluster Distributions
57 | const bool sparse // Do sparse updates to groups
58 | )
59 | {
60 | const unsigned int K = qZj.cols();
61 |
62 | const ArrayXd Njk = qZj.colwise().sum(); // count obs. in this group
63 | ArrayXi Kful = ArrayXi::Zero(1), // Initialise and set K = 1 defaults
64 | Kemp = ArrayXi::Zero(0);
65 |
66 | // Find empty clusters if sparse
67 | if ( (sparse == false) && (K > 1) )
68 | Kful = ArrayXi::LinSpaced(Sequential, K, 0, K-1);
69 | else if (sparse == true)
70 | arrfind((Njk >= ZEROCUTOFF), Kful, Kemp);
71 |
72 | const unsigned int nKful = Kful.size();
73 |
74 | // Sufficient statistics - with observations
75 | for (unsigned int k = 0; k < nKful; ++k)
76 | {
77 | #pragma omp critical
78 | clusters[Kful(k)].addobs(qZj.col(Kful(k)), Xj);
79 | }
80 |
81 | return Njk;
82 | }
83 |
84 |
85 | /* The Variational Bayes Expectation step for each group.
86 | *
87 | * mutable: Group assignment probabilities, qZj
88 | * returns: The complete-data (X,Z) free energy E[log p(X,Z)/q(Z)] for group j.
89 | * throws: invalid_argument rethrown from other functions.
90 | */
91 | template <class W, class C> double vbexpectation (
92 | const MatrixXd& Xj, // Observations in group J
93 | const W& weights, // Group Weight parameter distribution
94 | const vector<C>& clusters, // Cluster parameter distributions
95 | MatrixXd& qZj, // Observations to group mixture assignments
96 | const bool sparse // Do sparse updates to groups
97 | )
98 | {
99 | const int K = clusters.size(),
100 | Nj = Xj.rows();
101 |
102 | // Get log marginal weight likelihoods
103 | const ArrayXd E_logZ = weights.Elogweight();
104 |
105 | // Initialise and set K = 1 defaults for cluster counts
106 | ArrayXi Kful = ArrayXi::Zero(1), Kemp = ArrayXi::Zero(0);
107 |
108 | // Find empty clusters if sparse
109 | if ( (sparse == false) && (K > 1) )
110 | Kful = ArrayXi::LinSpaced(Sequential, K, 0, K-1);
111 | else if (sparse == true)
112 | arrfind((weights.getNk() >= ZEROCUTOFF), Kful, Kemp);
113 |
114 | const int nKful = Kful.size(),
115 | nKemp = Kemp.size();
116 |
117 | // Find Expectations of log joint observation probs -- allow sparse evaluation
118 | MatrixXd logqZj(Nj, nKful);
119 |
120 | for (int k = 0; k < nKful; ++k)
121 | logqZj.col(k) = E_logZ(Kful(k)) + clusters[Kful(k)].Eloglike(Xj).array();
122 |
123 | // Log normalisation constant of log observation likelihoods
124 | const VectorXd logZzj = logsumexp(logqZj);
125 |
126 | // Make sure qZ is the right size, this is a nop if it is
127 | qZj.resize(Nj, K);
128 |
129 | // Normalise and Compute Responsibilities -- again allow sparse evaluation
130 | for (int k = 0; k < nKful; ++k)
131 | qZj.col(Kful(k)) = ((logqZj.col(k) - logZzj).array().exp()).matrix();
132 |
133 | // Empty Cluster Responsibilities
134 | for (int k = 0; k < nKemp; ++k)
135 | qZj.col(Kemp(k)).setZero();
136 |
137 | return -logZzj.sum();
138 | }
139 |
140 |
141 | /* Calculates the free energy lower bound for the model parameter distributions.
142 | *
143 | * returns: the free energy of the model
144 | */
145 | template <class W, class C> double fenergy (
146 | const vector<W>& weights, // Weight parameter distributions
147 | const vector<C>& clusters, // Cluster parameter distributions
148 | const double Fxz // Free energy from data log-likelihood
149 | )
150 | {
151 | const int K = clusters.size(),
152 | J = weights.size();
153 |
154 | // Free energy of the weight parameter distributions
155 | double Fw = 0;
156 | for (int j = 0; j < J; ++j)
157 | Fw += weights[j].fenergy();
158 |
159 | // Free energy of the cluster parameter distributions
160 | double Fc = 0;
161 | for (int k = 0; k < K; ++k)
162 | Fc += clusters[k].fenergy();
163 |
164 | return Fc + Fw + Fxz;
165 | }
166 |
167 |
168 | /* Variational Bayes EM for all group mixtures.
169 | *
170 | * returns: Free energy of the whole model.
171 | * mutable: variational posterior approximations to p(Z|X).
172 | * mutable: the group weight distributions
173 | * mutable: the cluster distributions
174 | * throws: invalid_argument rethrown from other functions.
175 | * throws: runtime_error if there is a negative free energy.
176 | */
177 | template <class W, class C> double vbem (
178 | const vMatrixXd& X, // Observations
179 | vMatrixXd& qZ, // Observations to model mixture assignments
180 | vector<W>& weights, // Group weight distributions
181 | vector<C>& clusters, // Cluster Distributions
182 | const double clusterprior, // Prior value for cluster distributions
183 | const int maxit = -1, // Max VBEM iterations (-1 = no max, default)
184 | const bool sparse = false, // Do sparse updates to groups (default false)
185 | const bool verbose = false // Verbose output (default false)
186 | )
187 | {
188 | const int J = X.size(),
189 | K = qZ[0].cols();
190 |
191 | // Construct (empty) parameters
192 | weights.resize(J, W());
193 | clusters.resize(K, C(clusterprior, X[0].cols()));
194 |
195 | double F = numeric_limits<double>::max(), Fold;
196 | int i = 0;
197 |
198 | do
199 | {
200 | Fold = F;
201 |
202 | // Clear Sufficient Statistics
203 | for (int k = 0; k < K; ++k)
204 | clusters[k].clearobs();
205 |
206 | // Update Suff Stats and VBM for weights
207 | #pragma omp parallel for schedule(guided)
208 | for (int j = 0; j < J; ++j)
209 | {
210 | ArrayXd Njk = updateSS(X[j], qZ[j], clusters, sparse);
211 | weights[j].update(Njk);
212 | }
213 |
214 | // VBM for clusters
215 | #pragma omp parallel for schedule(guided)
216 | for (int k = 0; k < K; ++k)
217 | clusters[k].update();
218 |
219 | // VBE
220 | double Fz = 0;
221 | #pragma omp parallel for schedule(guided) reduction(+ : Fz)
222 | for (int j = 0; j < J; ++j)
223 | Fz += vbexpectation(X[j], weights[j], clusters, qZ[j], sparse);
224 |
225 | // Calculate free energy of model
226 | F = fenergy(weights, clusters, Fz);
227 |
228 | // Check bad free energy step
229 | if ((F-Fold)/abs(Fold) > FENGYDEL)
230 | throw runtime_error("Free energy increase!");
231 |
232 | if (verbose == true) // Notify iteration
233 | cout << '-' << flush;
234 | }
235 | while ( (abs((Fold-F)/Fold) > CONVERGE)
236 | && ( (i++ < maxit) || (maxit < 0) ) );
237 |
238 | return F;
239 | }
240 |
241 |
242 | //
243 | // Model Selection and Heuristics Private Functions
244 | //
245 |
246 |
247 | /* Search in an exhaustive fashion for a mixture split that lowers model free
248 | * energy the most. If no splits are found which lower Free Energy, then
249 | * false is returned, and qZ is not modified.
250 | *
251 | * returns: true if a split was found, false if no splits can be found
252 | * mutable: qZ is augmented with a new split if one is found, otherwise left
253 | * throws: invalid_argument rethrown from other functions
254 | * throws: runtime_error from its internal VBEM calls
255 | */
256 | #ifdef EXHAUST_SPLIT
257 | template <class W, class C> bool split_ex (
258 | const vMatrixXd& X, // Observations
259 | const vector<C>& clusters, // Cluster Distributions
260 | vMatrixXd& qZ, // Probabilities qZ
261 | const double F, // Current model free energy
262 | const int maxclusters, // maximum number of clusters to search for
263 | const bool sparse, // Do sparse updates to groups
264 | const bool verbose // Verbose output
265 | )
266 | {
267 | const unsigned int J = X.size(),
268 | K = clusters.size();
269 |
270 | // Check if we have reached the max number of clusters
271 | if ( ((signed) K >= maxclusters) && (maxclusters >= 0) )
272 | return false;
273 |
274 | // Pre allocate big objects for loops (this makes a runtime difference)
275 | double Fbest = numeric_limits::infinity();
276 | vector<ArrayXi> mapidx(J, ArrayXi());
277 | vMatrixXd qZref(J,MatrixXd()), qZaug(J,MatrixXd()), Xk(J,MatrixXd()), qZbest;
278 |
279 | // Loop through each potential cluster in order and split it
280 | for (unsigned int k = 0; k < K; ++k)
281 | {
282 | // Don't waste time with clusters that can't really be split min (2:2)
283 | if (clusters[k].getN() < 4)
284 | continue;
285 |
286 | // Now split observations and qZ.
287 | int scount = 0, Mtot = 0;
288 |
289 | #pragma omp parallel for schedule(guided) reduction(+ : Mtot, scount)
290 | for (unsigned int j = 0; j < J; ++j)
291 | {
292 | // Make COPY of the observations with only relevant data points, p > 0.5
293 | mapidx[j] = partobs(X[j], (qZ[j].col(k).array()>0.5), Xk[j]); // Copy :-(
294 | Mtot += Xk[j].rows();
295 |
296 | // Initial cluster split
297 | ArrayXb splitk = clusters[k].splitobs(Xk[j]);
298 | qZref[j].setZero(Xk[j].rows(), 2);
299 | qZref[j].col(0) = (splitk == true).cast<double>(); // Init qZ for split
300 | qZref[j].col(1) = (splitk == false).cast<double>();
301 |
302 | // keep a track of number of splits
303 | scount += splitk.count();
304 | }
305 |
306 | // Don't waste time with clusters that haven't been split sufficiently
307 | if ( (scount < 2) || (scount > (Mtot-2)) )
308 | continue;
309 |
310 | // Refine the split
311 | vector<W> wspl;
312 | vector<C> cspl;
313 | vbem(Xk, qZref, wspl, cspl, clusters[0].getprior(), SPLITITER, sparse);
314 |
315 | if (anyempty(cspl) == true) // One cluster only
316 | continue;
317 |
318 | // Map the refined splits back to original whole-data problem
319 | #pragma omp parallel for schedule(guided)
320 | for (unsigned int j = 0; j < J; ++j)
321 | qZaug[j] = auglabels(k, mapidx[j], (qZref[j].col(1).array()>0.5), qZ[j]);
322 |
323 | // Calculate free energy of this split with ALL data (and refine a bit)
324 | double Fsplit = vbem(X, qZaug, wspl, cspl, clusters[0].getprior(), 1,
325 | sparse);
326 |
327 | if (anyempty(cspl) == true) // One cluster only
328 | continue;
329 |
330 | // Only notify here of split candidates
331 | if (verbose == true)
332 | cout << '=' << flush;
333 |
334 | // Record best splits so far
335 | if (Fsplit < Fbest)
336 | {
337 | qZbest = qZaug;
338 | Fbest = Fsplit;
339 | }
340 | }
341 |
342 | // See if this split actually improves the model
343 | if ( (Fbest < F) && (abs((F-Fbest)/F) > CONVERGE) )
344 | {
345 | qZ = qZbest;
346 | return true;
347 | }
348 | else
349 | return false;
350 | }
351 | #endif
352 |
353 |
354 | /* Search in a greedy fashion for a mixture split that lowers model free
355 |  * energy, or return false. An attempt is made to look for good, untried
356 |  * split candidates first; as soon as a split candidate is found that lowers
357 | * model F, it is returned. This may not be the "best" split, but it is
358 | * certainly faster than an exhaustive search for the "best" split.
359 | *
360 | * returns: true if a split was found, false if no splits can be found
361 | * mutable: qZ is augmented with a new split if one is found, otherwise left
362 |  * mutable: tally counts the times a cluster has been unsuccessfully split
363 | * throws: invalid_argument rethrown from other functions
364 | * throws: runtime_error from its internal VBEM calls
365 | */
366 | #ifndef EXHAUST_SPLIT
367 | template <class W, class C> bool split_gr (
368 | const vMatrixXd& X, // Observations
369 | const vector<W>& weights, // Group weight distributions
370 | const vector<C>& clusters, // Cluster Distributions
371 | vMatrixXd& qZ, // Probabilities qZ
372 | vector<int>& tally, // Count of unsuccessful splits
373 | const double F, // Current model free energy
374 | const int maxclusters, // maximum number of clusters to search for
375 | const bool sparse, // Do sparse updates to groups
376 | const bool verbose // Verbose output
377 | )
378 | {
379 | const unsigned int J = X.size(),
380 | K = clusters.size();
381 |
382 | // Check if we have reached the max number of clusters
383 | if ( ((signed) K >= maxclusters) && (maxclusters >= 0) )
384 | return false;
385 |
386 | // Split order chooser and cluster parameters
387 | tally.resize(K, 0); // Make sure tally is the right size
388 | vector<GreedOrder> ord(K);
389 |
390 | // Get cluster parameters and their free energy
391 | #pragma omp parallel for schedule(guided)
392 | for (unsigned int k = 0; k < K; ++k)
393 | {
394 | ord[k].k = k;
395 | ord[k].tally = tally[k];
396 | ord[k].Fk = clusters[k].fenergy();
397 | }
398 |
399 | // Get cluster likelihoods
400 | #pragma omp parallel for schedule(guided)
401 | for (unsigned int j = 0; j < J; ++j)
402 | {
403 | // Get cluster weights
404 | ArrayXd logpi = weights[j].Elogweight();
405 |
406 | // Add in cluster log-likelihood, weighted by responsibility
407 | for (unsigned int k = 0; k < K; ++k)
408 | {
409 | double LL = qZ[j].col(k).dot((logpi(k)
410 | + clusters[k].Eloglike(X[j]).array()).matrix());
411 |
412 | #pragma omp atomic
413 | ord[k].Fk -= LL;
414 | }
415 | }
416 |
417 | // Sort clusters by split tally, then free energy contributions
418 | sort(ord.begin(), ord.end(), greedcomp);
419 |
420 | // Pre allocate big objects for loops (this makes a runtime difference)
421 | vector<ArrayXi> mapidx(J, ArrayXi());
422 | vMatrixXd qZref(J, MatrixXd()), qZaug(J,MatrixXd()), Xk(J,MatrixXd());
423 |
424 | // Loop through each potential cluster in order and split it
425 | for (vector::iterator i = ord.begin(); i < ord.end(); ++i)
426 | {
427 | const int k = i->k;
428 |
429 | ++tally[k]; // increase this cluster's unsuccessful split tally by default
430 |
431 | // Don't waste time with clusters that can't really be split min (2:2)
432 | if (clusters[k].getN() < 4)
433 | continue;
434 |
435 | // Now split observations and qZ.
436 | int scount = 0, Mtot = 0;
437 |
438 | #pragma omp parallel for schedule(guided) reduction(+ : Mtot, scount)
439 | for (unsigned int j = 0; j < J; ++j)
440 | {
441 | // Make COPY of the observations with only relevant data points, p > 0.5
442 | mapidx[j] = partobs(X[j], (qZ[j].col(k).array()>0.5), Xk[j]); // Copy :-(
443 | Mtot += Xk[j].rows();
444 |
445 | // Initial cluster split
446 | ArrayXb splitk = clusters[k].splitobs(Xk[j]);
447 | qZref[j].setZero(Xk[j].rows(), 2);
448 | qZref[j].col(0) = (splitk == true).cast<double>(); // Init qZ for split
449 | qZref[j].col(1) = (splitk == false).cast<double>();
450 |
451 | // keep a track of number of splits
452 | scount += splitk.count();
453 | }
454 |
455 | // Don't waste time with clusters that haven't been split sufficiently
456 | if ( (scount < 2) || (scount > (Mtot-2)) )
457 | continue;
458 |
459 | // Refine the split
460 | vector<W> wspl;
461 | vector<C> cspl;
462 | vbem(Xk, qZref, wspl, cspl, clusters[0].getprior(), SPLITITER, sparse);
463 |
464 | if (anyempty(cspl) == true) // One cluster only
465 | continue;
466 |
467 | // Map the refined splits back to original whole-data problem
468 | #pragma omp parallel for schedule(guided)
469 | for (unsigned int j = 0; j < J; ++j)
470 | qZaug[j] = auglabels(k, mapidx[j], (qZref[j].col(1).array()>0.5), qZ[j]);
471 |
472 | // Calculate free energy of this split with ALL data (and refine a bit)
473 | double Fsplit = vbem(X, qZaug, wspl, cspl, clusters[0].getprior(), 1,
474 | sparse);
475 |
476 | if (anyempty(cspl) == true) // One cluster only
477 | continue;
478 |
479 | // Only notify here of split candidates
480 | if (verbose == true)
481 | cout << '=' << flush;
482 |
483 | // Test whether this cluster split is a keeper
484 | if ( (Fsplit < F) && (abs((F-Fsplit)/F) > CONVERGE) )
485 | {
486 | qZ = qZaug;
487 | tally[k] = 0; // Reset tally if successfully split
488 | return true;
489 | }
490 | }
491 |
492 | // Failed to find splits
493 | return false;
494 | }
495 | #endif
496 |
497 |
498 | /* Find and remove all empty clusters.
499 | *
500 | * returns: true if any clusters have been deleted, false if all are kept.
501 | * mutable: qZ may have columns deleted if there are empty clusters found.
502 | * mutable: weights if there are empty clusters found.
503 | * mutable: clusters if there are empty clusters found.
504 | */
505 | template <class W, class C> bool prune_clusters (
506 | vMatrixXd& qZ, // Probabilities qZ
507 | vector<W>& weights, // weights distributions
508 | vector<C>& clusters, // cluster distributions
509 | bool verbose = false // print status
510 | )
511 | {
512 | const unsigned int K = clusters.size(),
513 | J = qZ.size();
514 |
515 | // Look for empty clusters
516 | ArrayXd Nk(K);
517 | for (unsigned int k= 0; k < K; ++k)
518 | Nk(k) = clusters[k].getN();
519 |
520 | // Find location of empty and full clusters
521 | ArrayXi eidx, fidx;
522 | arrfind(Nk.array() < ZEROCUTOFF, eidx, fidx);
523 | const unsigned int nempty = eidx.size();
524 |
525 | // If there are no empty clusters, return false
526 | if (nempty == 0)
527 | return false;
528 |
529 | if (verbose == true)
530 | cout << '*' << flush;
531 |
532 | // Delete empty cluster suff. stats.
533 | for (int i = (nempty - 1); i >= 0; --i)
534 | clusters.erase(clusters.begin() + eidx(i));
535 |
536 | // Delete empty cluster indicators by copying only full indicators
537 | const unsigned int newK = fidx.size();
538 | vMatrixXd newqZ(J);
539 |
540 | for (unsigned int j = 0; j < J; ++j)
541 | {
542 | newqZ[j].setZero(qZ[j].rows(), newK);
543 | for (unsigned int k = 0; k < newK; ++k)
544 | newqZ[j].col(k) = qZ[j].col(fidx(k));
545 |
546 | weights[j].update(newqZ[j].colwise().sum()); // new weights
547 | }
548 |
549 | qZ = newqZ;
550 |
551 | return true;
552 | }
553 |
554 |
555 | /* The model selection algorithm for a grouped mixture model.
556 | *
557 | * returns: Free energy of the final model
558 | * mutable: qZ the probabilistic observation to cluster assignments
559 | * mutable: the group weight distributions
560 | * mutable: the cluster distributions
561 | * throws: invalid_argument from other functions.
562 | * throws: runtime_error if free energy increases.
563 | */
564 | template <class W, class C> double cluster (
565 | const vMatrixXd& X, // Observations
566 | vMatrixXd& qZ, // Observations to model mixture assignments
567 | vector<W>& weights, // Group weight distributions
568 | vector<C>& clusters, // Cluster Distributions
569 | const double clusterprior, // Prior value for cluster distributions
570 | const int maxclusters, // Maximum number of clusters to search for
571 | const bool sparse, // Do sparse updates to groups
572 | const bool verbose, // Verbose output
573 | const unsigned int nthreads // Number of threads for OpenMP to use
574 | )
575 | {
576 | if (nthreads < 1)
577 | throw invalid_argument("Must specify at least one thread for execution!");
578 | omp_set_num_threads(nthreads);
579 |
580 | const unsigned int J = X.size();
581 |
582 | // Initialise indicator variables to just one cluster
583 | qZ.resize(J);
584 | for (unsigned int j = 0; j < J; ++j)
585 | qZ[j].setOnes(X[j].rows(), 1);
586 |
587 | // Initialise free energy and other loop variables
588 | bool issplit = true;
589 | double F;
590 |
591 | #ifndef EXHAUST_SPLIT
592 | vector<int> tally;
593 | #endif
594 |
595 | // Main loop
596 | while (issplit == true)
597 | {
598 | // VBEM for all groups (throws runtime_error & invalid_argument)
599 | F = vbem(X, qZ, weights, clusters, clusterprior, -1, sparse, verbose);
600 |
601 | // Remove any empty clusters
602 | prune_clusters(qZ, weights, clusters, verbose);
603 |
604 | // Start cluster splitting
605 | if (verbose == true)
606 | cout << '<' << flush; // Notify start splitting
607 |
608 | // Search for best split, augment qZ if found one
609 | #ifdef EXHAUST_SPLIT
610 | issplit = split_ex<W,C>(X, clusters, qZ, F, maxclusters, sparse, verbose);
611 | #else
612 | issplit = split_gr(X, weights, clusters, qZ, tally, F, maxclusters,
613 | sparse, verbose);
614 | #endif
615 |
616 | if (verbose == true)
617 | cout << '>' << endl; // Notify end splitting
618 | }
619 |
620 | // Print finished notification if verbose
621 | if (verbose == true)
622 | {
623 | cout << "Finished!" << endl;
624 | cout << "Number of clusters = " << clusters.size() << endl;
625 | cout << "Free energy = " << F << endl;
626 | }
627 |
628 | return F;
629 | }
630 |
631 |
632 | //
633 | // Public Functions
634 | //
635 |
636 | double libcluster::learnVDP (
637 | const MatrixXd& X,
638 | MatrixXd& qZ,
639 | StickBreak& weights,
640 | vector<GaussWish>& clusters,
641 | const double clusterprior,
642 | const int maxclusters,
643 | const bool verbose,
644 | const unsigned int nthreads
645 | )
646 | {
647 | if (verbose == true)
648 | cout << "Learning VDP..." << endl; // Print start
649 |
650 | // Make temporary vectors of data to use with cluster()
651 | vMatrixXd vecX(1, X); // copies :-(
652 | vMatrixXd vecqZ;
653 | vector<StickBreak> vecweights(1, weights);
654 |
655 | // Perform model learning and selection
656 | double F = cluster<StickBreak, GaussWish>(vecX, vecqZ, vecweights, clusters,
657 | clusterprior, maxclusters, false,
658 | verbose, nthreads);
659 |
660 | // Return final Free energy and qZ
661 | qZ = vecqZ[0]; // copies :-(
662 | weights = vecweights[0];
663 | return F;
664 | }
665 |
666 |
667 | double libcluster::learnBGMM (
668 | const MatrixXd& X,
669 | MatrixXd& qZ,
670 | Dirichlet& weights,
671 | vector<GaussWish>& clusters,
672 | const double clusterprior,
673 | const int maxclusters,
674 | const bool verbose,
675 | const unsigned int nthreads
676 | )
677 | {
678 | if (verbose == true)
679 | cout << "Learning Bayesian GMM..." << endl; // Print start
680 |
681 | // Make temporary vectors of data to use with cluster()
682 | vMatrixXd vecX(1, X); // copies :-(
683 | vMatrixXd vecqZ;
684 | vector<Dirichlet> vecweights(1, weights);
685 |
686 | // Perform model learning and selection
687 | double F = cluster<Dirichlet, GaussWish>(vecX, vecqZ, vecweights, clusters,
688 | clusterprior, maxclusters, false,
689 | verbose, nthreads);
690 |
691 | // Return final Free energy and qZ
692 | qZ = vecqZ[0]; // copies :-(
693 | weights = vecweights[0];
694 | return F;
695 | }
696 |
697 |
698 | double libcluster::learnDGMM (
699 | const MatrixXd& X,
700 | MatrixXd& qZ,
701 | Dirichlet& weights,
702 | vector<NormGamma>& clusters,
703 | const double clusterprior,
704 | const int maxclusters,
705 | const bool verbose,
706 | const unsigned int nthreads
707 | )
708 | {
709 | if (verbose == true)
710 | cout << "Learning Bayesian diagonal GMM..." << endl; // Print start
711 |
712 | // Make temporary vectors of data to use with cluster()
713 | vMatrixXd vecX(1, X); // copies :-(
714 | vMatrixXd vecqZ;
715 | vector<Dirichlet> vecweights(1, weights);
716 |
717 | // Perform model learning and selection
718 | double F = cluster<Dirichlet, NormGamma>(vecX, vecqZ, vecweights, clusters,
719 | clusterprior, maxclusters, false,
720 | verbose, nthreads);
721 |
722 | // Return final Free energy and qZ
723 | qZ = vecqZ[0]; // copies :-(
724 | weights = vecweights[0];
725 | return F;
726 | }
727 |
728 |
729 | double libcluster::learnBEMM (
730 | const MatrixXd& X,
731 | MatrixXd& qZ,
732 | Dirichlet& weights,
733 | vector<ExpGamma>& clusters,
734 | const double clusterprior,
735 | const int maxclusters,
736 | const bool verbose,
737 | const unsigned int nthreads
738 | )
739 | {
740 | if ((X.array() < 0).any() == true)
741 | throw invalid_argument("X has to be in the range [0, inf)!");
742 |
743 | if (verbose == true)
744 | cout << "Learning Bayesian EMM..." << endl; // Print start
745 |
746 | // Make temporary vectors of data to use with cluster()
747 | vMatrixXd vecX(1, X); // copies :-(
748 | vMatrixXd vecqZ;
749 | vector<Dirichlet> vecweights(1, weights);
750 |
751 | // Perform model learning and selection
752 | double F = cluster<Dirichlet, ExpGamma>(vecX, vecqZ, vecweights, clusters,
753 | clusterprior, maxclusters, false,
754 | verbose, nthreads);
755 |
756 | // Return final Free energy and qZ
757 | qZ = vecqZ[0]; // copies :-(
758 | weights = vecweights[0];
759 | return F;
760 | }
761 |
762 |
763 | double libcluster::learnGMC (
764 | const vMatrixXd& X,
765 | vMatrixXd& qZ,
766 | vector<GDirichlet>& weights,
767 | vector<GaussWish>& clusters,
768 | const double clusterprior,
769 | const int maxclusters,
770 | const bool sparse,
771 | const bool verbose,
772 | const unsigned int nthreads
773 | )
774 | {
775 | string spnote = (sparse == true) ? "(sparse) " : "";
776 |
777 | // Model selection and Variational Bayes learning
778 | if (verbose == true)
779 | cout << "Learning " << spnote << "GMC..." << endl;
780 |
781 | return cluster<GDirichlet, GaussWish>(X, qZ, weights, clusters, clusterprior,
782 | maxclusters, sparse, verbose,
783 | nthreads);
784 | }
785 |
786 |
787 | double libcluster::learnSGMC (
788 | const vMatrixXd& X,
789 | vMatrixXd& qZ,
790 | vector<Dirichlet>& weights,
791 | vector<GaussWish>& clusters,
792 | const double clusterprior,
793 | const int maxclusters,
794 | const bool sparse,
795 | const bool verbose,
796 | const unsigned int nthreads
797 | )
798 | {
799 | string spnote = (sparse == true) ? "(sparse) " : "";
800 |
801 | // Model selection and Variational Bayes learning
802 | if (verbose == true)
803 | cout << "Learning " << spnote << "Symmetric GMC..." << endl;
804 |
805 | return cluster<Dirichlet, GaussWish>(X, qZ, weights, clusters, clusterprior,
806 | maxclusters, sparse, verbose, nthreads);
807 | }
808 |
809 |
810 | double libcluster::learnDGMC (
811 | const vMatrixXd& X,
812 | vMatrixXd& qZ,
813 | vector<GDirichlet>& weights,
814 | vector<NormGamma>& clusters,
815 | const double clusterprior,
816 | const int maxclusters,
817 | const bool sparse,
818 | const bool verbose,
819 | const unsigned int nthreads
820 | )
821 | {
822 | string spnote = (sparse == true) ? "(sparse) " : "";
823 |
824 | // Model selection and Variational Bayes learning
825 | if (verbose == true)
826 | cout << "Learning " << spnote << "Diagonal GMC..." << endl;
827 |
828 | return cluster<GDirichlet, NormGamma>(X, qZ, weights, clusters, clusterprior,
829 | maxclusters, sparse, verbose,
830 | nthreads);
831 | }
832 |
833 |
834 | double libcluster::learnEGMC (
835 | const vMatrixXd& X,
836 | vMatrixXd& qZ,
837 | vector<GDirichlet>& weights,
838 | vector<ExpGamma>& clusters,
839 | const double clusterprior,
840 | const int maxclusters,
841 | const bool sparse,
842 | const bool verbose,
843 | const unsigned int nthreads
844 | )
845 | {
846 | string spnote = (sparse == true) ? "(sparse) " : "";
847 |
848 | // Check for negative inputs
849 | for (unsigned int j = 0; j < X.size(); ++j)
850 | if ((X[j].array() < 0).any() == true)
851 | throw invalid_argument("X has to be in the range [0, inf)!");
852 |
853 | // Model selection and Variational Bayes learning
854 | if (verbose == true)
855 | cout << "Learning " << spnote << "Exponential GMC..." << endl;
856 |
857 | return cluster(X, qZ, weights, clusters, clusterprior,
858 | maxclusters, sparse, verbose, nthreads);
859 | }
860 |
--------------------------------------------------------------------------------
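All of the public learners above follow the same pattern: wrap the single-matrix inputs in one-element vectors, delegate to the private cluster() template, and unwrap the results. A minimal usage sketch for one of them, assuming the vector<ExpGamma> cluster type reconstructed above and a PRIORVAL default prior in libcluster.h (both are assumptions, not confirmed by this excerpt):

    #include <iostream>
    #include <vector>
    #include <Eigen/Dense>
    #include "libcluster.h"
    #include "distributions.h"

    int main ()
    {
      // 100 two-dimensional, non-negative observations (the EMM needs X >= 0)
      Eigen::MatrixXd X = Eigen::MatrixXd::Random(100, 2).cwiseAbs();

      Eigen::MatrixXd qZ;                             // label posteriors (output)
      distributions::Dirichlet weights;               // mixture weights (output)
      std::vector<distributions::ExpGamma> clusters;  // cluster params (output)

      // Arguments as in the definition above: prior, max clusters (-1 = no
      // limit), verbose flag and thread count
      double F = libcluster::learnBEMM(X, qZ, weights, clusters,
                                       libcluster::PRIORVAL, -1, true, 1);

      std::cout << "F = " << F << ", K = " << clusters.size() << std::endl;
      return 0;
    }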
/src/comutils.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * libcluster -- A collection of hierarchical Bayesian clustering algorithms.
3 | * Copyright (C) 2013 Daniel M. Steinberg (daniel.m.steinberg@gmail.com)
4 | *
5 | * This file is part of libcluster.
6 | *
7 | * libcluster is free software: you can redistribute it and/or modify it under
8 | * the terms of the GNU Lesser General Public License as published by the Free
9 | * Software Foundation, either version 3 of the License, or (at your option)
10 | * any later version.
11 | *
12 | * libcluster is distributed in the hope that it will be useful, but WITHOUT
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
15 | * for more details.
16 | *
17 | * You should have received a copy of the GNU Lesser General Public License
18 | * along with libcluster. If not, see <http://www.gnu.org/licenses/>.
19 | */
20 |
21 | #include "comutils.h"
22 |
23 |
24 | //
25 | // Namespaces
26 | //
27 |
28 | using namespace std;
29 | using namespace Eigen;
30 | using namespace libcluster;
31 | using namespace probutils;
32 | using namespace distributions;
33 |
34 |
35 | //
36 | // Public Functions
37 | //
38 |
39 | void comutils::arrfind (
40 | const ArrayXb& expression,
41 | ArrayXi& indtrue,
42 | ArrayXi& indfalse
43 | )
44 | {
45 | const int N = expression.size(),
46 | M = expression.count();
47 |
48 | indtrue.setZero(M);
49 | indfalse.setZero(N-M);
50 |
51 | for (int n = 0, m = 0, l = 0; n < N; ++n)
52 | expression(n) ? indtrue(m++) = n : indfalse(l++) = n;
53 | }
54 |
55 |
56 | ArrayXi comutils::partobs (
57 | const MatrixXd& X,
58 | const ArrayXb& Xpart,
59 | MatrixXd& Xk
60 | )
61 | {
62 | const int M = Xpart.count();
63 |
64 | ArrayXi pidx, npidx;
65 | comutils::arrfind(Xpart, pidx, npidx);
66 |
67 | Xk.setZero(M, X.cols());
68 | for (int m=0; m < M; ++m) // index copy X to Xk
69 | Xk.row(m) = X.row(pidx(m));
70 |
71 | return pidx;
72 | }
73 |
74 |
75 | MatrixXd comutils::auglabels (
76 | const double k,
77 | const ArrayXi& map,
78 | const ArrayXb& Zsplit,
79 | const MatrixXd& qZ
80 | )
81 | {
82 | const int K = qZ.cols(),
83 | S = Zsplit.count();
84 |
85 | if (Zsplit.size() != map.size())
86 | throw invalid_argument("map and split must be the same size!");
87 |
88 | // Create new qZ for all data with split
89 | MatrixXd qZaug = qZ; // Copy the existing qZ into the new
90 | qZaug.conservativeResize(Eigen::NoChange, K+1);
91 | qZaug.col(K).setZero();
92 |
93 | ArrayXi sidx, nsidx;
94 | comutils::arrfind(Zsplit, sidx, nsidx);
95 |
96 | // Copy split cluster assignments (augment qZ effectively)
97 | for (int s = 0; s < S; ++s)
98 | {
99 | qZaug(map(sidx(s)), K) = qZ(map(sidx(s)), k); // Add new cluster onto end
100 | qZaug(map(sidx(s)), k) = 0;
101 | }
102 |
103 | return qZaug;
104 | }
105 |
--------------------------------------------------------------------------------
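arrfind() and partobs() do the index bookkeeping for the split heuristics: the first collects the indices of the true and false entries of a boolean array, the second gathers the flagged rows of X into a compact matrix and returns their original locations. A small behavioural sketch (ArrayXb is the boolean-array typedef from distributions.h):

    #include <iostream>
    #include <Eigen/Dense>
    #include "comutils.h"

    int main ()
    {
      // Flag rows 0 and 2 of a 3x2 observation matrix
      distributions::ArrayXb part(3);
      part << true, false, true;

      Eigen::MatrixXd X(3, 2);
      X << 1, 2,
           3, 4,
           5, 6;

      Eigen::ArrayXi ontrue, onfalse;
      comutils::arrfind(part, ontrue, onfalse); // ontrue = [0, 2], onfalse = [1]

      Eigen::MatrixXd Xk;
      Eigen::ArrayXi pidx = comutils::partobs(X, part, Xk); // Xk = rows 0 and 2

      std::cout << "Xk =\n" << Xk << "\npidx = " << pidx.transpose() << std::endl;
      return 0;
    }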
/src/comutils.h:
--------------------------------------------------------------------------------
1 | /*
2 | * libcluster -- A collection of hierarchical Bayesian clustering algorithms.
3 | * Copyright (C) 2013 Daniel M. Steinberg (daniel.m.steinberg@gmail.com)
4 | *
5 | * This file is part of libcluster.
6 | *
7 | * libcluster is free software: you can redistribute it and/or modify it under
8 | * the terms of the GNU Lesser General Public License as published by the Free
9 | * Software Foundation, either version 3 of the License, or (at your option)
10 | * any later version.
11 | *
12 | * libcluster is distributed in the hope that it will be useful, but WITHOUT
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
15 | * for more details.
16 | *
17 | * You should have received a copy of the GNU Lesser General Public License
18 | * along with libcluster. If not, see <http://www.gnu.org/licenses/>.
19 | */
20 |
21 | #ifndef COMUTILS_H
22 | #define COMUTILS_H
23 |
24 | #include <vector>
25 | #include <stdexcept>
26 | #include <Eigen/Dense>
27 | #include "libcluster.h"
28 | #include "probutils.h"
29 | #include "distributions.h"
30 |
31 |
32 | /*! Namespace that implements various common utilities used in the algorithms */
33 | namespace comutils
34 | {
35 |
36 |
37 | //
38 | // Helper structures
39 | //
40 |
41 | /* Triplet that contains the information for choosing a good cluster split
42 | * ordering.
43 | */
44 | struct GreedOrder
45 | {
46 | int k; // Cluster number/index
47 | int tally; // Number of times a cluster has failed to split
48 | double Fk; // The clusters approximate free energy contribution
49 | };
50 |
51 |
52 | //
53 | // Helper functions
54 | //
55 |
56 | /* Compares two GreedOrder triplets and returns which is more optimal to split.
57 | * Precedence is given to a lower split-fail tally, and then to a greater free energy
58 | * contribution.
59 | */
60 | bool inline greedcomp (const GreedOrder& i, const GreedOrder& j)
61 | {
62 | if (i.tally == j.tally) // If the tally is the same, use the greater Fk
63 | return i.Fk > j.Fk;
64 | else if (i.tally < j.tally) // Otherwise prefer the lower tally
65 | return true;
66 | else
67 | return false;
68 | }
69 |
70 |
71 | /* Find the indices of the ones and zeros in a binary array in the order they
72 | * appear.
73 | *
74 | * mutable: indtrue the indices of the true values in the array "expression"
75 | * mutable: indfalse the indices of the false values in the array "expression"
76 | */
77 | void arrfind (
78 | const distributions::ArrayXb& expression,
79 | Eigen::ArrayXi& indtrue,
80 | Eigen::ArrayXi& indfalse
81 | );
82 |
83 |
84 | /* Partition the observations, X according to a logical array.
85 | *
86 | * mutable: Xk, MxD matrix of observations that have a corresponding 1 in Xpart.
87 | * returns: an Mx1 array of the locations of Xk in X.
88 | */
89 | Eigen::ArrayXi partobs (
90 | const Eigen::MatrixXd& X, // NxD matrix of observations.
91 | const distributions::ArrayXb& Xpart, // Nx1 indicator vector to partition X.
92 | Eigen::MatrixXd& Xk // MxD matrix of obs. belonging to new partition
93 | );
94 |
95 |
96 | /* Augment the assignment matrix, qZ with the split cluster entry.
97 | *
98 | * The new cluster assignments are put in the K+1 th column in the return matrix
99 | * returns: The new observation assignments, [Nx(K+1)].
100 | * throws: std::invalid_argument if map.size() != Zsplit.size().
101 | */
102 | Eigen::MatrixXd auglabels (
103 | const double k, // Cluster to split (i.e. which column of qZ)
104 | const Eigen::ArrayXi& map, // Mapping from array of partitioned obs to qZ
105 | const distributions::ArrayXb& Zsplit, // Boolean array of assignments.
106 | const Eigen::MatrixXd& qZ // [NxK] observation assignment prob. matrix.
107 | );
108 |
109 |
110 | /* Check if any sufficient statistics are empty.
111 | *
112 | * returns: True if any of the sufficient statistics are empty
113 | */
114 | template <class C> bool anyempty (const std::vector<C>& clusters)
115 | {
116 | const unsigned int K = clusters.size();
117 |
118 | for (unsigned int k = 0; k < K; ++k)
119 | if (clusters[k].getN() <= 1)
120 | return true;
121 |
122 | return false;
123 | }
124 |
125 | }
126 |
127 | #endif // COMUTILS_H
128 |
--------------------------------------------------------------------------------
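The split ordering encoded by greedcomp() can be exercised directly with std::sort: a lower fail tally always sorts first, and the free energy contribution Fk only breaks ties. A sketch with illustrative values:

    #include <algorithm>
    #include <iostream>
    #include <vector>
    #include "comutils.h"

    int main ()
    {
      std::vector<comutils::GreedOrder> ord(3);
      ord[0] = {0, 1, 50.0};  // cluster 0: one failed split, high Fk
      ord[1] = {1, 0, 10.0};  // cluster 1: no failed splits, low Fk
      ord[2] = {2, 0, 30.0};  // cluster 2: no failed splits, higher Fk

      // Sorts to clusters 2, 1, 0: tally takes precedence, then greater Fk
      std::sort(ord.begin(), ord.end(), comutils::greedcomp);

      for (const comutils::GreedOrder& o : ord)
        std::cout << "cluster " << o.k << '\n';
      return 0;
    }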
/src/distributions.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * libcluster -- A collection of hierarchical Bayesian clustering algorithms.
3 | * Copyright (C) 2013 Daniel M. Steinberg (daniel.m.steinberg@gmail.com)
4 | *
5 | * This file is part of libcluster.
6 | *
7 | * libcluster is free software: you can redistribute it and/or modify it under
8 | * the terms of the GNU Lesser General Public License as published by the Free
9 | * Software Foundation, either version 3 of the License, or (at your option)
10 | * any later version.
11 | *
12 | * libcluster is distributed in the hope that it will be useful, but WITHOUT
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
15 | * for more details.
16 | *
17 | * You should have received a copy of the GNU Lesser General Public License
18 | * along with libcluster. If not, see <http://www.gnu.org/licenses/>.
19 | */
20 |
21 | #include <boost/math/special_functions.hpp>
22 | #include "distributions.h"
23 | #include "probutils.h"
24 |
25 | //
26 | // Namespaces
27 | //
28 |
29 | using namespace std;
30 | using namespace Eigen;
31 | using namespace probutils;
32 | using namespace boost::math;
33 |
34 |
35 | //
36 | // File scope variables
37 | //
38 |
39 | // Define pi
40 | const double pi = constants::pi<double>(); // Boost high precision pi
41 |
42 |
43 | //
44 | // Private Helper Functions
45 | //
46 |
47 | /* Compare an <int, double> pair by the double member. Useful
48 | * for sorting an array in descending order while retaining a notion of
49 | * the original order of the array.
50 | *
51 | * returns: true if i.second > j.second.
52 | */
53 | bool inline obscomp (
54 | const std::pair<int, double>& i, // the first pair to compare.
55 | const std::pair<int, double>& j // the second pair to compare.
56 | )
57 | {
58 | return i.second > j.second;
59 | }
60 |
61 |
62 | /* Enumerate the dimensions.
63 | *
64 | * returns: 1:D or if D = 1, return 1.
65 | */
66 | ArrayXd enumdims (const int D)
67 | {
68 | ArrayXd l;
69 |
70 | if (D > 1)
71 | l = ArrayXd::LinSpaced(D, 1, D);
72 | else
73 | l.setOnes(1);
74 |
75 | return l;
76 | }
77 |
78 |
79 | //
80 | // Stick-Breaking (Dirichlet Process) weight distribution.
81 | //
82 |
83 | distributions::StickBreak::StickBreak ()
84 | : WeightDist(),
85 | alpha1_p(distributions::ALPHA1PRIOR),
86 | alpha2_p(distributions::ALPHA2PRIOR),
87 | alpha1(ArrayXd::Constant(1, distributions::ALPHA1PRIOR)),
88 | alpha2(ArrayXd::Constant(1, distributions::ALPHA2PRIOR)),
89 | E_logv(ArrayXd::Zero(1)),
90 | E_lognv(ArrayXd::Zero(1)),
91 | E_logpi(ArrayXd::Zero(1)),
92 | ordvec(1, pair<int, double>(0, 0))
93 | {
94 | this->priorfcalc();
95 | }
96 |
97 |
98 | distributions::StickBreak::StickBreak (const double concentration)
99 | : WeightDist(),
100 | alpha2_p(distributions::ALPHA2PRIOR),
101 | alpha2(ArrayXd::Constant(1, distributions::ALPHA2PRIOR)),
102 | E_logv(ArrayXd::Zero(1)),
103 | E_lognv(ArrayXd::Zero(1)),
104 | E_logpi(ArrayXd::Zero(1)),
105 | ordvec(1, pair<int, double>(0, 0))
106 | {
107 | if (concentration <=0)
108 | throw invalid_argument("Concentration parameter has to be > 0!");
109 |
110 | this->alpha1_p = concentration;
111 | this->alpha1 = ArrayXd::Constant(1, concentration);
112 | this->priorfcalc();
113 | }
114 |
115 |
116 | void distributions::StickBreak::priorfcalc (void)
117 | {
118 | // Prior free energy contribution
119 | this->F_p = lgamma(this->alpha1_p) + lgamma(this->alpha2_p)
120 | - lgamma(this->alpha1_p + this->alpha2_p);
121 | }
122 |
123 |
124 | void distributions::StickBreak::update (const ArrayXd& Nk)
125 | {
126 | const int K = Nk.size();
127 |
128 | // Destructively resize members to be the same size as Nk, no-op if same
129 | this->alpha1.resize(K);
130 | this->alpha2.resize(K);
131 | this->E_logv.resize(K);
132 | this->E_lognv.resize(K);
133 | this->E_logpi.resize(K);
134 | this->ordvec.resize(K, pair<int, double>(-1, -1));
135 |
136 | // Order independent update
137 | this->Nk = Nk;
138 | this->alpha1 = this->alpha1_p + Nk;
139 |
140 | // Get the size order of the clusters for sorting
141 | for (int k = 0; k < K; ++k)
142 | {
143 | this->ordvec[k].first = k;
144 | this->ordvec[k].second = Nk(k);
145 | }
146 | sort(this->ordvec.begin(), this->ordvec.end(), obscomp);
147 |
148 | // Now do order dependent updates
149 | const double N = Nk.sum();
150 | double cumNk = 0, cumE_lognv = 0;
151 | for (int idx = 0, k; idx < K; ++idx)
152 | {
153 | k = this->ordvec[idx].first;
154 |
155 | // Alpha 2
156 | cumNk += Nk(k); // Accumulate cluster size sum
157 | this->alpha2(k) = this->alpha2_p + (N - cumNk);
158 |
159 | // Expected stick lengths
160 | double psisum = digamma(this->alpha1(k) + this->alpha2(k));
161 | this->E_logv(k) = digamma(this->alpha1(k)) - psisum;
162 | this->E_lognv(k) = digamma(this->alpha2(k)) - psisum;
163 |
164 | // Expected weights
165 | this->E_logpi(k) = this->E_logv(k) + cumE_lognv;
166 | cumE_lognv += E_lognv(k); // Accumulate log stick length left
167 | }
168 | }
169 |
170 |
171 | double distributions::StickBreak::fenergy () const
172 | {
173 | const int K = this->alpha1.size();
174 |
175 | return K * this->F_p + (mxlgamma(this->alpha1 + this->alpha2).array()
176 | - mxlgamma(this->alpha1).array() - mxlgamma(this->alpha2).array()
177 | + (this->alpha1 - this->alpha1_p) * this->E_logv
178 | + (this->alpha2 - this->alpha2_p) * this->E_lognv).sum();
179 | }
180 |
181 |
182 | //
183 | // Generalised Dirichlet weight distribution.
184 | //
185 |
186 | void distributions::GDirichlet::update (const ArrayXd& Nk)
187 | {
188 | // Call base class (stick breaking) update
189 | this->StickBreak::update(Nk);
190 | const int smallk = (this->ordvec.end() - 1)->first; // Get smallest cluster
191 |
192 | // Set last stick length to 1 ( i.e. E[log v_K] = 0 ) and adjust log marginal
193 | this->E_logpi(smallk) = this->E_logpi(smallk) - this->E_logv(smallk);
194 | this->E_logv(smallk) = 0; // exp(E[log v_K]) = 1
195 | this->E_lognv(smallk) = 0; // Undefined, but set to zero
196 | }
197 |
198 |
199 | double distributions::GDirichlet::fenergy () const
200 | {
201 | const int K = this->ordvec.size();
202 |
203 | // GDir only has K-1 parameters, so we don't calculate the last F contrib.
204 | double Fpi = 0;
205 | for (int idx = 0, k = 0; idx < K-1; ++idx)
206 | {
207 | k = this->ordvec[idx].first;
208 | Fpi += lgamma(this->alpha1(k) + this->alpha2(k))
209 | - lgamma(this->alpha1(k)) - lgamma(this->alpha2(k))
210 | + (this->alpha1(k) - this->alpha1_p) * this->E_logv(k)
211 | + (this->alpha2(k) - this->alpha2_p) * this->E_lognv(k);
212 | }
213 |
214 | return (K-1) * this->F_p + Fpi;
215 | }
216 |
217 |
218 | //
219 | // Dirichlet weight distribution.
220 | //
221 |
222 | distributions::Dirichlet::Dirichlet ()
223 | : WeightDist(),
224 | alpha_p(distributions::ALPHA1PRIOR),
225 | alpha(ArrayXd::Constant(1, distributions::ALPHA1PRIOR)),
226 | E_logpi(ArrayXd::Zero(1))
227 | {}
228 |
229 |
230 | distributions::Dirichlet::Dirichlet (const double alpha)
231 | : WeightDist(),
232 | E_logpi(ArrayXd::Zero(1))
233 | {
234 | if (alpha <= 0)
235 | throw invalid_argument("Alpha prior must be > 0!");
236 |
237 | alpha_p = alpha;
238 | this->alpha = ArrayXd::Constant(1, alpha);
239 | }
240 |
241 |
242 | void distributions::Dirichlet::update (const ArrayXd& Nk)
243 | {
244 | const int K = Nk.size();
245 |
246 | // Destructively resize members to be the same size as Nk, no-op if same
247 | this->alpha.resize(K);
248 | this->E_logpi.resize(K);
249 |
250 | // Hyperparameter update
251 | this->Nk = Nk;
252 | this->alpha = this->alpha_p + Nk;
253 |
254 | // Expectation update
255 | this->E_logpi = mxdigamma(this->alpha).array() - digamma(this->alpha.sum());
256 | }
257 |
258 |
259 | double distributions::Dirichlet::fenergy () const
260 | {
261 | const int K = this->alpha.size();
262 |
263 | return lgamma(this->alpha.sum()) - (this->alpha_p-1) * this->E_logpi.sum()
264 | + ((this->alpha-1) * this->E_logpi - mxlgamma(this->alpha).array()).sum()
265 | - lgamma(K * this->alpha_p) + K * lgamma(this->alpha_p);
266 | }
267 |
268 |
269 | //
270 | // Gaussian Wishart cluster distribution.
271 | //
272 |
273 | distributions::GaussWish::GaussWish (
274 | const double clustwidth,
275 | const unsigned int D
276 | )
277 | : ClusterDist(clustwidth, D),
278 | nu_p(D),
279 | beta_p(distributions::BETAPRIOR),
280 | m_p(RowVectorXd::Zero(D))
281 | {
282 | if (clustwidth <= 0)
283 | throw invalid_argument("clustwidth must be > 0!");
284 |
285 | // Create Prior
286 | this->iW_p = this->nu_p * this->prior * MatrixXd::Identity(D, D);
287 |
288 | try
289 | { this->logdW_p = -logdet(this->iW_p); }
290 | catch (invalid_argument e)
291 | { throw invalid_argument(string("Creating prior: ").append(e.what())); }
292 |
293 | // Calculate prior free energy contribution
294 | this->F_p = mxlgamma((this->nu_p + 1
295 | - enumdims(this->m_p.cols())).matrix() / 2).sum();
296 |
297 | this->clearobs(); // Empty suff. stats. and set posteriors equal to priors
298 | }
299 |
300 |
301 | void distributions::GaussWish::addobs(const VectorXd& qZk, const MatrixXd& X)
302 | {
303 | if (X.cols() != this->D)
304 | throw invalid_argument("Mismatched dims. of cluster params and obs.!");
305 | if (qZk.rows() != X.rows())
306 | throw invalid_argument("qZk and X ar not the same length!");
307 |
308 | MatrixXd qZkX = qZk.asDiagonal() * X;
309 |
310 | this->N_s += qZk.sum();
311 | this->x_s += qZkX.colwise().sum(); // [1xD] row vector
312 | this->xx_s.noalias() += qZkX.transpose() * X; // [DxD] matrix
313 | }
314 |
315 |
316 | void distributions::GaussWish::update ()
317 | {
318 | // Prepare the Sufficient statistics
319 | RowVectorXd xk = RowVectorXd::Zero(this->D);
320 | if (this->N_s > 0)
321 | xk = this->x_s/this->N_s;
322 | MatrixXd Sk = this->xx_s - xk.transpose() * this->x_s;
323 | RowVectorXd xk_m = xk - this->m_p; // for iW, (xk - m)
324 |
325 | // Update posterior params
326 | this->N = this->N_s;
327 | this->nu = this->nu_p + this->N;
328 | this->beta = this->beta_p + this->N;
329 | this->m = (this->beta_p * this->m_p + this->x_s) / this->beta;
330 | this->iW = this->iW_p + Sk
331 | + (this->beta_p * this->N/this->beta) * xk_m.transpose() * xk_m;
332 |
333 | try
334 | { this->logdW = -logdet(this->iW); }
335 | catch (invalid_argument e)
336 | { throw runtime_error(string("Calc log(det(W)): ").append(e.what())); }
337 | }
338 |
339 |
340 | void distributions::GaussWish::clearobs ()
341 | {
342 | // Reset parameters back to prior values
343 | this->nu = this->nu_p;
344 | this->beta = this->beta_p;
345 | this->m = this->m_p;
346 | this->iW = this->iW_p;
347 | this->logdW = this->logdW_p;
348 |
349 | // Empty sufficient statistics
350 | this->N_s = 0;
351 | this->x_s = RowVectorXd::Zero(D);
352 | this->xx_s = MatrixXd::Zero(D,D);
353 | }
354 |
355 |
356 | VectorXd distributions::GaussWish::Eloglike (const MatrixXd& X) const
357 | {
358 | // Expectations of log Gaussian likelihood
359 | VectorXd E_logX(X.rows());
360 | double sumpsi = mxdigamma((this->nu+1-enumdims(this->D)).matrix()/2).sum();
361 | try
362 | {
363 | E_logX = 0.5 * (sumpsi + this->logdW - this->D * (1/this->beta + log(pi))
364 | - this->nu * mahaldist(X, this->m, this->iW).array()).matrix();
365 | }
366 | catch (invalid_argument e)
367 | { throw(string("Calculating Gaussian likelihood: ").append(e.what())); }
368 |
369 | return E_logX;
370 | }
371 |
372 |
373 | distributions::ArrayXb distributions::GaussWish::splitobs (
374 | const MatrixXd& X
375 | ) const
376 | {
377 |
378 | // Find the principal eigenvector using the power method if not already done
379 | VectorXd eigvec;
380 | eigpower(this->iW, eigvec);
381 |
382 | // 'split' the observations perpendicular to this eigenvector.
383 | return (((X.rowwise() - this->m)
384 | * eigvec.asDiagonal()).array().rowwise().sum()) >= 0;
385 | }
386 |
387 |
388 | double distributions::GaussWish::fenergy () const
389 | {
390 | const ArrayXd l = enumdims(this->D);
391 | double sumpsi = mxdigamma((this->nu + 1 - l).matrix() / 2).sum();
392 |
393 | return this->F_p + (this->D * (this->beta_p/this->beta - 1 - this->nu
394 | - log(this->beta_p/this->beta))
395 | + this->nu * ((this->iW.ldlt().solve(this->iW_p)).trace()
396 | + this->beta_p * mahaldist(this->m, this->m_p, this->iW).coeff(0,0))
397 | + this->nu_p * (this->logdW_p - this->logdW) + this->N*sumpsi)/2
398 | - mxlgamma((this->nu+1-l).matrix() / 2).sum();
399 | }
400 |
401 |
402 | //
403 | // Normal Gamma parameter distribution.
404 | //
405 |
406 | distributions::NormGamma::NormGamma (
407 | const double clustwidth,
408 | const unsigned int D
409 | )
410 | : ClusterDist(clustwidth, D),
411 | nu_p(distributions::NUPRIOR),
412 | beta_p(distributions::BETAPRIOR),
413 | m_p(RowVectorXd::Zero(D))
414 | {
415 | if (clustwidth <= 0)
416 | throw invalid_argument("clustwidth must be > 0!");
417 |
418 | // Create Prior
419 | this->L_p = this->nu_p * this->prior * RowVectorXd::Ones(D);
420 | this->logL_p = this->L_p.array().log().sum();
421 |
422 | this->clearobs(); // Empty suff. stats. and set posteriors equal to priors
423 | }
424 |
425 |
426 | void distributions::NormGamma::addobs (const VectorXd& qZk, const MatrixXd& X)
427 | {
428 | if (X.cols() != this->D)
429 | throw invalid_argument("Mismatched dims. of cluster params and obs.!");
430 | if (qZk.rows() != X.rows())
431 | throw invalid_argument("qZk and X are not the same length!");
432 |
433 | MatrixXd qZkX = qZk.asDiagonal() * X;
434 |
435 | this->N_s += qZk.sum();
436 | this->x_s += qZkX.colwise().sum(); // [1xD]
437 | this->xx_s += (qZkX.array() * X.array()).colwise().sum().matrix(); // [1xD]
438 | }
439 |
440 |
441 | void distributions::NormGamma::update ()
442 | {
443 | // Prepare the Sufficient statistics
444 | RowVectorXd xk = RowVectorXd::Zero(this->D);
445 | RowVectorXd Sk = RowVectorXd::Zero(this->D);
446 | if (this->N_s > 0)
447 | {
448 | xk = this->x_s/this->N_s;
449 | Sk = this->xx_s.array() - this->x_s.array().square()/this->N_s;
450 | }
451 |
452 | // Update posterior params
453 | this->N = this->N_s;
454 | this->beta = this->beta_p + this->N;
455 | this->nu = this->nu_p + this->N/2;
456 | this->m = (this->beta_p * this->m_p + x_s) / this->beta;
457 | this->L = this->L_p + Sk/2 + (this->beta_p * this->N / (2 * this->beta))
458 | * (xk - this->m_p).array().square().matrix();
459 |
460 | if ((this->L.array() <= 0).any())
461 | throw invalid_argument(string("Calc log(L): Variance is zero or less!"));
462 |
463 | this->logL = this->L.array().log().sum();
464 | }
465 |
466 |
467 | void distributions::NormGamma::clearobs ()
468 | {
469 | // Reset parameters back to prior values
470 | this->nu = this->nu_p;
471 | this->beta = this->beta_p;
472 | this->m = this->m_p;
473 | this->L = this->L_p;
474 | this->logL = this->logL_p;
475 |
476 | // Empty sufficient statistics
477 | this->N_s = 0;
478 | this->x_s = RowVectorXd::Zero(this->D);
479 | this->xx_s = RowVectorXd::Zero(this->D);
480 | }
481 |
482 |
483 | VectorXd distributions::NormGamma::Eloglike (const MatrixXd& X) const
484 | {
485 | // Distance evaluation in the exponent
486 | VectorXd Xmdist = (X.rowwise() - this->m).array().square().matrix()
487 | * this->L.array().inverse().matrix().transpose();
488 |
489 | // Expectations of log Gaussian likelihood
490 | return 0.5 * (this->D * (digamma(this->nu) - log(2 * pi) - 1/this->beta)
491 | - this->logL - this->nu * Xmdist.array());
492 | }
493 |
494 |
495 | distributions::ArrayXb distributions::NormGamma::splitobs (
496 | const MatrixXd& X
497 | ) const
498 | {
499 | // Find location of largest element in L, this is the 'eigenvector'
500 | int eigvec;
501 | this->L.maxCoeff(&eigvec);
502 |
503 | // 'split' the observations perpendicular to this 'eigenvector'.
504 | return (X.col(eigvec).array() - this->m(eigvec)) >= 0;
505 | }
506 |
507 |
508 | double distributions::NormGamma::fenergy () const
509 | {
510 | const VectorXd iL = this->L.array().inverse().matrix().transpose();
511 |
512 | return D*(lgamma(this->nu_p) - lgamma(this->nu)
513 | + this->N*digamma(this->nu)/2 - this->nu)
514 | + D/2 * (log(this->beta) - log(this->beta_p) - 1 + this->beta_p/this->beta)
515 | + this->beta_p*this->nu/2*(this->m - this->m_p).array().square().matrix()*iL
516 | + this->nu_p*(this->logL - this->logL_p) + this->nu*this->L_p*iL;
517 | }
518 |
519 |
520 | //
521 | // Exponential Gamma parameter distribution.
522 | //
523 |
524 | distributions::ExpGamma::ExpGamma (const double obsmag, const unsigned int D)
525 | : ClusterDist(obsmag, D),
526 | a_p(distributions::APRIOR),
527 | b_p(obsmag)
528 | {
529 | this->clearobs(); // Empty suff. stats. and set posteriors equal to priors
530 | }
531 |
532 |
533 | void distributions::ExpGamma::addobs (const VectorXd& qZk, const MatrixXd& X)
534 | {
535 | if (X.cols() != this->D)
536 | throw invalid_argument("Mismatched dims. of cluster params and obs.!");
537 | if (qZk.rows() != X.rows())
538 | throw invalid_argument("qZk and X are not the same length!");
539 |
540 | this->N_s += qZk.sum();
541 | this->x_s += (qZk.asDiagonal() * X).colwise().sum();
542 | }
543 |
544 |
545 | void distributions::ExpGamma::update ()
546 | {
547 | // Update posterior params
548 | this->N = this->N_s;
549 | this->a = this->a_p + this->N;
550 | this->ib = (this->b_p + this->x_s.array()).array().inverse().matrix();
551 | this->logb = - this->ib.array().log().sum();
552 | }
553 |
554 |
555 | void distributions::ExpGamma::clearobs ()
556 | {
557 | // Reset parameters back to prior values
558 | this->a = this->a_p;
559 | this->ib = RowVectorXd::Constant(this->D, 1/this->b_p);
560 | this->logb = this->D * log(this->b_p);
561 |
562 | // Empty sufficient statistics
563 | this->N_s = 0;
564 | this->x_s = RowVectorXd::Zero(this->D);
565 | }
566 |
567 |
568 | VectorXd distributions::ExpGamma::Eloglike (const MatrixXd& X) const
569 | {
570 | return this->D * digamma(this->a) - this->logb
571 | - (this->a * X * this->ib.transpose()).array();
572 | }
573 |
574 |
575 | distributions::ArrayXb distributions::ExpGamma::splitobs (
576 | const MatrixXd& X
577 | ) const
578 | {
579 | ArrayXd XdotL = X * (this->a * this->ib).transpose();
580 | return (XdotL > (XdotL.sum()/XdotL.size()));
581 | }
582 |
583 |
584 | double distributions::ExpGamma::fenergy () const
585 | {
586 | return this->D * ((this->a - this->a_p) * digamma(this->a) - this->a
587 | - this->a_p * log(this->b_p) - lgamma(this->a) + lgamma(this->a_p))
588 | + this->b_p * this->a * this->ib.sum() + this->a_p * this->logb;
589 | }
590 |
--------------------------------------------------------------------------------
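All of the weight distributions above are driven by the same two calls: update() with an array of soft cluster counts Nk, then the expectation and free energy accessors. A sketch of the stick-breaking update (Elogweight() is the accessor the algorithms in this repository use; assumed to be declared in distributions.h):

    #include <iostream>
    #include <Eigen/Dense>
    #include "distributions.h"

    int main ()
    {
      // Soft counts for three clusters
      Eigen::ArrayXd Nk(3);
      Nk << 50.0, 30.0, 20.0;

      distributions::StickBreak sb;  // default concentration prior
      sb.update(Nk);                 // size-biased, order-dependent update

      // exp(E[log pi_k]) is roughly proportional to the counts for large Nk
      std::cout << "weights: " << sb.Elogweight().exp().transpose() << std::endl;
      std::cout << "free energy: " << sb.fenergy() << std::endl;
      return 0;
    }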
/src/mcluster.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * libcluster -- A collection of hierarchical Bayesian clustering algorithms.
3 | * Copyright (C) 2013 Daniel M. Steinberg (daniel.m.steinberg@gmail.com)
4 | *
5 | * This file is part of libcluster.
6 | *
7 | * libcluster is free software: you can redistribute it and/or modify it under
8 | * the terms of the GNU Lesser General Public License as published by the Free
9 | * Software Foundation, either version 3 of the License, or (at your option)
10 | * any later version.
11 | *
12 | * libcluster is distributed in the hope that it will be useful, but WITHOUT
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
15 | * for more details.
16 | *
17 | * You should have received a copy of the GNU Lesser General Public License
18 | * along with libcluster. If not, see <http://www.gnu.org/licenses/>.
19 | */
20 |
21 | #include <omp.h>
22 | #include "libcluster.h"
23 | #include "probutils.h"
24 | #include "comutils.h"
25 |
26 |
27 | //
28 | // Namespaces
29 | //
30 |
31 | using namespace std;
32 | using namespace Eigen;
33 | using namespace probutils;
34 | using namespace distributions;
35 | using namespace comutils;
36 | using namespace libcluster;
37 |
38 |
39 | //
40 | // Variational Bayes Private Functions
41 | //
42 |
43 | /* The Variational Bayes Expectation step for weights in each group.
44 | *
45 | * mutable: Top-level cluster assignment probabilities, qYj
46 | * returns: The complete-data free energy, Y and Y+Z dep. terms, for group j.
47 | * throws: invalid_argument rethrown from other functions.
48 | */
49 | template <class WJ, class WT, class CT> double vbeY (
50 | const MatrixXd& Wj, // Top-level observations for group j
51 | const vMatrixXd& qZj, // Bottom-level cluster labels for group j
52 | const WJ& weightsj, // Group top-level cluster weights
53 | const vector<WT>& weights_t, // Bottom-level cluster proportion parameters
54 | const vector<CT>& clusters_t, // Top-level cluster parameters
55 | MatrixXd& qYj // Top-level cluster assignments for group j
56 | )
57 | {
58 | const unsigned int T = weights_t.size(),
59 | Ij = qZj.size(),
60 | K = qZj[0].cols();
61 |
62 | // No observations (may happen when splitting)
63 | if (Ij == 0)
64 | return 0;
65 |
66 | // Get log marginal weight likelihoods
67 | const ArrayXd E_logwj = weightsj.Elogweight();
68 |
69 | MatrixXd Njik(Ij, K), logqYj(Ij, T);
70 | ArrayXXd qZjiLike(Ij, T);
71 |
72 | // Get bottom-level cluster counts per "document/image"
73 | for (unsigned int i = 0; i < Ij; ++i)
74 | Njik.row(i) = qZj[i].colwise().sum();
75 |
76 | // Find Expectations of log joint observation probs
77 | for (unsigned int t = 0; t < T; ++t)
78 | {
79 | qZjiLike.col(t) = Njik * weights_t[t].Elogweight().matrix();
80 | logqYj.col(t) = qZjiLike.col(t) + E_logwj(t)
81 | + clusters_t[t].Eloglike(Wj).array();
82 | }
83 |
84 | // Log normalisation constant of log observation likelihoods
85 | VectorXd logZyj = logsumexp(logqYj);
86 |
87 | // Normalise and Compute Responsibilities
88 | qYj = (logqYj.colwise() - logZyj).array().exp().matrix();
89 |
90 | return ((qYj.array() * qZjiLike).rowwise().sum() - logZyj.array()).sum();
91 | }
92 |
93 |
94 | /* The Variational Bayes Expectation step for clusters in each "document", ji.
95 | *
96 | * mutable: Bottom-level cluster assignment probabilities, qZji
97 | * returns: The complete-data free energy, Z dep. terms, for group j.
98 | * throws: invalid_argument rethrown from other functions.
99 | */
100 | template <class WT, class CK> double vbeZ (
101 | const MatrixXd& Xji, // Observations in i in group j
102 | const RowVectorXd& qYji, // Top-level cluster assignment
103 | const vector<WT>& weights_t, // Top-level cluster parameters
104 | const vector<CK>& clusters_k, // Bottom-level cluster parameters
105 | MatrixXd& qZji // Observation to cluster assignments
106 | )
107 | {
108 | const int K = clusters_k.size(),
109 | Nji = Xji.rows(),
110 | T = weights_t.size();
111 |
112 | // No observations (may happen when splitting)
113 | if (Nji == 0)
114 | return 0;
115 |
116 | // Make top-level cluster global weights from weighted label parameters
117 | RowVectorXd E_logqYljt = RowVectorXd::Zero(K);
118 |
119 | for (int t = 0; t < T; ++t)
120 | E_logqYljt.noalias() += qYji(t) * weights_t[t].Elogweight().matrix();
121 |
122 | // Find Expectations of log joint observation probs
123 | MatrixXd logqZji = MatrixXd::Zero(Nji, K);
124 |
125 | for (int k = 0; k < K; ++k)
126 | logqZji.col(k) = E_logqYljt(k) + clusters_k[k].Eloglike(Xji).array();
127 |
128 | // Log normalisation constant of log observation likelihoods
129 | const VectorXd logZzji = logsumexp(logqZji);
130 |
131 | // Normalise and Compute Responsibilities
132 | qZji = (logqZji.colwise() - logZzji).array().exp().matrix();
133 |
134 | return -logZzji.sum();
135 | }
136 |
137 |
138 | /* Calculates the free energy lower bound for the model parameter distributions.
139 | *
140 | * returns: the free energy of the model
141 | */
142 | template <class WJ, class WT, class CT, class CK> double fenergy (
143 | const vector<WJ>& weights_j, // Group top-level cluster weights
144 | const vector<WT>& weights_t, // Top-level cluster proportion parameters
145 | const vector<CT>& clusters_t, // Top-level cluster other parameters
146 | const vector<CK>& clusters_k, // Bottom-level cluster parameters
147 | const double Fyz, // Free energy Y and cross Y-Z terms
148 | const double Fz // Free energy Z terms
149 | )
150 | {
151 | const int T = weights_t.size(),
152 | K = clusters_k.size(),
153 | J = weights_j.size();
154 |
155 | // Class parameter free energy
156 | double Ft = 0;
157 | for (int t = 0; t < T; ++t)
158 | Ft += weights_t[t].fenergy() + clusters_t[t].fenergy();
159 |
160 | // Cluster parameter free energy
161 | double Fk = 0;
162 | for (int k = 0; k < K; ++k)
163 | Fk += clusters_k[k].fenergy();
164 |
165 | // Weight parameter free energy
166 | double Fw = 0;
167 | for (int j = 0; j < J; ++j)
168 | Fw += weights_j[j].fenergy();
169 |
170 | return Fw + Ft + Fk + Fyz + Fz;
171 | }
172 |
173 |
174 | /* Variational Bayes EM.
175 | *
176 | * returns: Free energy of the whole model.
177 | * mutable: the bottom-level cluster indicators, qZ
178 | * mutable: the top-level cluster indicators, qY
179 | * mutable: model parameters weights_j, weights_t, clusters_k, clusters_t
180 | * throws: invalid_argument rethrown from other functions.
181 | * throws: runtime_error if there is a negative free energy.
182 | */
183 | template <class WJ, class WT, class CT, class CK> double vbem (
184 | const vMatrixXd& W, // Top-level observations
185 | const vvMatrixXd& X, // Bottom-level observations
186 | vMatrixXd& qY, // Top-level labels
187 | vvMatrixXd& qZ, // Bottom-level cluster labels
188 | vector<WJ>& weights_j, // Group top-level cluster weights
189 | vector<WT>& weights_t, // Top-level proportion cluster parameters
190 | vector<CT>& clusters_t, // Top-level other cluster parameters
191 | vector<CK>& clusters_k, // Bottom-level cluster parameters
192 | const double prior_t, // Top-level cluster prior
193 | const double prior_k, // Bottom-level cluster prior
194 | const int maxit = -1, // Max VBEM iterations (-1 = no max, default)
195 | const bool verbose = false // Verbose output
196 | )
197 | {
198 | const unsigned int J = X.size(),
199 | K = qZ[0][0].cols(),
200 | T = qY[0].cols();
201 |
202 | // Construct (empty) parameters
203 | weights_j.resize(J, WJ());
204 | weights_t.resize(T, WT());
205 | clusters_t.resize(T, CT(prior_t, W[0].cols()));
206 | clusters_k.resize(K, CK(prior_k, X[0][0].cols()));
207 |
208 | // Other loop variables for initialisation
209 | int it = 0;
210 | double F = numeric_limits<double>::max(), Fold;
211 |
212 | do
213 | {
214 | Fold = F;
215 |
216 | MatrixXd Ntk = MatrixXd::Zero(T, K); // Clear Sufficient Stats
217 |
218 | // VBM for top-level cluster weights
219 | #pragma omp parallel for schedule(guided)
220 | for (unsigned int j = 0; j < J; ++j)
221 | {
222 | // Accumulate suff. stats for bottom-level cluster counts
223 | for (unsigned int i = 0; i < X[j].size(); ++i)
224 | {
225 | MatrixXd Ntkji = qY[j].row(i).transpose() * qZ[j][i].colwise().sum();
226 | #pragma omp critical
227 | Ntk += Ntkji;
228 | }
229 |
230 | weights_j[j].update(qY[j].colwise().sum());
231 | }
232 |
233 | // VBM for top-level cluster parameters and proportions
234 | #pragma omp parallel for schedule(guided)
235 | for (unsigned int t = 0; t < T; ++t)
236 | {
237 | clusters_t[t].clearobs(); // Clear Sufficient Stats
238 |
239 | for (unsigned int j = 0; j < J; ++j) // Accumulate sufficient stats
240 | clusters_t[t].addobs(qY[j].col(t), W[j]);
241 |
242 | weights_t[t].update(Ntk.row(t)); // Bottom-level cluster counts.
243 | clusters_t[t].update();
244 | }
245 |
246 | // VBM for bottom-level cluster parameters
247 | #pragma omp parallel for schedule(guided)
248 | for (unsigned int k = 0; k < K; ++k)
249 | {
250 | clusters_k[k].clearobs(); // Clear Sufficient Stats
251 |
252 | for (unsigned int j = 0; j < J; ++j) // Accumulate sufficient stats
253 | for (unsigned int i = 0; i < X[j].size(); ++i)
254 | clusters_k[k].addobs(qZ[j][i].col(k), X[j][i]);
255 |
256 | clusters_k[k].update(); // Bottom-level observations
257 | }
258 |
259 | // Free energy data fit term accumulators
260 | double Fz = 0, Fyz = 0;
261 |
262 | // VBE for top-level cluster indicators
263 | #pragma omp parallel for schedule(guided) reduction(+ : Fyz)
264 | for (unsigned int j = 0; j < J; ++j)
265 | Fyz += vbeY(W[j], qZ[j], weights_j[j], weights_t, clusters_t,
266 | qY[j]);
267 |
268 | // VBE for bottom-level cluster indicators
269 | for (unsigned int j = 0; j < J; ++j)
270 | {
271 | #pragma omp parallel for schedule(guided) reduction(+ : Fz)
272 | for (unsigned int i = 0; i < X[j].size(); ++i)
273 | Fz += vbeZ(X[j][i], qY[j].row(i), weights_t, clusters_k,
274 | qZ[j][i]);
275 | }
276 |
277 | // Calculate free energy of model
278 | F = fenergy(weights_j, weights_t, clusters_t, clusters_k, Fyz,
279 | Fz);
280 |
281 | // Check bad free energy step
282 | if ((F-Fold)/abs(Fold) > libcluster::FENGYDEL)
283 | throw runtime_error("Free energy increase!");
284 |
285 | if (verbose == true) // Notify iteration
286 | cout << '-' << flush;
287 | }
288 | while ( (abs((Fold-F)/Fold) > libcluster::CONVERGE)
289 | && ( (++it < maxit) || (maxit < 0) ) );
290 |
291 | return F;
292 | }
293 |
294 |
295 | //
296 | // Model Selection and Heuristics Private Functions
297 | //
298 |
299 | /* Search in a greedy fashion for a mixture split that lowers model free
300 | * energy, or return false. Good, untried split candidates are attempted
301 | * first; as soon as a split candidate is found that lowers the model's F,
302 | * it is returned. This may not be the "best" split, but it is
303 | * certainly faster than an exhaustive search for the "best" split.
304 | *
305 | * returns: true if a split was found, false if no splits can be found
306 | * mutable: qZ is augmented with a new split if one is found, otherwise unchanged
307 | * mutable: qY is updated if a new split is found, otherwise unchanged
308 | * mutable: tally is a tally of times a cluster has been unsuccessfully split
309 | * throws: invalid_argument rethrown from other functions
310 | * throws: runtime_error from its internal VBEM calls
311 | */
312 | template <class WJ, class WT, class CT, class CK> bool ssplit (
313 | const vMatrixXd& W, // Top-level observations
314 | const vvMatrixXd& X, // Bottom-level observations
315 | const vector<CT>& clusters_t, // Top-level cluster Distributions
316 | const vector<CK>& clusters_k, // Bottom-level cluster Distributions
317 | vMatrixXd& qY, // Top-level cluster labels qY
318 | vvMatrixXd& qZ, // Bottom-level Cluster labels qZ
319 | vector<int>& tally, // Count of unsuccessful splits
320 | const double F, // Current model free energy
321 | const int maxK, // max number of (bottom) clusters
322 | const bool verbose // Verbose output
323 | )
324 | {
325 | const unsigned int J = X.size(),
326 | K = clusters_k.size();
327 |
328 | // Check if we have reached the max number of clusters
329 | if ( ((signed) K >= maxK) && (maxK >= 0) )
330 | return false;
331 |
332 | // Split order chooser and bottom-level cluster parameters
333 | tally.resize(K, 0); // Make sure tally is the right size
334 | vector<GreedOrder> ord(K);
335 |
336 | // Get cluster parameters and their free energy
337 | for (unsigned int k = 0; k < K; ++k)
338 | {
339 | ord[k].k = k;
340 | ord[k].tally = tally[k];
341 | ord[k].Fk = clusters_k[k].fenergy();
342 | }
343 |
344 | // Get bottom-level cluster likelihoods
345 | for (unsigned int j = 0; j < J; ++j)
346 | {
347 | // Add in cluster log-likelihood, weighted by global responsibility
348 | #pragma omp parallel for schedule(guided)
349 | for (unsigned int i = 0; i < X[j].size(); ++i)
350 | for (unsigned int k = 0; k < K; ++k)
351 | {
352 | double LL = qZ[j][i].col(k).dot(clusters_k[k].Eloglike(X[j][i]));
353 |
354 | #pragma omp atomic
355 | ord[k].Fk -= LL;
356 | }
357 | }
358 |
359 | // Sort clusters by split tally, then free energy contributions
360 | sort(ord.begin(), ord.end(), greedcomp);
361 |
362 | // Pre allocate big objects for loops (this makes a runtime difference)
363 | vector< vector<ArrayXi> > mapidx(J);
364 | vvMatrixXd qZref(J), qZaug(J), Xk(J);
365 |
366 | // Loop through each potential cluster in order and split it
367 | for (vector<GreedOrder>::iterator ko = ord.begin(); ko < ord.end(); ++ko)
368 | {
369 | const int k = ko->k;
370 |
371 | ++tally[k]; // increase this cluster's unsuccessful split tally by default
372 |
373 | // Don't waste time with clusters that can't really be split (min. 2:2)
374 | if (clusters_k[k].getN() < 4)
375 | continue;
376 |
377 | // Now split observations and qZ.
378 | int scount = 0, Mtot = 0;
379 |
380 | for (unsigned int j = 0; j < J; ++j)
381 | {
382 | mapidx[j].resize(X[j].size());
383 | qZref[j].resize(X[j].size());
384 | qZaug[j].resize(X[j].size());
385 | Xk[j].resize(X[j].size());
386 |
387 | #pragma omp parallel for schedule(guided) reduction(+ : Mtot, scount)
388 | for (unsigned int i = 0; i < X[j].size(); ++i)
389 | {
390 | // Make COPY of the observations with only relevant data points, p > 0.5
391 | mapidx[j][i] = partobs(X[j][i], (qZ[j][i].col(k).array()>0.5),
392 | Xk[j][i]);
393 | Mtot += Xk[j][i].rows();
394 |
395 | // Initial cluster split
396 | ArrayXb splitk = clusters_k[k].splitobs(Xk[j][i]);
397 | qZref[j][i].setZero(Xk[j][i].rows(), 2);
398 | qZref[j][i].col(0) = (splitk == true).cast<double>();
399 | qZref[j][i].col(1) = (splitk == false).cast<double>();
400 |
401 | // keep a track of number of splits
402 | scount += splitk.count();
403 | }
404 | }
405 |
406 | // Don't waste time with clusters that haven't been split sufficiently
407 | if ( (scount < 2) || (scount > (Mtot-2)) )
408 | continue;
409 |
410 | // Refine the split
411 | vector<WJ> iwspl;
412 | vector<CT> icspl;
413 | vector<WT> swspl;
414 | vector<CK> scspl;
415 | vMatrixXd qYaug = qY; // Copy :-(
416 | vbem(W, Xk, qYaug, qZref, iwspl, swspl, icspl, scspl,
417 | clusters_t[0].getprior(), clusters_k[0].getprior(), SPLITITER);
418 |
419 | if (anyempty(scspl) == true) // One cluster only
420 | continue;
421 |
422 | // Map the refined splits back to original whole-data problem
423 | for (unsigned int j = 0; j < J; ++j)
424 | {
425 | #pragma omp parallel for schedule(guided)
426 | for (unsigned int i = 0; i < X[j].size(); ++i)
427 | qZaug[j][i] = auglabels(k, mapidx[j][i],
428 | (qZref[j][i].col(1).array() > 0.5), qZ[j][i]);
429 | }
430 |
431 | // Calculate free energy of this split with ALL data (and refine a bit)
432 | qYaug = qY; // Copy :-(
433 | double Fs = vbem(W, X, qYaug, qZaug, iwspl, swspl, icspl,
434 | scspl, clusters_t[0].getprior(), clusters_k[0].getprior(), 1);
435 |
436 | if (anyempty(scspl) == true) // One cluster only
437 | continue;
438 |
439 | // Only notify here of split candidates
440 | if (verbose == true)
441 | cout << '=' << flush;
442 |
443 | // Test whether this cluster split is a keeper
444 | if ( (Fs < F) && (abs((F-Fs)/F) > CONVERGE) )
445 | {
446 | qY = qYaug;
447 | qZ = qZaug;
448 | tally[k] = 0; // Reset tally if successfully split
449 | return true;
450 | }
451 | }
452 |
453 | // Failed to find splits
454 | return false;
455 | }
456 |
457 |
458 | /* Find and remove all empty top-level clusters.
459 | *
460 | * returns: true if any clusters have been deleted, false if all are kept.
461 | * mutable: qY may have columns deleted if there are empty clusters found.
462 | * mutable: weights_t if there are empty top-level clusters found.
463 | * mutable: clusters_t if there are empty top-level clusters found.
464 | */
465 | template <class WT, class CT> bool prune_clusters_t (
466 | vMatrixXd& qY, // Probabilities qY
467 | vector<WT>& weights_t, // Top-level bottom-level cluster proportions
468 | vector<CT>& clusters_t, // Top-level clusters
469 | bool verbose = false // print status
470 | )
471 | {
472 | const unsigned int T = weights_t.size(),
473 | J = qY.size();
474 |
475 | // Look for empty clusters
476 | ArrayXd Nt(T);
477 | for (unsigned int t = 0; t < T; ++t)
478 | Nt(t) = weights_t[t].getNk().sum();
479 |
480 | // Find location of empty and full clusters
481 | ArrayXi eidx, fidx;
482 | arrfind(Nt.array() < 1, eidx, fidx);
483 | const unsigned int nempty = eidx.size();
484 |
485 | // If no clusters are empty, return false
486 | if (nempty == 0)
487 | return false;
488 |
489 | if (verbose == true)
490 | cout << '*' << flush;
491 |
492 | // Delete empty clusters
493 | for (int i = (nempty - 1); i >= 0; --i)
494 | {
495 | weights_t.erase(weights_t.begin() + eidx(i));
496 | clusters_t.erase(clusters_t.begin() + eidx(i));
497 | }
498 |
499 | // Delete empty cluster indicators by copying only full indicators
500 | const unsigned int newT = fidx.size();
501 | vMatrixXd newqY(J);
502 |
503 | for (unsigned int j = 0; j < J; ++j)
504 | {
505 | newqY[j].setZero(qY[j].rows(), newT);
506 | for (unsigned int t = 0; t < newT; ++t)
507 | newqY[j].col(t) = qY[j].col(fidx(t));
508 | }
509 |
510 | qY = newqY;
511 |
512 | return true;
513 | }
514 |
515 |
516 | /* The model selection algorithm
517 | *
518 | * returns: Free energy of the final model
519 | * mutable: qY the probabilistic top-level cluster assignments
520 | * mutable: qZ the probabilistic bottom-level cluster assignments
521 | * mutable: The top-level clusters and weights
522 | * mutable: The bottom-level clusters and bottom-level cluster weights
523 | * throws: invalid_argument from other functions
524 | * throws: runtime_error if free energy increases
525 | */
526 | template <class WJ, class WT, class CT, class CK> double mcluster (
527 | const vMatrixXd& W, // Top-level observations
528 | const vvMatrixXd& X, // Bottom-level observations
529 | vMatrixXd& qY, // Top-level labels
530 | vvMatrixXd& qZ, // Bottom-level labels
531 | vector<WJ>& weights_j, // Group top-level cluster weights
532 | vector<WT>& weights_t, // Top-level proportion cluster parameters
533 | vector<CT>& clusters_t, // Top-level cluster parameters
534 | vector<CK>& clusters_k, // Bottom-level cluster parameters
535 | const double prior_t, // Top-level cluster prior
536 | const double prior_k, // Bottom-level cluster prior
537 | const unsigned int maxT, // Truncation level for top-level clusters
538 | const int maxK, // max number of (bottom) clusters
539 | const bool verbose, // Verbose output
540 | const unsigned int nthreads // Number of threads for OpenMP to use
541 | )
542 | {
543 | if (nthreads < 1)
544 | throw invalid_argument("Must specify at least one thread for execution!");
545 | omp_set_num_threads(nthreads);
546 |
547 | // Do some observation validity checks
548 | if (W.size() != X.size()) // Same number of groups in observations
549 | throw invalid_argument("W and X need to have the same number of groups!");
550 |
551 | const unsigned int J = W.size();
552 |
553 | for (unsigned int j = 0; j < J; ++j) // Same number of images/docs in groups
554 | if ((unsigned) W[j].rows() != X[j].size())
555 | throw invalid_argument("W and X need to have the same number of 'docs'!");
556 |
557 | // Initialise qY randomly and qZ to ones
558 | qY.resize(J);
559 | qZ.resize(J);
560 |
561 | for (unsigned int j = 0; j < J; ++j)
562 | {
563 | ArrayXXd randm = (ArrayXXd::Random(X[j].size(), maxT)).abs();
564 | ArrayXd norm = randm.rowwise().sum();
565 | qY[j] = (randm.log().colwise() - norm.log()).exp();
566 |
567 | qZ[j].resize(X[j].size());
568 |
569 | for (unsigned int i = 0; i < X[j].size(); ++i)
570 | qZ[j][i].setOnes(X[j][i].rows(), 1);
571 | }
572 |
573 | bool emptyclasses = true, split = true;
574 | double F = 0;
575 | vector<int> stally;
576 |
577 | // Main loop
578 | while ((split == true) || (emptyclasses == true))
579 | {
580 |
581 | F = vbem(W, X, qY, qZ, weights_j, weights_t, clusters_t,
582 | clusters_k, prior_t, prior_k, -1, verbose);
583 |
584 | if (verbose == true)
585 | cout << '<' << flush; // Notify start bottom-level cluster search
586 |
587 | if (split == false) // Remove any empty weights
588 | emptyclasses = prune_clusters_t(qY, weights_t, clusters_t,
589 | verbose);
590 | else
591 | split = ssplit<WJ, WT, CT, CK>(W, X, clusters_t, clusters_k, qY, qZ,
592 | stally, F, maxK, verbose);
593 |
594 | if (verbose == true)
595 | cout << '>' << endl; // Notify end bottom-level cluster search
596 | }
597 |
598 | // Print finished notification if verbose
599 | if (verbose == true)
600 | {
601 | cout << "Finished!" << endl;
602 | cout << "Number of top level clusters = " << clusters_t.size();
603 | cout << ", and bottom level clusters = " << clusters_k.size() << endl;
604 | cout << "Free energy = " << F << endl;
605 | }
606 |
607 | return F;
608 | }
609 |
610 |
611 | //
612 | // Public Functions
613 | //
614 |
615 | double libcluster::learnMCM (
616 | const vMatrixXd& W,
617 | const vvMatrixXd& X,
618 | vMatrixXd& qY,
619 | vvMatrixXd& qZ,
620 | vector<GDirichlet>& weights_j,
621 | vector<Dirichlet>& weights_t,
622 | vector<GaussWish>& clusters_t,
623 | vector<GaussWish>& clusters_k,
624 | const double prior_t,
625 | const double prior_k,
626 | const unsigned int maxT,
627 | const int maxK,
628 | const bool verbose,
629 | const unsigned int nthreads
630 | )
631 | {
632 |
633 | if (verbose == true)
634 | cout << "Learning MCM..." << endl;
635 |
636 | // Model selection and Variational Bayes learning
637 | double F = mcluster(W, X, qY, qZ,
638 | weights_j, weights_t, clusters_t, clusters_k, prior_t, prior_k,
639 | maxT, maxK, verbose, nthreads);
640 |
641 | return F;
642 | }
643 |
--------------------------------------------------------------------------------
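learnMCM() takes grouped, two-level data: one top-level observation matrix per group, W[j], and one bottom-level matrix per "document", X[j][i], with W[j].rows() == X[j].size(). A usage sketch (the concrete weight/cluster types and the PRIORVAL default are assumptions inferred from the templates above, not confirmed by this excerpt):

    #include <iostream>
    #include <vector>
    #include <Eigen/Dense>
    #include "libcluster.h"
    #include "distributions.h"

    int main ()
    {
      const int J = 2;              // groups
      libcluster::vMatrixXd W(J);   // top-level observations, one matrix/group
      libcluster::vvMatrixXd X(J);  // bottom-level observations per document

      for (int j = 0; j < J; ++j)
      {
        W[j] = Eigen::MatrixXd::Random(5, 3);        // 5 documents, 3 features
        X[j].resize(5);
        for (int i = 0; i < 5; ++i)
          X[j][i] = Eigen::MatrixXd::Random(20, 2);  // 20 observations each
      }

      libcluster::vMatrixXd qY;
      libcluster::vvMatrixXd qZ;
      std::vector<distributions::GDirichlet> weights_j;  // assumed types
      std::vector<distributions::Dirichlet> weights_t;
      std::vector<distributions::GaussWish> clusters_t, clusters_k;

      double F = libcluster::learnMCM(W, X, qY, qZ, weights_j, weights_t,
                                      clusters_t, clusters_k,
                                      libcluster::PRIORVAL, libcluster::PRIORVAL,
                                      10, -1, true, 1);  // maxT = 10, no maxK

      std::cout << "Free energy: " << F << std::endl;
      return 0;
    }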
/src/probutils.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * libcluster -- A collection of hierarchical Bayesian clustering algorithms.
3 | * Copyright (C) 2013 Daniel M. Steinberg (daniel.m.steinberg@gmail.com)
4 | *
5 | * This file is part of libcluster.
6 | *
7 | * libcluster is free software: you can redistribute it and/or modify it under
8 | * the terms of the GNU Lesser General Public License as published by the Free
9 | * Software Foundation, either version 3 of the License, or (at your option)
10 | * any later version.
11 | *
12 | * libcluster is distributed in the hope that it will be useful, but WITHOUT
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
15 | * for more details.
16 | *
17 | * You should have received a copy of the GNU Lesser General Public License
18 | * along with libcluster. If not, see <http://www.gnu.org/licenses/>.
19 | */
20 |
21 | #include "probutils.h"
22 | #include <limits>
23 |
24 |
25 | //
26 | // Namespaces
27 | //
28 |
29 |
30 | using namespace std;
31 | using namespace Eigen;
32 |
33 |
34 | //
35 | // Local Constants
36 | //
37 |
38 |
39 | const double EIGCONTHRESH = 1.0e-8f;
40 | const int MAXITER = 100;
41 |
42 |
43 | //
44 | // Public Functions
45 | //
46 |
47 |
48 | RowVectorXd probutils::mean (const MatrixXd& X)
49 | {
50 | return X.colwise().sum()/X.rows();
51 | }
52 |
53 |
54 | RowVectorXd probutils::mean (const vector<MatrixXd>& X)
55 | {
56 | const int J = X.size(),
57 | D = X[0].cols();
58 | int N = 0;
59 | RowVectorXd mean = RowVectorXd::Zero(D);
60 |
61 | for (int j = 0; j < J; ++j)
62 | {
63 | if (X[j].cols() != D)
64 | throw invalid_argument("X dimensions are inconsistent between groups!");
65 |
66 | mean += X[j].colwise().sum();
67 | N += X[j].rows();
68 | }
69 | return mean / N;
70 | }
71 |
72 |
73 | RowVectorXd probutils::stdev (const MatrixXd& X)
74 | {
75 | RowVectorXd meanX = mean(X);
76 | return ((X.rowwise() - meanX).array().square().colwise().sum()
77 | / (X.rows()-1)).sqrt();
78 | }
79 |
80 |
81 | MatrixXd probutils::cov (const MatrixXd& X)
82 | {
83 | if (X.rows() <= 1)
84 | throw invalid_argument("Insufficient no. of observations.");
85 |
86 | MatrixXd X_mu = X.rowwise() - probutils::mean(X); // X - mu
87 | return (X_mu.transpose()*X_mu)/(X.rows()-1); // (X-mu)'*(X-mu)/(N-1)
88 | }
89 |
90 |
91 | MatrixXd probutils::cov (const vector<MatrixXd>& X)
92 | {
93 | const int J = X.size(),
94 | D = X[0].cols();
95 | int N = 0;
96 | const RowVectorXd mean = probutils::mean(X);
97 | MatrixXd cov = MatrixXd::Zero(D, D),
98 | X_mu;
99 |
100 | for (int j = 0; j < J; ++j)
101 | {
102 | if (X[j].rows() <= 1)
103 | throw invalid_argument("Insufficient no. of observations.");
104 | X_mu = X[j].rowwise() - mean;
105 | N += X[j].rows();
106 | cov.noalias() += (X_mu.transpose() * X_mu); // (X_j-mu)'*(X_j-mu)
107 | }
108 |
109 | return cov / (N-1);
110 | }
111 |
112 |
113 | VectorXd probutils::mahaldist (
114 | const MatrixXd& X,
115 | const RowVectorXd& mu,
116 | const MatrixXd& A
117 | )
118 | {
119 | // Check for same number of dimensions, D
120 | if((X.cols() != mu.cols()) || (X.cols() != A.cols()))
121 | throw invalid_argument("Arguments do not have the same dimensionality");
122 |
123 | // Check if A is square
124 | if (A.rows() != A.cols())
125 | throw invalid_argument("Matrix A must be square!");
126 |
127 | // Decompose A
128 | LDLT<MatrixXd> Aldl(A);
129 |
130 | // Check if A is PD
131 | if ((Aldl.vectorD().array() <= 0).any() == true)
132 | throw invalid_argument("Matrix A is not positive definite");
133 |
134 | // Do the Mahalanobis distance for each sample (N times)
135 | MatrixXd X_mu = (X.rowwise() - mu).transpose();
136 | return ((X_mu.array() * (Aldl.solve(X_mu)).array())
137 | .colwise().sum()).transpose();
138 | }
139 |
140 |
141 | VectorXd probutils::logsumexp (const MatrixXd& X)
142 | {
143 | const VectorXd mx = X.rowwise().maxCoeff(); // Get max of each row
144 |
145 | // Perform the sum(exp(x - mx)) part
146 | ArrayXd se = ((X.colwise() - mx).array().exp()).rowwise().sum();
147 |
148 | // return total log(sum(exp(x))) - hoping for return value optimisation
149 | return (se.log()).matrix() + mx;
150 | }
151 |
152 |
153 | double probutils::eigpower (const MatrixXd& A, VectorXd& eigvec)
154 | {
155 | // Check if A is square
156 | if (A.rows() != A.cols())
157 | throw invalid_argument("Matrix A must be square!");
158 |
159 | // Check if A is a scalar
160 | if (A.rows() == 1)
161 | {
162 | eigvec.setOnes(1);
163 | return A(0,0);
164 | }
165 |
166 | // Initialise working vectors
167 | VectorXd v = VectorXd::LinSpaced(A.rows(), -1, 1);
168 | VectorXd oeigvec(A.rows());
169 |
170 | // Initialise eigenvalue and eigenvectors etc
171 | double eigval = v.norm();
172 | double vdist = numeric_limits<double>::infinity();
173 | eigvec = v/eigval;
174 |
175 | // Loop until eigenvector converges or we reach max iterations
176 | for (int i=0; (vdist>EIGCONTHRESH) && (i<MAXITER); ++i)
177 | {
178 | oeigvec = eigvec;
179 | eigvec.noalias() = A * oeigvec; // Power iteration
180 | eigval = eigvec.norm();
181 | eigvec /= eigval;
182 | vdist = (eigvec - oeigvec).norm(); // Distance for convergence check
183 | }
184 |
185 | return eigval;
186 | }
187 |
--------------------------------------------------------------------------------
/src/scluster.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * libcluster -- A collection of hierarchical Bayesian clustering algorithms.
3 | * Copyright (C) 2013 Daniel M. Steinberg (daniel.m.steinberg@gmail.com)
4 | *
5 | * This file is part of libcluster.
6 | *
7 | * libcluster is free software: you can redistribute it and/or modify it under
8 | * the terms of the GNU Lesser General Public License as published by the Free
9 | * Software Foundation, either version 3 of the License, or (at your option)
10 | * any later version.
11 | *
12 | * libcluster is distributed in the hope that it will be useful, but WITHOUT
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
15 | * for more details.
16 | *
17 | * You should have received a copy of the GNU Lesser General Public License
18 | * along with libcluster. If not, see <http://www.gnu.org/licenses/>.
19 | */
20 |
21 | #include <omp.h>
22 | #include "libcluster.h"
23 | #include "probutils.h"
24 | #include "comutils.h"
25 |
26 |
27 | //
28 | // Namespaces
29 | //
30 |
31 | using namespace std;
32 | using namespace Eigen;
33 | using namespace probutils;
34 | using namespace distributions;
35 | using namespace comutils;
36 | using namespace libcluster;
37 |
38 |
39 | //
40 | // Variational Bayes Private Functions
41 | //
42 |
43 | /* The Variational Bayes Expectation step for weights in each group.
44 | *
45 | * mutable: Top-level cluster assignment probabilities, qYj
46 | * returns: The complete-data free energy, Y and Y+Z dep. terms, for group j.
47 | * throws: invalid_argument rethrown from other functions.
48 | */
49 | template <class WJ, class WT> double vbeY (
50 | const vMatrixXd& qZj, // Cluster assignments for group j
51 | const WJ& weightsj, // Group top-level cluster weights
52 | const vector<WT>& weights_t, // Top-level cluster parameters
53 | MatrixXd& qYj // Top-level cluster assignments for group j
54 | )
55 | {
56 | const unsigned int T = weights_t.size(),
57 | Ij = qZj.size(),
58 | K = qZj[0].cols();
59 |
60 | // Get log marginal weight likelihoods
61 | const ArrayXd E_logwj = weightsj.Elogweight();
62 |
63 | MatrixXd Njik(Ij, K), logqYj(Ij, T);
64 | ArrayXXd qZjiLike(Ij, T);
65 |
66 | // Get bottom-level cluster counts per top-level cluster
67 | for (unsigned int i = 0; i < Ij; ++i)
68 | Njik.row(i) = qZj[i].colwise().sum();
69 |
70 | // Find Expectations of log joint observation probs
71 | for (unsigned int t = 0; t < T; ++t)
72 | {
73 | qZjiLike.col(t) = Njik * weights_t[t].Elogweight().matrix();
74 | logqYj.col(t) = E_logwj(t) + qZjiLike.col(t);
75 | }
76 |
77 | // Log normalisation constant of log observation likelihoods
78 | VectorXd logZyj = logsumexp(logqYj);
79 |
80 | // Normalise and Compute Responsibilities
81 | qYj = (logqYj.colwise() - logZyj).array().exp().matrix();
82 |
83 | return ((qYj.array() * qZjiLike).rowwise().sum() - logZyj.array()).sum();
84 | }
85 |
86 |
87 | /* The Variational Bayes Expectation step for clusters in each "document"
88 | *
89 | * mutable: Bottom-level cluster assignment probabilities, qZji
90 | * returns: The complete-data free energy, Z dep. terms, for group j.
91 | * throws: invalid_argument rethrown from other functions.
92 | */
93 | template <class WT, class C> double vbeZ (
94 | const MatrixXd& Xji, // Observations in i in group j
95 | const RowVectorXd& qYji, // Top-level cluster assignment of this doc
96 | const vector<WT>& weights_t, // Top-level cluster parameters
97 | const vector<C>& clusters, // Bottom-level cluster parameters
98 | MatrixXd& qZji // Observation to cluster assignments
99 | )
100 | {
101 | const int K = clusters.size(),
102 | Nji = Xji.rows(),
103 | T = weights_t.size();
104 |
105 | // Make top-level cluster global weights from weighted label parameters
106 | RowVectorXd E_logqYljt = RowVectorXd::Zero(K);
107 |
108 | for (int t = 0; t < T; ++t)
109 | E_logqYljt.noalias() += qYji(t) * weights_t[t].Elogweight().matrix();
110 |
111 | // Find Expectations of log joint observation probs
112 | MatrixXd logqZji = MatrixXd::Zero(Nji, K);
113 |
114 | for (int k = 0; k < K; ++k)
115 | logqZji.col(k) = E_logqYljt(k) + clusters[k].Eloglike(Xji).array();
116 |
117 | // Log normalisation constant of log observation likelihoods
118 | const VectorXd logZzji = logsumexp(logqZji);
119 |
120 | // Normalise and Compute Responsibilities
121 | qZji = (logqZji.colwise() - logZzji).array().exp().matrix();
122 |
123 | return -logZzji.sum();
124 | }
125 |
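   | /* [Editor's note] This mirrors vbeY one level down: up to normalisation,
   |  *
   |  *   log q(z_jin = k) = sum_t q(y_ji = t) * E[log beta_tk] + E[log p(x_jin | k)],
   |  *
   |  * so each observation's responsibility combines the document's soft
   |  * top-level assignment with the bottom-level cluster likelihoods.
   |  */
   |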
126 |
127 | /* Calculates the free energy lower bound for the model parameter distributions.
128 | *
129 | * returns: the free energy of the model
130 | */
131 | template <class WJ, class WT, class C> double fenergy (
132 | const vector<WJ>& weights_j, // Group top-level cluster weights
133 | const vector<WT>& weights_t, // Top-level cluster parameters
134 | const vector<C>& clusters, // Bottom-level cluster parameters
135 | const double Fyz, // Free energy Y and Z+Y terms
136 | const double Fz // Free energy Z terms
137 | )
138 | {
139 | const int T = weights_t.size(),
140 | K = clusters.size(),
141 | J = weights_j.size();
142 |
143 | // Class parameter free energy
144 | double Fc = 0;
145 | for (int t = 0; t < T; ++t)
146 | Fc += weights_t[t].fenergy();
147 |
148 | // Cluster parameter free energy
149 | double Fk = 0;
150 | for (int k = 0; k < K; ++k)
151 | Fk += clusters[k].fenergy();
152 |
153 | // Weight parameter free energy
154 | double Fw = 0;
155 | for (int j = 0; j < J; ++j)
156 | Fw += weights_j[j].fenergy();
157 |
158 | return Fw + Fc + Fk + Fyz + Fz;
159 | }
160 |
161 |
162 | /* Variational Bayes EM.
163 | *
164 | * returns: Free energy of the whole model.
165 | * mutable: the bottom-level cluster indicators, qZ
166 | * mutable: the top-level cluster indicators, qY
167 | * mutable: model parameters weights_j, weights_t, clusters
168 | * throws: invalid_argument rethrown from other functions.
169 | * throws: runtime_error if there is a negative free energy.
170 | */
171 | template <class WJ, class WT, class C> double vbem (
172 | const vvMatrixXd& X, // Observations JxIjx[NjixD]
173 | vvMatrixXd& qZ, // Observations to cluster assigns JxIjx[NjixK]
174 | vMatrixXd& qY, // Indicator to label assignments Jx[IjxT]
175 | vector<WJ>& weights_j, // Group weight distributions
176 | vector<WT>& weights_t, // Top-level cluster distributions
177 | vector<C>& clusters, // Bottom-level cluster distributions
178 | const double prior_t, // Prior value top-level cluster dists.
179 | const double prior_k, // Prior value bottom-level cluster dists.
180 | const int maxit = -1, // Max VBEM iterations (-1 = no max, default)
181 | const bool verbose = false // Verbose output (default false)
182 | )
183 | {
184 | const unsigned int J = X.size(),
185 | K = qZ[0][0].cols(),
186 | T = qY[0].cols();
187 |
188 | // Construct (empty) parameters
189 | weights_j.resize(J, WJ());
190 | weights_t.resize(T, WT(prior_t));
191 | clusters.resize(K, C(prior_k, X[0][0].cols()));
192 |
193 | // Other loop variables for initialisation
194 | int it = 0;
195 | double F = numeric_limits<double>::max(), Fold;
196 |
197 | do
198 | {
199 | Fold = F;
200 |
201 | MatrixXd Ntk = MatrixXd::Zero(T, K); // Clear Sufficient Stats
202 |
203 | // VBM for top-level cluster weights
204 | #pragma omp parallel for schedule(guided)
205 | for (unsigned int j = 0; j < J; ++j)
206 | {
207 | for(unsigned int i = 0; i < X[j].size(); ++i)
208 | {
209 | MatrixXd Ntkji = qY[j].row(i).transpose() * qZ[j][i].colwise().sum();
210 | #pragma omp critical
211 | Ntk += Ntkji;
212 | }
213 |
214 | weights_j[j].update(qY[j].colwise().sum());
215 | }
216 |
217 | // VBM for top-level cluster parameters
218 | #pragma omp parallel for schedule(guided)
219 | for (unsigned int t = 0; t < T; ++t)
220 | weights_t[t].update(Ntk.row(t)); // Weighted multinomials.
221 |
222 | // VBM for bottom-level cluster parameters
223 | #pragma omp parallel for schedule(guided)
224 | for (unsigned int k = 0; k < K; ++k)
225 | {
226 | clusters[k].clearobs();
227 |
228 | for (unsigned int j = 0; j < J; ++j)
229 | for(unsigned int i = 0; i < X[j].size(); ++i)
230 | clusters[k].addobs(qZ[j][i].col(k), X[j][i]);
231 |
232 | clusters[k].update();
233 | }
234 |
235 | double Fz = 0, Fyz = 0;
236 |
237 | // VBE for top-level cluster indicators
238 | #pragma omp parallel for schedule(guided) reduction(+ : Fyz)
239 | for (unsigned int j = 0; j < J; ++j)
240 | Fyz += vbeY(qZ[j], weights_j[j], weights_t, qY[j]);
241 |
242 | // VBE for bottom-level cluster indicators
243 | for (unsigned int j = 0; j < J; ++j)
244 | {
245 | #pragma omp parallel for schedule(guided) reduction(+ : Fz)
246 | for (unsigned int i = 0; i < X[j].size(); ++i)
247 | Fz += vbeZ(X[j][i], qY[j].row(i), weights_t, clusters, qZ[j][i]);
248 | }
249 |
250 | // Calculate free energy of model
251 | F = fenergy(weights_j, weights_t, clusters, Fyz, Fz);
252 |
253 | // Check bad free energy step
254 | if ((F-Fold)/abs(Fold) > libcluster::FENGYDEL)
255 | throw runtime_error("Free energy increase!");
256 |
257 | if (verbose == true) // Notify iteration
258 | cout << '-' << flush;
259 | }
260 | while ( (abs((Fold-F)/Fold) > libcluster::CONVERGE)
261 | && ( (++it < maxit) || (maxit < 0) ) );
262 |
263 | return F;
264 | }
265 |
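   | /* [Editor's note] vbem() sweeps VBM then VBE steps until the relative
   |  * change in free energy drops below libcluster::CONVERGE (or maxit is
   |  * reached). Free energy should be non-increasing under these updates,
   |  * hence the runtime_error when it rises by more than libcluster::FENGYDEL
   |  * (relative). The stopping rule, as an illustrative predicate:
   |  *
   |  *   bool converged (double Fold, double F, double tol)
   |  *   { return abs((Fold - F) / Fold) <= tol; }
   |  */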
266 |
267 | //
268 | // Model Selection and Heuristics Private Functions
269 | //
270 |
271 | /* Search in a greedy fashion for a mixture split that lowers model free
272 | * energy, or return false. Good, untried split candidates are tried first;
273 | * as soon as a split candidate is found that lowers the model free energy,
274 | * it is returned. This may not be the "best" split, but it is certainly
275 | * faster than an exhaustive search for the "best" split.
276 | *
277 | * returns: true if a split was found, false if no splits can be found
278 | * mutable: qZ is augmented with a new split if one is found, otherwise unchanged
279 | * mutable: qY is updated if a new split is found, otherwise unchanged
280 | * mutable: tally, a count of times each cluster has been unsuccessfully split
281 | * throws: invalid_argument rethrown from other functions
282 | * throws: runtime_error from its internal VBEM calls
283 | */
284 | template <class WJ, class WT, class C> bool split_gr (
285 | const vvMatrixXd& X, // Observations
286 | const vector<C>& clusters, // Cluster distributions
287 | const double prior_t, // Prior value for top-level clusters
288 | vMatrixXd& qY, // Top-level cluster labels qY
289 | vvMatrixXd& qZ, // Bottom-level cluster labels qZ
290 | vector<int>& tally, // Count of unsuccessful splits
291 | const double F, // Current model free energy
292 | const int maxK, // max number of (bottom) clusters
293 | const bool verbose // Verbose output
294 | )
295 | {
296 | const unsigned int J = X.size(),
297 | K = clusters.size();
298 |
299 | // Check if we have reached the max number of clusters
300 | if ( ((signed) K >= maxK) && (maxK >= 0) )
301 | return false;
302 |
303 | // Split order chooser and bottom-level cluster parameters
304 | tally.resize(K, 0); // Make sure tally is the right size
305 | vector<GreedOrder> ord(K);
306 |
307 | // Get cluster parameters and their free energy
308 | for (unsigned int k = 0; k < K; ++k)
309 | {
310 | ord[k].k = k;
311 | ord[k].tally = tally[k];
312 | ord[k].Fk = clusters[k].fenergy();
313 | }
314 |
315 | // Get bottom-level cluster likelihoods
316 | for (unsigned int j = 0; j < J; ++j)
317 | {
318 | // Add in cluster log-likelihood, weighted by global responsibility
319 | #pragma omp parallel for schedule(guided)
320 | for (unsigned int i = 0; i < X[j].size(); ++i)
321 | for (unsigned int k = 0; k < K; ++k)
322 | {
323 | double LL = qZ[j][i].col(k).dot(clusters[k].Eloglike(X[j][i]));
324 |
325 | #pragma omp atomic
326 | ord[k].Fk -= LL;
327 | }
328 | }
329 |
330 | // Sort clusters by split tally, then free energy contributions
331 | sort(ord.begin(), ord.end(), greedcomp);
332 |
333 | // Pre allocate big objects for loops (this makes a runtime difference)
334 | vector< vector<ArrayXi> > mapidx(J);
335 | vMatrixXd qYref(J);
336 | vvMatrixXd qZref(J), qZaug(J), Xk(J);
337 |
338 | // Loop through each potential cluster in order and split it
339 | for (vector<GreedOrder>::iterator ko = ord.begin(); ko < ord.end(); ++ko)
340 | {
341 | const int k = ko->k;
342 |
343 | ++tally[k]; // increase this cluster's unsuccessful split tally by default
344 |
345 | // Don't waste time with clusters too small to split (need at least a 2:2 split)
346 | if (clusters[k].getN() < 4)
347 | continue;
348 |
349 | // Now split observations and qZ.
350 | int scount = 0, Mtot = 0;
351 |
352 | for (unsigned int j = 0; j < J; ++j)
353 | {
354 | mapidx[j].resize(X[j].size());
355 | qZref[j].resize(X[j].size());
356 | qZaug[j].resize(X[j].size());
357 | Xk[j].resize(X[j].size());
358 | qYref[j].setOnes(X[j].size(), 1);
359 |
360 | #pragma omp parallel for schedule(guided) reduction(+ : Mtot, scount)
361 | for (unsigned int i = 0; i < X[j].size(); ++i)
362 | {
363 | // Make COPY of the observations with only relevant data points, p > 0.5
364 | mapidx[j][i] = partobs(X[j][i], (qZ[j][i].col(k).array() > 0.5),
365 | Xk[j][i]);
366 | Mtot += Xk[j][i].rows();
367 |
368 | // Initial cluster split
369 | ArrayXb splitk = clusters[k].splitobs(Xk[j][i]);
370 | qZref[j][i].setZero(Xk[j][i].rows(), 2);
371 | qZref[j][i].col(0) = (splitk == true).cast<double>();
372 | qZref[j][i].col(1) = (splitk == false).cast<double>();
373 |
374 | // keep track of the number of splits
375 | scount += splitk.count();
376 | }
377 | }
378 |
379 | // Don't waste time with clusters that haven't been split sufficiently
380 | if ( (scount < 2) || (scount > (Mtot-2)) )
381 | continue;
382 |
383 | // Refine the split
384 | vector<WJ> wspl;
385 | vector<WT> lspl;
386 | vector<C> cspl;
387 | vbem(Xk, qZref, qYref, wspl, lspl, cspl, prior_t,
388 | clusters[0].getprior(), SPLITITER);
389 |
390 | if (anyempty(cspl) == true) // One cluster only
391 | continue;
392 |
393 | // Map the refined splits back to original whole-data problem
394 | for (unsigned int j = 0; j < J; ++j)
395 | {
396 | #pragma omp parallel for schedule(guided)
397 | for (unsigned int i = 0; i < X[j].size(); ++i)
398 | qZaug[j][i] = auglabels(k, mapidx[j][i],
399 | (qZref[j][i].col(1).array() > 0.5), qZ[j][i]);
400 | }
401 |
402 | // Calculate free energy of this split with ALL data (and refine a bit)
403 | vMatrixXd qYaug = qY; // Copy :-(
404 | double Fs = vbem(X, qZaug, qYaug, wspl, lspl, cspl, prior_t,
405 | clusters[0].getprior(), 1);
406 |
407 | if (anyempty(cspl) == true) // One cluster only
408 | continue;
409 |
410 | // Only notify here of split candidates
411 | if (verbose == true)
412 | cout << '=' << flush;
413 |
414 | // Test whether this cluster split is a keeper
415 | if ( (Fs < F) && (abs((F-Fs)/F) > CONVERGE) )
416 | {
417 | qY = qYaug;
418 | qZ = qZaug;
419 | tally[k] = 0; // Reset tally if successfully split
420 | return true;
421 | }
422 | }
423 |
424 | // Failed to find splits
425 | return false;
426 | }
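   |
   | /* [Editor's note] greedcomp() (declared in comutils.h) orders split
   |  * candidates so clusters with fewer failed split attempts come first,
   |  * breaking ties by larger free-energy contribution. An assumed,
   |  * illustrative comparator with those semantics:
   |  *
   |  *   bool greedcomp (const GreedOrder& a, const GreedOrder& b)
   |  *   {
   |  *     if (a.tally != b.tally)
   |  *       return a.tally < b.tally; // fewer failed attempts first
   |  *     return a.Fk > b.Fk; // then the largest energy contributor
   |  *   }
   |  */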
427 |
428 | /* Find and remove all empty top-level clusters.
429 | *
430 | * returns: true if any clusters have been deleted, false if all are kept.
431 | * mutable: qY may have columns deleted if there are empty weights found.
432 | * mutable: weights_t if there are empty top-level clusters found.
433 | */
434 | template <class WT> bool prune_clusters_t (
435 | vMatrixXd& qY, // Probabilities qY
436 | vector<WT>& weights_t, // weights distributions
437 | bool verbose = false // print status
438 | )
439 | {
440 | const unsigned int T = weights_t.size(),
441 | J = qY.size();
442 |
443 | // Look for empty clusters
444 | ArrayXd Nt(T);
445 | for (unsigned int t = 0; t < T; ++t)
446 | Nt(t) = weights_t[t].getNk().sum();
447 |
448 | // Find location of empty and full clusters
449 | ArrayXi eidx, fidx;
450 | arrfind(Nt.array() < 1, eidx, fidx);
451 | const unsigned int nempty = eidx.size();
452 |
453 | // If there are no empty clusters, return false
454 | if (nempty == 0)
455 | return false;
456 |
457 | if (verbose == true)
458 | cout << '*' << flush;
459 |
460 | // Delete empty cluster suff. stats.
461 | for (int i = (nempty - 1); i >= 0; --i)
462 | weights_t.erase(weights_t.begin() + eidx(i));
463 |
464 | // Delete empty cluster indicators by copying only full indicators
465 | const unsigned int newT = fidx.size();
466 | vMatrixXd newqY(J);
467 |
468 | for (unsigned int j = 0; j < J; ++j)
469 | {
470 | newqY[j].setZero(qY[j].rows(), newT);
471 | for (unsigned int t = 0; t < newT; ++t)
472 | newqY[j].col(t) = qY[j].col(fidx(t));
473 | }
474 |
475 | qY = newqY;
476 |
477 | return true;
478 | }
479 |
480 |
481 | /* The model selection algorithm
482 | *
483 | * returns: Free energy of the final model
484 | * mutable: qY the probabilistic top-level cluster assignments
485 | * mutable: qZ the probabilistic observation to bottom-level cluster assigns.
486 | * mutable: the top-level cluster weights and parameters.
487 | * mutable: the bottom-level cluster weights and parameters.
488 | * throws: invalid_argument from other functions.
489 | * throws: runtime_error if free energy increases.
490 | */
491 | template <class WJ, class WT, class C> double scluster (
492 | const vvMatrixXd& X, // Observations
493 | vMatrixXd& qY, // Top-level cluster assignments
494 | vvMatrixXd& qZ, // Bottom-level cluster assignments
495 | vector<WJ>& weights_j, // Group weight distributions
496 | vector<WT>& weights_t, // Top-level cluster distributions
497 | vector<C>& clusters, // Bottom-level cluster distributions
498 | const double prior_t, // Prior value for top-level cluster dists.
499 | const double prior_k, // Prior value for bottom-level cluster dists.
500 | const unsigned int maxT, // Truncation level for number of weights
501 | const int maxK, // max number of (bottom) clusters
502 | const bool verbose, // Verbose output
503 | const unsigned int nthreads // Number of threads for OpenMP to use
504 | )
505 | {
506 | if (nthreads < 1)
507 | throw invalid_argument("Must specify at least one thread for execution!");
508 | omp_set_num_threads(nthreads);
509 |
510 | const unsigned int J = X.size();
511 | unsigned int Itot = 0;
512 |
513 | // Randomly initialise qY and initialise qZ to ones
514 | qY.resize(J);
515 | qZ.resize(J);
516 |
517 | for (unsigned int j = 0; j < J; ++j)
518 | {
519 | const unsigned int Ij = X[j].size();
520 |
521 | ArrayXXd randm = (ArrayXXd::Random(Ij, maxT)).abs();
522 | ArrayXd norm = randm.rowwise().sum();
523 | qY[j] = (randm.log().colwise() - norm.log()).exp();
524 |
525 | qZ[j].resize(Ij);
526 | for (unsigned int i = 0; i < Ij; ++i)
527 | qZ[j][i].setOnes(X[j][i].rows(), 1);
528 |
529 | Itot += Ij;
530 | }
531 |
532 | // Some input argument checking
533 | if (maxT > Itot)
534 | throw invalid_argument("maxT cannot be greater than the number of "
535 | "documents in X!");
536 |
537 | // Initialise free energy and other loop variables
538 | bool issplit = true, emptyclasses = true;
539 | double F = 0;
540 | vector<int> tally;
541 |
542 | // Main loop
543 | while ((issplit == true) || (emptyclasses == true))
544 | {
545 | // Variational Bayes
546 | F = vbem(X, qZ, qY, weights_j, weights_t, clusters, prior_t,
547 | prior_k, -1, verbose);
548 |
549 | // Start model search heuristics
550 | if (verbose == true)
551 | cout << '<' << flush; // Notify start search
552 |
553 | if (issplit == false) // Remove any empty weights
554 | emptyclasses = prune_clusters_t(qY, weights_t, verbose);
555 | else // Search for best split, augment qZ if found one
556 | issplit = split_gr<WJ, WT, C>(X, clusters, prior_t, qY, qZ, tally, F, maxK,
557 | verbose);
558 |
559 | if (verbose == true)
560 | cout << '>' << endl; // Notify end search
561 | }
562 |
563 | // Print finished notification if verbose
564 | if (verbose == true)
565 | {
566 | cout << "Finished!" << endl;
567 | cout << "Number of top level clusters = " << weights_t.size();
568 | cout << ", and bottom level clusters = " << clusters.size() << endl;
569 | cout << "Free energy = " << F << endl;
570 | }
571 |
572 | return F;
573 | }
574 |
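   | /* [Editor's note] The main loop above alternates between greedy cluster
   |  * splitting and, once no split lowers the free energy any further,
   |  * pruning of empty top-level clusters, re-converging the model with VBEM
   |  * between each heuristic step.
   |  */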
575 |
576 | //
577 | // Public Functions
578 | //
579 |
580 | double libcluster::learnSCM (
581 | const vvMatrixXd& X,
582 | vMatrixXd& qY,
583 | vvMatrixXd& qZ,
584 | vector<GDirichlet>& weights_j,
585 | vector<Dirichlet>& weights_t,
586 | vector<GaussWish>& clusters,
587 | const double dirprior,
588 | const double gausprior,
589 | const unsigned int maxT,
590 | const int maxK,
591 | const bool verbose,
592 | const unsigned int nthreads
593 | )
594 | {
595 |
596 | if (verbose == true)
597 | cout << "Learning SCM..." << endl;
598 |
599 | // Model selection and Variational Bayes learning
600 | double F = scluster(X, qY, qZ,
601 | weights_j, weights_t, clusters, dirprior, gausprior, maxT,
602 | maxK, verbose, nthreads);
603 |
604 | return F;
605 | }
606 |
--------------------------------------------------------------------------------
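[Editor's note] A minimal sketch of driving the learnSCM interface dumped
above. The distribution types (GDirichlet, Dirichlet, GaussWish) follow the
reconstructed learnSCM signature and should be read as assumptions, as should
the toy data, truncation level, and priors:

    #include <iostream>
    #include <vector>
    #include "libcluster.h"
    #include "distributions.h"

    using namespace std;
    using namespace Eigen;
    using namespace libcluster;
    using namespace distributions;

    int main ()
    {
      // Two groups of two documents, each with 50 random 2-D observations
      vvMatrixXd X(2);
      for (unsigned int j = 0; j < 2; ++j)
        for (unsigned int i = 0; i < 2; ++i)
          X[j].push_back(MatrixXd::Random(50, 2));

      vMatrixXd qY;   // document to top-level cluster assignments
      vvMatrixXd qZ;  // observation to bottom-level cluster assignments
      vector<GDirichlet> weights_j;
      vector<Dirichlet> weights_t;
      vector<GaussWish> clusters;

      // dirprior, gausprior, maxT = 2 (must not exceed the number of
      // documents), no maxK cap, verbose output, one thread
      double F = learnSCM(X, qY, qZ, weights_j, weights_t, clusters,
                          PRIORVAL, PRIORVAL, 2, -1, true, 1);

      cout << "Free energy = " << F << endl;
      return 0;
    }
--------------------------------------------------------------------------------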
/test/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Test executable build instructions
2 |
3 | # Make Cluster models batch test executable (test VDP and GMC)
4 | add_executable(cluster_test
5 | ${TEST_SOURCE_DIR}/cluster_test.cpp
6 | ${TEST_SOURCE_DIR}/testdata.h
7 | )
8 |
9 | target_link_libraries(cluster_test ${PROJECT_NAME})
10 |
11 | # Make Topic models batch test executable
12 | add_executable(scluster_test
13 | ${TEST_SOURCE_DIR}/scluster_test.cpp
14 | ${TEST_SOURCE_DIR}/testdata.h
15 | )
16 |
17 | target_link_libraries(scluster_test ${PROJECT_NAME})
18 |
19 | # Make MCM models batch test executable
20 | add_executable(mcluster_test
21 | ${TEST_SOURCE_DIR}/mcluster_test.cpp
22 | ${TEST_SOURCE_DIR}/testdata.h
23 | )
24 |
25 | target_link_libraries(mcluster_test ${PROJECT_NAME})
26 |
--------------------------------------------------------------------------------
/test/cluster_test.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * libcluster -- A collection of hierarchical Bayesian clustering algorithms.
3 | * Copyright (C) 2013 Daniel M. Steinberg (daniel.m.steinberg@gmail.com)
4 | *
5 | * This file is part of libcluster.
6 | *
7 | * libcluster is free software: you can redistribute it and/or modify it under
8 | * the terms of the GNU Lesser General Public License as published by the Free
9 | * Software Foundation, either version 3 of the License, or (at your option)
10 | * any later version.
11 | *
12 | * libcluster is distributed in the hope that it will be useful, but WITHOUT
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
15 | * for more details.
16 | *
17 | * You should have received a copy of the GNU Lesser General Public License
18 | * along with libcluster. If not, see <http://www.gnu.org/licenses/>.
19 | */
20 |
21 | #include "libcluster.h"
22 | #include "distributions.h"
23 | #include "testdata.h"
24 |
25 |
26 | //
27 | // Namespaces
28 | //
29 |
30 |
31 | using namespace std;
32 | using namespace Eigen;
33 | using namespace libcluster;
34 | using namespace distributions;
35 |
36 |
37 | // Main
38 | int main()
39 | {
40 |
41 | // Populate test data from testdata.h
42 | MatrixXd Xcat;
43 | vMatrixXd X;
44 | makeXdata(Xcat, X);
45 |
46 | // GMC
47 | vector<GDirichlet> weights;
48 | vector<GaussWish> clusters;
49 | vMatrixXd qZgroup;
50 | clock_t start = clock();
51 | learnGMC (X, qZgroup, weights, clusters, PRIORVAL, -1, false, true);
52 |
53 | double stop = (double)((clock() - start))/CLOCKS_PER_SEC;
54 | cout << "GMC Elapsed time = " << stop << " sec." << endl;
55 |
56 | cout << endl << "Cluster Weights:" << endl;
57 | for (vector