├── .gitignore
├── .ycm_extra_conf.py
├── LICENSE
├── Makefile
├── README
├── plot-gmm.py
├── plot-point-3d.py
├── plot-point.py
├── python
├── pygmm.py
└── test.py
└── src
├── Threadpool.cc
├── Threadpool
├── COPYING
├── Makefile
├── README.md
├── Threadpool.hpp
├── main.cpp.prevent-compile
└── threadpool-example
├── common.hh
├── datamanip.cc
├── datamanip.hh
├── dataset.hh
├── fastexp.cc
├── fastexp.hh
├── gmm.c.hh
├── gmm.cc
├── gmm.hh
├── helper.hh
├── kmeans++.cc
├── kmeans++.hh
├── kmeans.cc
├── kmeans.hh
├── kmeansII.cc
├── kmeansII.hh
├── main.cc
├── pygmm.cc
├── pygmm.hh
├── random.hh
├── tclap
├── Arg.h
├── ArgException.h
├── ArgTraits.h
├── CmdLine.h
├── CmdLineInterface.h
├── CmdLineOutput.h
├── Constraint.h
├── DocBookOutput.h
├── HelpVisitor.h
├── IgnoreRestVisitor.h
├── Makefile
├── Makefile.am
├── Makefile.in
├── MultiArg.h
├── MultiSwitchArg.h
├── OptionalUnlabeledTracker.h
├── StandardTraits.h
├── StdOutput.h
├── SwitchArg.h
├── UnlabeledMultiArg.h
├── UnlabeledValueArg.h
├── ValueArg.h
├── ValuesConstraint.h
├── VersionVisitor.h
├── Visitor.h
├── XorHandler.h
└── ZshCompletionOutput.h
├── timer.hh
└── type.hh
/.gitignore:
--------------------------------------------------------------------------------
1 | *.swp
2 | *.pyc
3 |
--------------------------------------------------------------------------------
/.ycm_extra_conf.py:
--------------------------------------------------------------------------------
1 | # This file is NOT licensed under the GPLv3, which is the license for the rest
2 | # of YouCompleteMe.
3 | #
4 | # Here's the license text for this file:
5 | #
6 | # This is free and unencumbered software released into the public domain.
7 | #
8 | # Anyone is free to copy, modify, publish, use, compile, sell, or
9 | # distribute this software, either in source code form or as a compiled
10 | # binary, for any purpose, commercial or non-commercial, and by any
11 | # means.
12 | #
13 | # In jurisdictions that recognize copyright laws, the author or authors
14 | # of this software dedicate any and all copyright interest in the
15 | # software to the public domain. We make this dedication for the benefit
16 | # of the public at large and to the detriment of our heirs and
17 | # successors. We intend this dedication to be an overt act of
18 | # relinquishment in perpetuity of all present and future rights to this
19 | # software under copyright law.
20 | #
21 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
24 | # IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
25 | # OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
26 | # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
27 | # OTHER DEALINGS IN THE SOFTWARE.
28 | #
29 | # For more information, please refer to <http://unlicense.org/>
30 |
31 | import os
32 | import ycm_core
33 |
34 | # These are the compilation flags that will be used in case there's no
35 | # compilation database set (by default, one is not set).
36 | # CHANGE THIS LIST OF FLAGS. YES, THIS IS THE DROID YOU HAVE BEEN LOOKING FOR.
37 | flags = [
38 | '-Wall',
39 | '-Wextra',
40 | '-DUSER_CLANG_COMPLETER',
41 | # THIS IS IMPORTANT! Without a "-std=" flag, clang won't know which
42 | # language to use when compiling headers. So it will guess. Badly. So C++
43 | # headers will be compiled as C headers. You don't want that so ALWAYS specify
44 | # a "-std=".
45 | # For a C project, you would set this to something like 'c99' instead of
46 | # 'c++11'.
47 | '-std=c++11',
48 | # ...and the same thing goes for the magic -x option which specifies the
49 | # language that the files to be compiled are written in. This is mostly
50 | # relevant for c++ headers.
51 | # For a C project, you would set this to 'c' instead of 'c++'.
52 | '-x',
53 | 'c++',
54 | '-I',
55 | 'src',
56 | # This path will only work on OS X, but extra paths that don't exist are not
57 | # harmful
58 | ]
59 |
60 | # Set this to the absolute path to the folder (NOT the file!) containing the
61 | # compile_commands.json file to use that instead of 'flags'. See here for
62 | # more details: http://clang.llvm.org/docs/JSONCompilationDatabase.html
63 | #
64 | # Most projects will NOT need to set this to anything; you can just change the
65 | # 'flags' list of compilation flags. Notice that YCM itself uses that approach.
66 | compilation_database_folder = ''
67 |
68 | if compilation_database_folder:
69 | database = ycm_core.CompilationDatabase( compilation_database_folder )
70 | else:
71 | database = None
72 |
73 |
def DirectoryOfThisScript():
    """Return the absolute path of the directory that contains this file."""
    script_path = os.path.abspath(__file__)
    return os.path.dirname(script_path)
76 |
77 |
def MakeRelativePathsInFlagsAbsolute(flags, working_directory):
    """Return a copy of *flags* with relative path arguments made absolute.

    Paths attached to, or following, ``-isystem``, ``-I``, ``-iquote`` and
    ``--sysroot=`` are joined onto *working_directory*.  When
    *working_directory* is empty the flags are returned unchanged (as a new
    list).  Empty flags are dropped from the result.
    """
    if not working_directory:
        return list(flags)

    path_flags = ('-isystem', '-I', '-iquote', '--sysroot=')
    absolute_flags = []
    # True when the previous flag was a bare path option such as '-I', so the
    # current flag is its path argument.
    expecting_path = False

    for flag in flags:
        rewritten = flag

        if expecting_path:
            expecting_path = False
            if not flag.startswith('/'):
                rewritten = os.path.join(working_directory, flag)

        for prefix in path_flags:
            if flag == prefix:
                expecting_path = True
                break
            if flag.startswith(prefix):
                # Path glued to the option, e.g. '-Isrc'.
                attached = flag[len(prefix):]
                rewritten = prefix + os.path.join(working_directory, attached)
                break

        if rewritten:
            absolute_flags.append(rewritten)

    return absolute_flags
105 |
106 |
def FlagsForFile(filename):
    """Return the flag dictionary YouCompleteMe uses to compile *filename*.

    With a compilation database configured, the flags recorded for the file
    are used; otherwise the module-level ``flags`` list is used with paths
    resolved relative to this script's directory.
    """
    if not database:
        final_flags = MakeRelativePathsInFlagsAbsolute(
            flags, DirectoryOfThisScript())
    else:
        # compiler_flags_ is a "list-like" StringVec object, not a Python
        # list; MakeRelativePathsInFlagsAbsolute returns a real list.
        compilation_info = database.GetCompilationInfoForFile(filename)
        final_flags = MakeRelativePathsInFlagsAbsolute(
            compilation_info.compiler_flags_,
            compilation_info.compiler_working_dir_)

        # NOTE: this removal exists for YouCompleteMe's own build; most
        # projects do not need to strip the stdlib flag.
        if '-stdlib=libc++' in final_flags:
            final_flags.remove('-stdlib=libc++')

    return {
        'flags': final_flags,
        'do_cache': True,
    }
131 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2013,2014 Xinyu Zhou
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | The above copyright notice and this permission notice shall be included in
12 | all copies or substantial portions of the Software.
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 | THE SOFTWARE.
20 |
21 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | #
2 | # $File: Makefile
3 | # $Date: Wed Dec 11 18:57:54 2013 +0800
4 | #
5 | # A single output portable Makefile for
6 | # simple c++ project
7 |
8 | SRC_DIR = src
9 | OBJ_DIR = obj
10 | BIN_DIR = bin
11 | LIB_DIR = lib
12 | TARGET = gmm
13 |
14 | CXX = g++
15 | #CXX = clang++
16 |
17 | BIN_TARGET = $(BIN_DIR)/$(TARGET)
18 | PROF_FILE = $(BIN_TARGET).prof
19 |
20 | INCLUDE_DIR = -I $(SRC_DIR)
21 | #LDFLAGS = -L /home/zhanpeng/.local/lib -lGClasses
22 | #LDFLAGS += -lprofiler
23 | #DEFINES += -D__DEBUG
24 | #DEFINES += -D__DEBUG_CHECK
25 |
26 |
27 | CXXFLAGS += -O3
28 | # CXXFLAGS += -g -O0
29 | # CXXFLAGS += -pg
30 | CXXFLAGS += -fomit-frame-pointer -msse2 -mfpmath=sse -ffast-math -lm
31 |
32 | CXXFLAGS += #$(DEFINES)
33 | CXXFLAGS += -std=c++11
34 | #CXXFLAGS += -ansi
35 | CXXFLAGS += -Wall -Wextra
36 | CXXFLAGS += $(INCLUDE_DIR)
37 | CXXFLAGS += $(LDFLAGS)
38 | #CXXFLAGS += $(shell pkg-config --libs --cflags opencv)
39 | #CXXFLAGS += -pthread
40 | CXXFLAGS += -lpthread
41 | #CXXFLAGS += -fopenmp
42 |
43 | CXXFLAGS += -fPIC
44 |
45 | #CC = /usr/share/clang/scan-build/ccc-analyzer
46 | #CXX = /usr/share/clang/scan-build/c++-analyzer
47 | CXXSOURCES = $(shell find $(SRC_DIR)/ -name "*.cc")
48 | OBJS = $(addprefix $(OBJ_DIR)/,$(CXXSOURCES:.cc=.o))
49 | DEPFILES = $(OBJS:.o=.d)
50 |
51 | .PHONY: all clean run rebuild gdb
52 |
53 | all: $(BIN_TARGET) $(LIB_DIR)/pygmm.so
54 |
# Shared library loaded by python/pygmm.py.
# $(LIB_DIR) is an order-only prerequisite (after the '|'): the directory only
# has to exist, and its changing timestamp must not force a relink.
$(LIB_DIR)/pygmm.so: $(OBJS) | $(LIB_DIR)
	$(CXX) -shared $(OBJS) -o $@ $(CXXFLAGS)

$(LIB_DIR)/pygmm.o: $(OBJ_DIR)/$(SRC_DIR)/pygmm.o | $(LIB_DIR)
	cp $< $@

# -p keeps the rule from failing when the directory already exists.
$(LIB_DIR):
	mkdir -p $@
63 |
64 | $(OBJ_DIR)/%.o: %.cc
65 | @echo "[cc] $< ..."
66 | @$(CXX) -c $< $(CXXFLAGS) -o $@
67 |
68 | $(OBJ_DIR)/%.d: %.cc
69 | @mkdir -pv $(dir $@)
70 | @echo "[dep] $< ..."
71 | @$(CXX) $(INCLUDE_DIR) $(CXXFLAGS) -MM -MT "$(OBJ_DIR)/$(<:.cc=.o) $(OBJ_DIR)/$(<:.cc=.d)" "$<" > "$@"
72 |
73 | sinclude $(DEPFILES)
74 |
75 | $(BIN_TARGET): $(OBJS)
76 | @echo "[link] $< ..."
77 | @mkdir -p $(BIN_DIR)
78 | @$(CXX) $(OBJS) -o $@ $(LDFLAGS) $(CXXFLAGS)
79 | @echo have a nice day!
80 |
81 | clean:
82 | rm -rf $(OBJ_DIR) $(BIN_DIR) $(LIB_DIR)
83 |
84 | run: $(BIN_TARGET)
85 | ./$(BIN_TARGET)
86 |
# Use $(MAKE) rather than a literal 'make' so the sub-make is the same program
# and inherits command-line flags and the jobserver.
rebuild:
	+@$(MAKE) clean
	+@$(MAKE)
90 |
91 | gdb: $(BIN_TARGET)
92 | gdb ./$(BIN_TARGET)
93 |
94 | run-prof $(PROF_FILE): $(BIN_TARGET)
95 | CPUPROFILE=$(PROF_FILE) CPUPROFILE_FREQUENCY=1000 ./$(BIN_TARGET)
96 |
97 | show-prof: $(PROF_FILE)
98 | google-pprof --text $(BIN_TARGET) $(PROF_FILE) | tee prof.txt
99 |
100 |
--------------------------------------------------------------------------------
/README:
--------------------------------------------------------------------------------
1 | Fast GMM
2 | ========
3 | A fast Gaussian Mixture Model implementation.
4 |
5 | Features
6 | ========
7 | - Multi-threaded computation
8 | - Fast exponential function computation
9 |
10 | Thanks
11 | ======
12 | - fastexp
13 | - Threadpool
14 | - tclap
15 |
--------------------------------------------------------------------------------
/plot-gmm.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python2
2 | # -*- coding: utf-8 -*-
3 | # $File: plot-gmm.py
4 | # $Date: Tue Dec 10 16:14:53 2013 +0800
5 | # $Author: Xinyu Zhou
6 |
7 | import matplotlib.pyplot as plt
8 | import matplotlib.mlab as mlab
9 | from matplotlib import cm
10 | from scipy import stats, mgrid, c_, reshape, random, rot90
11 | import argparse
12 | from numpy import *
13 | import numpy as np
14 |
class GassianTypeNotImplemented(Exception):
    """Raised when a model file declares a covariance type this script cannot read."""
17 |
def get_args():
    """Parse the command line and return the resulting namespace.

    Both ``-i/--input`` (data file) and ``-m/--model`` (model file) are
    required options.
    """
    parser = argparse.ArgumentParser(description='plot gmm')
    required_options = (
        ('-i', '--input', 'data file'),
        ('-m', '--model', 'model file'),
    )
    for short_name, long_name, help_text in required_options:
        parser.add_argument(short_name, long_name,
                            help=help_text, required=True)
    return parser.parse_args()
28 |
29 |
class Gaussian(object):
    """One Gaussian component of a mixture, as stored in a model file.

    ``mean`` and ``sigma`` hold one value per dimension; ``sigma`` is used as
    a standard deviation (this script passes it as ``sigmax``/``sigmay`` when
    drawing contours).
    """

    def __init__(self):
        self.covtype = 1
        self.dim = 0
        self.mean = array([])
        self.sigma = array([])
        self.covariance = array([[]])

    def probability_of(self, x):
        """Return the normal density of *x*, evaluated per dimension.

        *x* must have ``self.dim`` entries.  The result is an array with one
        density per dimension, ``exp(-(x - mean)^2 / (2 sigma^2)) /
        (sqrt(2 pi) sigma)``.
        """
        assert len(x) == self.dim

        # mean/sigma may be plain lists when read from a file, so coerce them.
        x = np.asarray(x, dtype=float)
        mu = np.asarray(self.mean, dtype=float)
        sigma = np.asarray(self.sigma, dtype=float)
        exponent = -(x - mu) ** 2 / (2 * sigma ** 2)
        return np.exp(exponent) / (np.sqrt(2 * np.pi) * sigma)
42 |
43 |
class GMM(object):
    """Plain container for a mixture model: weights plus Gaussian components."""

    def __init__(self):
        self.gaussians = []       # one Gaussian object per component
        self.weights = array([])  # mixture weights
        self.nr_mixtures = 0      # number of components
49 |
def read_data(fname):
    """Read whitespace-separated numeric rows from *fname*, column-wise.

    Returns a list with one tuple of floats per column.  Blank lines are
    skipped; without that, a single empty row would make ``zip`` return no
    columns at all.
    """
    with open(fname) as fin:
        rows = [[float(tok) for tok in line.split()]
                for line in fin if line.strip()]
    # list(...) so callers can slice the result on Python 3 as well.
    return list(zip(*rows))
53 |
54 |
def read_gaussian(fin):
    """Read one Gaussian component from the open model file *fin*.

    The first line holds ``dim covtype``.  For covtype 1 the next two lines
    hold ``dim`` means and ``dim`` sigmas.  Any other covtype raises
    GassianTypeNotImplemented.
    """
    gaussian = Gaussian()
    gaussian.dim, gaussian.covtype = (
        int(tok) for tok in fin.readline().split())
    if gaussian.covtype != 1:
        raise GassianTypeNotImplemented()

    # Real lists (not map objects) so len() and indexing work on Python 3.
    gaussian.mean = [float(tok) for tok in fin.readline().split()]
    gaussian.sigma = [float(tok) for tok in fin.readline().split()]
    assert len(gaussian.mean) == gaussian.dim
    assert len(gaussian.sigma) == gaussian.dim
    return gaussian
66 |
def read_model(fname):
    """Load a mixture model from *fname* and return it as a GMM object.

    The file starts with the number of mixtures, followed by a line of
    weights, followed by one Gaussian record per mixture.
    """
    gmm = GMM()
    with open(fname) as fin:
        gmm.nr_mixtures = int(fin.readline())
        # A list rather than a one-shot map object on Python 3.
        gmm.weights = [float(tok) for tok in fin.readline().split()]
        for _ in range(gmm.nr_mixtures):
            gmm.gaussians.append(read_gaussian(fin))

    return gmm
76 |
def main():
    """Scatter-plot the first two data columns and overlay the model contours."""
    args = get_args()
    data = read_data(args.input)
    x, y = data[:2]
    gmm = read_model(args.model)

    fig = plt.figure()
    ax = fig.add_subplot(111, aspect='equal')
    ax.scatter(x, y)
    x0, x1, y0, y1 = ax.axis()

    # Evaluate the densities on a grid covering the scatter plot's extent.
    X, Y = np.meshgrid(np.linspace(x0, x1, 1000), np.linspace(y0, y1, 1000))

    def get_Z(X, Y, gaussian):
        # Uncorrelated bivariate normal density, written out by hand because
        # matplotlib.mlab.bivariate_normal no longer exists in current
        # matplotlib releases.
        sx, sy = gaussian.sigma[0], gaussian.sigma[1]
        mx, my = gaussian.mean[0], gaussian.mean[1]
        z = ((X - mx) / sx) ** 2 + ((Y - my) / sy) ** 2
        return np.exp(-z / 2.0) / (2 * np.pi * sx * sy)

    # NOTE(review): components are summed without gmm.weights, so the
    # contours are not the weighted mixture density -- confirm this is intended.
    Z = get_Z(X, Y, gmm.gaussians[0])
    for gaussian in gmm.gaussians[1:]:
        Z += get_Z(X, Y, gaussian)
    plt.contour(X, Y, Z, cmap=cm.PuBu_r)

    # Mark each component's mean.
    for gaussian in gmm.gaussians:
        plt.scatter(gaussian.mean[0], gaussian.mean[1], s=50, c='yellow')

    plt.show()
105 |
106 |
107 | if __name__ == '__main__':
108 | main()
109 |
110 |
111 | # vim: foldmethod=marker
112 |
113 |
--------------------------------------------------------------------------------
/plot-point-3d.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python2
2 |
3 | import numpy as np
4 | import matplotlib.pyplot as plt
5 | from mpl_toolkits.mplot3d import Axes3D
6 | import argparse, sys
7 |
8 | stdin_fname = '$stdin$'
9 |
def _parse_limit(text):
    """Convert a comma-separated string such as ``"0,10"`` to a list of floats."""
    return [float(part) for part in text.rstrip().split(',')]


def get_args():
    """Parse the command line and return the resulting namespace.

    ``--xlim``/``--ylim``/``--zlim`` are converted from comma-separated
    strings to lists of floats; unset limits stay ``None``.

    Raises:
        Exception: if neither ``--show`` nor ``--output`` was given.
    """
    description = "plot points into graph. x and y seperated with white space in one line, or just y's"
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('-i', '--input',
                        help='input data file, "-" for stdin, default stdin',
                        default='-')
    parser.add_argument('-o', '--output',
                        help='output image', default='')
    parser.add_argument('--show',
                        help='show the figure after rendered',
                        action='store_true')
    parser.add_argument('-t', '--title',
                        help='title of the graph',
                        default='')
    parser.add_argument('--xlabel', help='x label', default='x')
    parser.add_argument('--ylabel', help='y label', default='y')
    parser.add_argument('--zlabel', help='z label', default='z')
    parser.add_argument('--xlim', help='xlim')
    parser.add_argument('--ylim', help='ylim')
    parser.add_argument('--zlim', help='zlim')

    parser.add_argument('--annotate-maximum',
                        help='annonate maximum value in graph',
                        action='store_true')
    parser.add_argument('--annotate-minimum',
                        help='annonate minimum value in graph',
                        action='store_true')
    parser.add_argument('--xkcd',
                        help='xkcd style',
                        action='store_true')

    args = parser.parse_args()

    if (not args.show) and len(args.output) == 0:
        raise Exception("at least one of --show and --output/-o must be specified")

    # Build real lists: a map() object on Python 3 is a one-shot iterator and
    # cannot be handed to the axis-limit setters.
    for name in ('xlim', 'ylim', 'zlim'):
        raw = getattr(args, name)
        if raw:
            setattr(args, name, _parse_limit(raw))

    return args
59 |
60 |
def filter_valid_range(points, rect):
    """Return the points that lie inside *rect*, in their original order.

    rect = (min_x, max_x, min_y, max_y); the bounds are inclusive.  When no
    point lies inside, the first point is returned as the only element.
    """
    min_x, max_x, min_y, max_y = rect[0], rect[1], rect[2], rect[3]
    inside = [(px, py) for px, py in points
              if min_x <= px <= max_x and min_y <= py <= max_y]
    return inside if inside else [points[0]]
70 |
def _annotate_extreme(ax, label, x, y, dx, dy, rect):
    """Annotate the point (x, y) on *ax* with *label* and its coordinates.

    The text goes at the first of four candidate offsets (+/-dx, +/-dy) that
    lies inside *rect*; offsets using *dy* with its given sign are tried first.
    """
    candidates = [(x + dx, y + dy), (x - dx, y + dy),
                  (x + dx, y - dy), (x - dx, y - dy)]
    text_x, text_y = filter_valid_range(candidates, rect)[0]
    ax.annotate('{} ({:.3f},{:.3f})'.format(label, x, y),
                xy=(x, y),
                xytext=(text_x, text_y),
                arrowprops=dict(arrowstyle='->'))


def do_plot(data_x, data_y, data_z, args):
    """Scatter-plot the data, then save and/or show the figure as requested.

    A 3-D plot is drawn when *data_z* is non-empty, otherwise a 2-D one.
    *args* is the namespace returned by ``get_args``.
    """
    is_3d = len(data_z) > 0
    fig = plt.figure(figsize=(16.18, 10))
    # projection=None selects the ordinary 2-D axes; '2d' is not a
    # registered matplotlib projection name.
    ax = fig.add_axes((0.1, 0.2, 0.8, 0.7),
                      projection='3d' if is_3d else None)
    if is_3d:
        ax.scatter(data_x, data_y, data_z)
    else:
        ax.scatter(data_x, data_y)

    if args.xlim:
        ax.set_xlim(args.xlim)
    if args.ylim:
        ax.set_ylim(args.ylim)
    if args.zlim and is_3d:
        # Only 3-D axes have a z limit; --zlim is ignored for 2-D data.
        ax.set_zlim3d(args.zlim)
    if args.xlim or args.ylim or args.zlim:
        ax.set_aspect('equal')
    else:
        ax.set_aspect('equal', 'datalim')

    if args.annotate_maximum or args.annotate_minimum:
        # Text offsets are a fixed fraction of the data extent.
        dx = 0.05 * (max(data_x) - min(data_x))
        dy = 0.025 * (max(data_y) - min(data_y))
        rect = ax.axis()
        points = list(zip(data_x, data_y))

        if args.annotate_maximum:
            # First point with the largest y.
            x_max, y_max = max(points, key=lambda p: p[1])
            _annotate_extreme(ax, 'maximum', x_max, y_max, dx, dy, rect)
        if args.annotate_minimum:
            # First point with the smallest y; prefer text below the point.
            x_min, y_min = min(points, key=lambda p: p[1])
            _annotate_extreme(ax, 'minimum', x_min, y_min, dx, -dy, rect)

    ax.set_xlabel(args.xlabel)
    ax.set_ylabel(args.ylabel)
    if is_3d:
        ax.set_zlabel(args.zlabel)

    fig.text(0.5, 0.05, args.title, ha='center')
    if args.output != '':
        plt.savefig(args.output)

    if args.show:
        plt.show()
156 |
def main():
    """Read points from a file or stdin and plot them.

    Every non-blank line must hold the same number of values: one (y only,
    x is the line index), two (x y) or three (x y z).

    Raises:
        RuntimeError: on a line with another number of values, or when the
            number of values changes between lines.
    """
    args = get_args()
    # '-' is the documented and default spelling for stdin; the older
    # stdin_fname sentinel is still accepted.
    use_stdin = args.input in ('-', stdin_fname)
    fin = sys.stdin if use_stdin else open(args.input)

    data_x = []
    data_y = []
    data_z = []
    data_format = -1
    try:
        for lineno, line in enumerate(fin):
            values = [float(tok) for tok in line.split()]
            if not values:
                continue

            z = None
            if len(values) == 2:
                line_data_format = 0
                x, y = values
            elif len(values) == 1:
                line_data_format = 1
                x, y = lineno, values[0]
            elif len(values) == 3:
                line_data_format = 2
                x, y, z = values
            else:
                raise RuntimeError(
                    'Can not parse input data at line {}'.format(lineno + 1))

            # The first data line fixes the format for the rest of the input.
            if data_format == -1:
                data_format = line_data_format
            elif line_data_format != data_format:
                raise RuntimeError(
                    'data format is not consistent, at line {}'.format(lineno + 1))

            data_x.append(x)
            data_y.append(y)
            if z is not None:
                data_z.append(z)
    finally:
        # Close the file even when parsing fails; never close stdin.
        if not use_stdin:
            fin.close()

    print(len(data_x))

    if len(data_x) == 1:
        return

    if args.xkcd:
        with plt.xkcd():
            do_plot(data_x, data_y, data_z, args)
    else:
        do_plot(data_x, data_y, data_z, args)
208 |
209 |
210 |
211 | if __name__ == '__main__':
212 | main()
213 |
--------------------------------------------------------------------------------
/plot-point.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python2
2 |
3 | import numpy as np
4 | import matplotlib.pyplot as plt
5 | import argparse, sys
6 |
7 | stdin_fname = '$stdin$'
8 |
def get_args():
    """Parse the command line and return the resulting namespace.

    Raises:
        Exception: if neither ``--show`` nor ``--output`` was given.
    """
    parser = argparse.ArgumentParser(
        description="plot points into graph. x and y seperated with white space in one line, or just y's")

    # (option names, keyword arguments), in the order shown by --help.
    option_table = [
        (('-i', '--input'),
         dict(help='input data file, "-" for stdin, default stdin', default='-')),
        (('-o', '--output'),
         dict(help='output image', default='')),
        (('--show',),
         dict(help='show the figure after rendered', action='store_true')),
        (('-t', '--title'),
         dict(help='title of the graph', default='')),
        (('--xlabel',),
         dict(help='x label', default='x')),
        (('--ylabel',),
         dict(help='y label', default='y')),
        (('--annotate-maximum',),
         dict(help='annonate maximum value in graph', action='store_true')),
        (('--annotate-minimum',),
         dict(help='annonate minimum value in graph', action='store_true')),
        (('--xkcd',),
         dict(help='xkcd style', action='store_true')),
    ]
    for names, options in option_table:
        parser.add_argument(*names, **options)

    args = parser.parse_args()

    if not args.show and not args.output:
        raise Exception("at least one of --show and --output/-o must be specified")

    return args
45 |
46 |
def filter_valid_range(points, rect):
    """Return the points that lie inside *rect*, in their original order.

    rect = (min_x, max_x, min_y, max_y); the bounds are inclusive.  When no
    point lies inside, the first point is returned as the only element.
    """
    min_x, max_x, min_y, max_y = rect[0], rect[1], rect[2], rect[3]
    inside = [(px, py) for px, py in points
              if min_x <= px <= max_x and min_y <= py <= max_y]
    return inside if inside else [points[0]]
56 |
def _annotate_extreme(ax, label, x, y, dx, dy, rect):
    """Annotate the point (x, y) on *ax* with *label* and its coordinates.

    The text goes at the first of four candidate offsets (+/-dx, +/-dy) that
    lies inside *rect*; offsets using *dy* with its given sign are tried first.
    """
    candidates = [(x + dx, y + dy), (x - dx, y + dy),
                  (x + dx, y - dy), (x - dx, y - dy)]
    text_x, text_y = filter_valid_range(candidates, rect)[0]
    ax.annotate('{} ({:.3f},{:.3f})'.format(label, x, y),
                xy=(x, y),
                xytext=(text_x, text_y),
                arrowprops=dict(arrowstyle='->'))


def do_plot(data_x, data_y, args):
    """Scatter-plot the points, then save and/or show the figure as requested.

    *args* is the namespace returned by ``get_args``.
    """
    fig = plt.figure(figsize=(16.18, 10))
    ax = fig.add_axes((0.1, 0.2, 0.8, 0.7))
    plt.scatter(data_x, data_y)

    if args.annotate_maximum or args.annotate_minimum:
        # Text offsets are a fixed fraction of the data extent.
        dx = 0.05 * (max(data_x) - min(data_x))
        dy = 0.025 * (max(data_y) - min(data_y))
        rect = ax.axis()
        points = list(zip(data_x, data_y))

        if args.annotate_maximum:
            # First point with the largest y.
            x_max, y_max = max(points, key=lambda p: p[1])
            _annotate_extreme(ax, 'maximum', x_max, y_max, dx, dy, rect)
        if args.annotate_minimum:
            # First point with the smallest y; prefer text below the point.
            x_min, y_min = min(points, key=lambda p: p[1])
            _annotate_extreme(ax, 'minimum', x_min, y_min, dx, -dy, rect)

    plt.xlabel(args.xlabel)
    plt.ylabel(args.ylabel)

    ax.grid(color='gray', linestyle='dashed')

    fig.text(0.5, 0.05, args.title, ha='center')
    if args.output != '':
        plt.savefig(args.output)

    if args.show:
        plt.show()
126 |
def main():
    """Read points from a file or stdin and plot them.

    Every non-blank line must hold the same number of values: one (y only,
    x is the line index) or two (x y).

    Raises:
        RuntimeError: on a line with another number of values, or when the
            number of values changes between lines.
    """
    args = get_args()
    # '-' is the documented and default spelling for stdin; the older
    # stdin_fname sentinel is still accepted.
    use_stdin = args.input in ('-', stdin_fname)
    fin = sys.stdin if use_stdin else open(args.input)

    data_x = []
    data_y = []
    data_format = -1
    try:
        for lineno, line in enumerate(fin):
            values = [float(tok) for tok in line.split()]
            if not values:
                continue

            if len(values) == 2:
                line_data_format = 0
                x, y = values
            elif len(values) == 1:
                line_data_format = 1
                x, y = lineno, values[0]
            else:
                raise RuntimeError(
                    'Can not parse input data at line {}'.format(lineno + 1))

            # The first data line fixes the format for the rest of the input.
            if data_format == -1:
                data_format = line_data_format
            elif line_data_format != data_format:
                raise RuntimeError(
                    'data format is not consistent, at line {}'.format(lineno + 1))

            data_x.append(x)
            data_y.append(y)
    finally:
        # Close the file even when parsing fails; never close stdin.
        if not use_stdin:
            fin.close()

    print(len(data_x))

    if len(data_x) == 1:
        return

    if args.xkcd:
        with plt.xkcd():
            do_plot(data_x, data_y, args)
    else:
        do_plot(data_x, data_y, args)
171 |
172 |
173 |
174 | if __name__ == '__main__':
175 | main()
176 |
--------------------------------------------------------------------------------
/python/pygmm.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python2
2 | # -*- coding: utf-8 -*-
3 | # $File: pygmm.py
4 | # $Date: Wed Dec 11 18:49:34 2013 +0800
5 | # $Author: Xinyu Zhou
6 |
7 | from ctypes import *
8 | import os
9 | from os import path
10 | from numpy import array
11 |
12 |
13 | dirname = path.dirname(path.abspath(__file__))
14 |
15 | pygmm = cdll.LoadLibrary(path.join(dirname, '../lib/pygmm.so'))
16 |
class GMMParameter(Structure):
    """ctypes structure holding the parameters passed to the native library.

    NOTE(review): field order and types presumably have to match the struct
    declared on the C++ side -- confirm against pygmm.hh before changing.
    """
    _fields_ = [
        ("nr_instance", c_int),
        ("nr_dim", c_int),
        ("nr_mixture", c_int),
        ("min_covar", c_double),
        ("threshold", c_double),
        ("nr_iteration", c_int),
        ("init_with_kmeans", c_int),
        ("concurrency", c_int),
        ("verbosity", c_int),
    ]
27 |
28 | #pygmm.train_model.argtypes = [c_char_p, POINTER(POINTER(c_double)), POINTER(GMMParameter)]
29 | pygmm.score_all.restype = c_double
30 | pygmm.score_instance.restype = c_double
31 |
# Covariance-type codes passed to the native new_gmm(); plain assignment
# replaces the previous exec()-generated definitions.
COVTYPE_SPHEREICAL, COVTYPE_DIAGONAL, COVTYPE_FULL = range(3)
35 |
class GMM(object):
    """Python wrapper around a GMM object owned by the native ``pygmm`` library.

    Training parameters are kept as plain attributes and copied into a
    GMMParameter structure whenever the native code is called.
    """

    def __init__(self, nr_mixture = 10,
            covariance_type = COVTYPE_DIAGONAL,
            min_covar = 1e-3,
            threshold = 0.01,
            nr_iteration = 200,
            init_with_kmeans = 1,
            concurrency = 1,
            verbosity = 2):
        # Explicit assignments replace the former exec()-based loop over
        # GMMParameter._fields_; covariance_type is only passed to new_gmm.
        self.nr_mixture = nr_mixture
        self.min_covar = min_covar
        self.threshold = threshold
        self.nr_iteration = nr_iteration
        self.init_with_kmeans = init_with_kmeans
        self.concurrency = concurrency
        self.verbosity = verbosity

        # NOTE(review): no restype is set for new_gmm/load, so ctypes treats
        # the result as a C int. If the native functions return a pointer it
        # can be truncated on 64-bit platforms -- confirm against pygmm.hh.
        self.gmm = pygmm.new_gmm(c_int(nr_mixture), c_int(covariance_type))

    @staticmethod
    def _as_c_path(path):
        """Return *path* as a ``c_char_p``, encoding text paths to bytes first."""
        if not isinstance(path, bytes):
            import sys
            path = path.encode(sys.getfilesystemencoding())
        return c_char_p(path)

    def _fill_param_from_model_file(self, model_file):
        """Set ``nr_mixture`` from the first line of *model_file*."""
        with open(model_file) as f:
            self.nr_mixture = int(f.readline().rstrip())

    @staticmethod
    def load(model_file):
        """Return a GMM whose native model is loaded from *model_file*."""
        # NOTE(review): GMM() creates a native object via new_gmm that is
        # replaced below without being released -- possible leak; confirm
        # whether the library exposes a matching free function.
        gmm = GMM()
        gmm._fill_param_from_model_file(model_file)
        gmm.gmm = pygmm.load(GMM._as_c_path(model_file))
        return gmm

    def dump(self, model_file):
        """Write the native model to *model_file*."""
        pygmm.dump(self.gmm, GMM._as_c_path(model_file))

    def _double_array_python_to_ctype(self, X_py):
        """Convert a sequence of float sequences into an array of double pointers."""
        rows = [(c_double * len(x))(*x) for x in X_py]
        return (POINTER(c_double) * len(rows))(*rows)

    def _gen_param(self, X):
        """Build a GMMParameter from this object's settings and the shape of *X*."""
        param = GMMParameter()
        for name, c_type in GMMParameter._fields_:
            if name in ('nr_instance', 'nr_dim'):
                continue
            setattr(param, name, c_type(getattr(self, name)))

        param.nr_instance = c_int(len(X))
        param.nr_dim = c_int(len(X[0]))
        return param

    def fit(self, X):
        """Train the native model on *X*, a sequence of equal-length float rows."""
        X_c = self._double_array_python_to_ctype(X)
        param = self._gen_param(X)
        pygmm.train_model(self.gmm, X_c, pointer(param))

    def score(self, X):
        """Return a numpy array with one native score per row of *X*."""
        X_c = self._double_array_python_to_ctype(X)
        param = self._gen_param(X)
        prob = (c_double * len(X))(*([0.0] * len(X)))
        pygmm.score_batch(self.gmm, X_c, prob, param.nr_instance,
                          param.nr_dim, param.concurrency)
        return array(list(prob))
100 |
101 |
102 | # vim: foldmethod=marker
103 |
104 |
--------------------------------------------------------------------------------
/python/test.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python2
2 | # -*- coding: utf-8 -*-
3 | # $File: test.py
4 | # $Date: Wed Dec 11 18:56:21 2013 +0800
5 | # $Author: Xinyu Zhou
6 |
7 | import pygmm
8 | from sklearn.mixture import GMM as SKGMM
9 | from numpy import *
10 | import numpy as np
11 |
12 |
def read_data(fname):
    """Read whitespace-separated numeric rows from *fname*.

    Returns a list with one list of floats per non-blank line.  Real lists
    are built so the result can be extended and indexed on Python 3, where
    ``map`` returns an iterator.
    """
    with open(fname) as fin:
        return [[float(tok) for tok in line.split()]
                for line in fin if line.strip()]
16 |
def get_gmm(where):
    """Return a fresh, untrained GMM from the implementation named *where*.

    *where* is ``'pygmm'`` or ``'sklearn'``; any other value yields ``None``.
    Both models use 256 mixtures and 1000 iterations.
    """
    nr_mixture, nr_iteration = 256, 1000
    if where == 'sklearn':
        return SKGMM(nr_mixture, n_iter=nr_iteration)
    if where == 'pygmm':
        return pygmm.GMM(nr_mixture=nr_mixture,
                         min_covar=1e-3,
                         nr_iteration=nr_iteration,
                         concurrency=4)
    return None
28 |
def random_vector(n, dim):
    """Return *n* lists, each holding *dim* values drawn from ``random.random()``."""
    from random import random as draw
    return [[draw() for _ in range(dim)] for _ in range(n)]
35 |
def extend_X(X, n):
    """Grow *X* in place so that it holds *n* repetitions of its original rows.

    The appended rows come from a deep copy taken before extending.  Nothing
    is appended when *n* is 1 or less.
    """
    import copy
    snapshot = copy.deepcopy(X)
    X.extend(snapshot * (n - 1))
41 |
42 | X = read_data('../test.data')
43 | X.extend(X + X + X)
44 | #X = random_vector(100, 13)
45 | #extend_X(X, 10)
46 | #print(len(X))
47 |
48 |
49 | #gmm_type = 'sklearn'
50 |
51 | global gmm
52 |
def timing(code):
    """Execute the statement string *code* and print the elapsed wall-clock seconds."""
    global gmm
    import time
    began = time.time()
    exec(code)
    elapsed = time.time() - began
    print(elapsed)
59 |
def test():
    """Train and score both GMM implementations on X, printing timings and scores.

    The pygmm model is also written to ``gmm.model``.
    """
    global gmm
    for gmm_type in ('pygmm', 'sklearn'):
        print(gmm_type)
        gmm = get_gmm(gmm_type)
        # timing() executes the string against the module-level ``gmm``.
        timing("gmm.fit(X)")
        if gmm_type == 'pygmm':
            gmm.dump('gmm.model')
        total_score = np.sum(gmm.score(X))
        print(total_score)
        print("-------")
70 |
71 | test()
72 |
73 | # vim: foldmethod=marker
74 |
75 |
--------------------------------------------------------------------------------
/src/Threadpool.cc:
--------------------------------------------------------------------------------
1 | #include "Threadpool/Threadpool.hpp"
2 |
3 | namespace ThreadLib {
4 |
5 | void Worker::operator()() {
6 | while(true) {
7 | std::unique_lock lock(pool.queue_mutex);
8 |
9 | while(!pool.stop && pool.tasks.empty())
10 | pool.condition.wait(lock);
11 |
12 | if(pool.stop && pool.tasks.empty())
13 | return;
14 |
15 | std::function task(pool.tasks.top().second);
16 | pool.tasks.pop();
17 |
18 | lock.unlock();
19 |
20 | task();
21 | }
22 | }
23 |
24 | // the constructor just launches some amount of workers
25 | Threadpool::Threadpool(Threadpool::size_type threads) : stop(false) {
26 | workers.reserve(threads);
27 |
28 | for(Threadpool::size_type i = 0; i < threads; ++i)
29 | workers.emplace_back(Worker(*this));
30 | }
31 |
32 | // the destructor joins all threads
33 | Threadpool::~Threadpool() {
34 | {
35 | std::unique_lock lock(queue_mutex);
36 | stop = true;
37 | condition.notify_all();
38 | }
39 |
40 | for(Threadpool::size_type i = 0; i < workers.size(); ++i) {
41 | workers[i].join();
42 | }
43 |
44 | }
45 | }
46 |
--------------------------------------------------------------------------------
/src/Threadpool/COPYING:
--------------------------------------------------------------------------------
1 | Copyright (c) 2012 Jakob Progsch
2 |
3 | This software is provided 'as-is', without any express or implied
4 | warranty. In no event will the authors be held liable for any damages
5 | arising from the use of this software.
6 |
7 | Permission is granted to anyone to use this software for any purpose,
8 | including commercial applications, and to alter it and redistribute it
9 | freely, subject to the following restrictions:
10 |
11 | 1. The origin of this software must not be misrepresented; you must not
12 | claim that you wrote the original software. If you use this software
13 | in a product, an acknowledgment in the product documentation would be
14 | appreciated but is not required.
15 |
16 | 2. Altered source versions must be plainly marked as such, and must not be
17 | misrepresented as being the original software.
18 |
19 | 3. This notice may not be removed or altered from any source
20 | distribution.
21 |
--------------------------------------------------------------------------------
/src/Threadpool/Makefile:
--------------------------------------------------------------------------------
# Builds the stand-alone thread pool example program.
CXX = g++
CXXFLAGS = -std=c++11 -Wall -Wextra -O2
LDFLAGS = -lpthread
TARGET = threadpool-example

# Neither target names a file, so always run them when requested.
.PHONY: all clean

all: $(TARGET)


# Libraries are listed after the object file: the linker resolves symbols
# from left to right.
$(TARGET): main.o Threadpool.hpp
	$(CXX) $(CXXFLAGS) main.o -o $(TARGET) $(LDFLAGS)

main.o: main.cpp Threadpool.hpp
	$(CXX) $(CXXFLAGS) -c $<


clean:
	rm -f *.o $(TARGET)
19 |
--------------------------------------------------------------------------------
/src/Threadpool/README.md:
--------------------------------------------------------------------------------
1 | ThreadPool
2 | ==========
3 |
4 | A simple C++11 Thread Pool implementation, providing an optional task priority.
5 |
6 | Forked from [Jakob Progsch's repository](https://github.com/progschj/ThreadPool).
7 |
8 |
9 | Possible improvements
10 | ---------------------
11 |
12 | * variadic enqueue, no need for std::bind
13 | * prevent starvation, priority aging
14 |
--------------------------------------------------------------------------------
/src/Threadpool/Threadpool.hpp:
--------------------------------------------------------------------------------
#ifndef THREAD_POOL_HPP
#define THREAD_POOL_HPP

#include <algorithm>
#include <condition_variable>
#include <functional>
#include <future>
#include <memory>
#include <mutex>
#include <queue>
#include <stdexcept>
#include <thread>
#include <utility>
#include <vector>

// MODIFICATION: wrap inside namespace ThreadLib
namespace ThreadLib {

class Threadpool;

// Callable run by each pool thread; operator() is defined out of line.
class Worker {
    public:
        Worker(Threadpool& s) : pool(s) { }

        void operator()();

    private:
        Threadpool& pool;
};

// Pool of worker threads fed from a priority-ordered task queue.
class Threadpool {
    public:
        typedef std::vector<std::thread>::size_type size_type;

        // Default: one worker per hardware thread reported, but at least one.
        Threadpool() : Threadpool(std::max(1u, std::thread::hardware_concurrency())) { }
        Threadpool(size_type);
        ~Threadpool();

        //template<class F> auto enqueue(F&& f, int priority = 0) -> std::future<decltype(std::forward<F>(f)())>;

        // Add a new work item to the pool.
        //
        // f        - callable taking no arguments
        // priority - ordering key; the entry with the smallest value is at
        //            the top of the queue (see task_comp)
        //
        // Returns a future for the result of f().
        // Throws std::runtime_error if the pool has already been stopped.
        template<class F>
        auto enqueue(F&& f, int priority) -> std::future<decltype(std::forward<F>(f)())> {
            typedef decltype(std::forward<F>(f)()) R;

            // packaged_task is move-only; a shared_ptr makes the queued
            // lambda copyable so it can be stored in std::function.
            auto task = std::make_shared<std::packaged_task<R()>>(std::forward<F>(f));
            std::future<R> res = task->get_future();

            {
                std::unique_lock<std::mutex> lock(queue_mutex);

                // Checked under the mutex: the destructor writes stop while
                // holding the same lock.
                if(stop)
                    throw std::runtime_error("enqueue on stopped threadpool");

                tasks.emplace(priority, [task]{ (*task)(); });
            }

            condition.notify_one();

            return res;
        }

    private:
        friend class Worker;

        // need to keep track of threads so we can join them
        std::vector<std::thread> workers;

        typedef std::pair<int, std::function<void()>> priority_task;

        // emulate 'nice': with this ordering the smallest priority value
        // ends up at the top of the priority queue
        struct task_comp {
            bool operator()(const priority_task& lhs, const priority_task& rhs) const {
                return lhs.first > rhs.first;
            }
        };

        // the prioritized task queue
        std::priority_queue<priority_task, std::vector<priority_task>, task_comp> tasks;

        // synchronization
        std::mutex queue_mutex;
        std::condition_variable condition;
        bool volatile stop;
};
}

#endif
88 |
--------------------------------------------------------------------------------
/src/Threadpool/main.cpp.prevent-compile:
--------------------------------------------------------------------------------
1 | // workaround
2 | #define _GLIBCXX_USE_NANOSLEEP
3 |
4 | #include
5 | #include
6 | #include
7 | #include
8 | #include
9 |
10 | #include "Threadpool.hpp"
11 |
12 |
13 | int main() {
14 | // feel free to raise the number of threads;
15 | // or use the default constructor for automatic detection
16 | Threadpool pool(1);
17 |
18 | // dummy task
19 | auto task = [](int priority, int sleep_sec) -> int {
20 | std::cout << "processing task with priority " << priority << " in thread with id " << std::this_thread::get_id() <> results;
29 |
30 | // priority is cycled every 4 tasks, in order to see the prioritizing effect
31 | for(auto i = 0; i < num_tasks; ++i)
32 | results.emplace_back(pool.enqueue(std::bind(task, i % 4, 1)));
33 |
34 | // we need the results here
35 | for(auto i = 0; i < num_tasks; ++i)
36 | std::cout << "result of task " << i << " is " << results[i].get() << std::endl;
37 | }
38 |
39 | /**
40 | * vim: syntax=cpp11
41 | */
42 |
43 |
--------------------------------------------------------------------------------
/src/Threadpool/threadpool-example:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zxytim/fast-gmm/5b6940d62c950889d51d5a3dfc99907e3b631958/src/Threadpool/threadpool-example
--------------------------------------------------------------------------------
/src/common.hh:
--------------------------------------------------------------------------------
1 | /*
2 | * $File: common.hh
3 | * $Date: Sun Sep 08 08:35:24 2013 +0800
4 | * $Author: Xinyu Zhou
5 | */
6 |
7 | #include "type.hh"
8 | #include "dataset.hh"
9 |
10 | #include
11 | #include
12 | #include
13 | #include
14 |
15 | #include
16 | #include
17 | #include
18 | #include
19 | #include