├── tests └── run.cec.tests.R ├── data ├── Tset.rda ├── mixShapes.rda ├── fourGaussians.rda └── threeGaussians.rda ├── inst └── cec_tests │ ├── centers2.data │ ├── centers1.data │ ├── centers3d.data │ ├── four.gaussians.centers.data │ ├── centers43d.data │ ├── variable.centers.test.R │ ├── covariance.calculation.test.R │ ├── clustering.test.R │ ├── split.test.R │ ├── energy.calculation.test.mouseset1.R │ ├── energy.calculation.test.ball1.R │ ├── energy.calculation.test.various.data.sets.R │ ├── four.gaussians.result.dp │ └── two.gausses.4d.data ├── .travis.yml ├── .Rbuildignore ├── src ├── Makevars ├── r_result.h ├── random.cpp ├── random.h ├── cec_starter.cpp ├── m.h ├── cec_r.h ├── r_params.h ├── variable_starter.cpp ├── models │ ├── diagonal.h │ ├── model.h │ ├── spherical.h │ ├── all.h │ ├── fixed_radius.h │ ├── fixed_mean.h │ ├── fixed_covariance.h │ ├── cov_utils.h │ ├── fixed_eigenvalues.h │ └── cov_utils.cpp ├── common.h ├── variable_starter.h ├── r_result.cpp ├── split_starter.h ├── starter.h ├── init.h ├── cluster.h ├── cec_starter.h ├── init.cpp ├── r_ext_ptr.h ├── params.cpp ├── cov.h ├── r_params.cpp ├── exceptions.h ├── split_starter.cpp ├── starter.cpp ├── r_utils.h ├── params.h ├── cec_r.cpp ├── parallel_starter.h └── vec.h ├── man ├── Tset.Rd ├── fourGaussians.Rd ├── mixShapes.Rd ├── threeGaussians.Rd ├── run.cec.tests.Rd ├── print.cec.Rd ├── ball.Rd ├── init.centers.Rd ├── CEC-package.Rd ├── mouseset.Rd ├── plot.cec.Rd └── cec.Rd ├── cec.Rproj ├── R ├── init.centers.R ├── ellipse.R ├── print.cec.R ├── model.covariance.R ├── plot.cec.R ├── utils.R ├── cec.params.R ├── tests.R └── cec.R ├── NAMESPACE ├── DESCRIPTION ├── NEWS.md ├── CMakeLists.txt └── README.md /tests/run.cec.tests.R: -------------------------------------------------------------------------------- 1 | CEC:::run.cec.tests() -------------------------------------------------------------------------------- /data/Tset.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/azureblue/cec/HEAD/data/Tset.rda -------------------------------------------------------------------------------- /data/mixShapes.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/azureblue/cec/HEAD/data/mixShapes.rda -------------------------------------------------------------------------------- /data/fourGaussians.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/azureblue/cec/HEAD/data/fourGaussians.rda -------------------------------------------------------------------------------- /data/threeGaussians.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/azureblue/cec/HEAD/data/threeGaussians.rda -------------------------------------------------------------------------------- /inst/cec_tests/centers2.data: -------------------------------------------------------------------------------- 1 | -1.83842953015119 -0.579769441857934 2 | 1.87598357236481 1.81867028901822 3 | -------------------------------------------------------------------------------- /inst/cec_tests/centers1.data: -------------------------------------------------------------------------------- 1 | -1.83842953015119 -0.579769441857934 2 | 1.87598357236481 1.81867028901822 3 | -1.32322026322133 2.14774889925963 4 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: r 2 | sudo: false 3 | cache: packages 4 | warnings_are_errors: false 5 | r: 6 | - oldrel 7 | - release 8 | - devel 9 | notifications: 10 | email: false 11 | -------------------------------------------------------------------------------- /inst/cec_tests/centers3d.data: -------------------------------------------------------------------------------- 1 | 0.043744727037847 0.293719492852688 0.0458224480971694 2 | -1.4780771696335 2.5249770479603 -0.2801017282065 3 | 2.52717570730954 2.48268893038391 0.313854934321717 4 | -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.o$ 2 | ^.*\.Rproj$ 3 | ^\.Rproj\.user$ 4 | \.travis\.yml$ 5 | \.idea$ 6 | LICENSE$ 7 | ^cmake-build* 8 | CMakeLists.txt$ 9 | CMakeFiles$ 10 | ^.*\.tar.gz$ 11 | ^.*\.sh$ 12 | -------------------------------------------------------------------------------- /inst/cec_tests/four.gaussians.centers.data: -------------------------------------------------------------------------------- 1 | 1.01907725638331 2.35490540074148 2 | -3.02767008221039 -2.86906071414027 3 | -0.0242720163788669 -0.648301422440495 4 | 2.55004588700554 0.0810649917238939 5 | -------------------------------------------------------------------------------- /src/Makevars: -------------------------------------------------------------------------------- 1 | CXX_STD = CXX11 2 | PKG_LIBS = $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) 3 | OBJECTS = cec_r.o cec_starter.o init.o models/cov_utils.o params.o r_params.o r_result.o random.o split_starter.o starter.o variable_starter.o 4 | -------------------------------------------------------------------------------- /src/r_result.h: -------------------------------------------------------------------------------- 1 | #ifndef RESULT_R_H 2 | #define RESULT_R_H 3 | 4 | #include "starter.h" 5 | 6 | #include 7 | 8 | namespace cec { 9 | SEXP create_R_result(const clustering_results &res); 10 | } 11 | 12 | #endif //RESULT_R_H 13 | -------------------------------------------------------------------------------- /inst/cec_tests/centers43d.data: -------------------------------------------------------------------------------- 1 | 0.043744727037847 0.293719492852688 0.0458224480971694 2 | -2.78402884428858 1.8008775928384 -0.205372427916154 3 | -0.598468789830804 -0.0386110367253423 1.81109665054828 4 | 2.62663089445329 1.79166762915908 0.17358229495585 5 | -------------------------------------------------------------------------------- /man/Tset.Rd: -------------------------------------------------------------------------------- 1 | \name{Tset} 2 | \alias{Tset} 3 | \docType{data} 4 | \title{ 5 | Tset 6 | } 7 | \description{ 8 | Matrix of 2-dimensional points that form T letter. 9 | } 10 | \usage{data(Tset)} 11 | 12 | \examples{ 13 | data(Tset) 14 | plot(Tset, cex = 0.5, pch = 19); 15 | } 16 | \keyword{datasets} 17 | -------------------------------------------------------------------------------- /src/random.cpp: -------------------------------------------------------------------------------- 1 | #include "random.h" 2 | unsigned long cec::random::seed = 0; 3 | 4 | cec::random::rand_gen cec::random::create_generator() { 5 | std::mt19937 mt(seed); 6 | seed++; 7 | return mt; 8 | } 9 | 10 | void cec::random::set_seed(result_type seed) noexcept { 11 | random::seed = seed; 12 | } 13 | -------------------------------------------------------------------------------- /man/fourGaussians.Rd: -------------------------------------------------------------------------------- 1 | \name{fourGaussians} 2 | \alias{fourGaussians} 3 | \docType{data} 4 | \title{ 5 | fourGaussians 6 | } 7 | \description{ 8 | Matrix of 2-dimensional points of four Gaussians. 9 | } 10 | \usage{data(fourGaussians)} 11 | 12 | \examples{ 13 | data(fourGaussians) 14 | plot(fourGaussians, cex = 0.5, pch = 19); 15 | } 16 | \keyword{datasets} 17 | -------------------------------------------------------------------------------- /man/mixShapes.Rd: -------------------------------------------------------------------------------- 1 | \name{mixShapes} 2 | \alias{mixShapes} 3 | \docType{data} 4 | \title{ 5 | mixShapes 6 | } 7 | \description{ 8 | Matrix of 2-dimensional points that form circular and elliptical patterns. 9 | } 10 | \usage{data(mixShapes)} 11 | 12 | \examples{ 13 | data(mixShapes) 14 | plot(mixShapes, cex = 0.5, pch = 19); 15 | } 16 | \keyword{datasets} 17 | -------------------------------------------------------------------------------- /cec.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 4 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | BuildType: Package 16 | PackageInstallArgs: --with-keep.source 17 | PackageCheckArgs: --as-cran 18 | -------------------------------------------------------------------------------- /man/threeGaussians.Rd: -------------------------------------------------------------------------------- 1 | \name{threeGaussians} 2 | \alias{threeGaussians} 3 | \docType{data} 4 | \title{ 5 | threeGaussians 6 | } 7 | \description{ 8 | Matrix of 2-dimensional points from three Gaussians with means equal (0, 0). 9 | } 10 | \usage{data(threeGaussians)} 11 | 12 | \examples{ 13 | data(threeGaussians) 14 | plot(threeGaussians, cex = 0.5, pch = 19); 15 | } 16 | \keyword{datasets} 17 | -------------------------------------------------------------------------------- /R/init.centers.R: -------------------------------------------------------------------------------- 1 | init.centers <- function(x, k, method = c("kmeans++", "random")) 2 | { 3 | method <- switch ( match.arg(method), "kmeans++" = "kmeanspp", "random" = "random") 4 | 5 | if (! is.matrix(x)) stop("init.centers: x is not a matrix") 6 | if (k < 0) stop("init.centers: k < 0") 7 | 8 | centers <- .Call(cec_init_centers_r, x, as.integer(k), method); 9 | centers 10 | } 11 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | importFrom("graphics", "lines", "par", "plot", "points", "title") 2 | importFrom("methods", "hasArg") 3 | importFrom("stats", "complete.cases", "runif") 4 | importFrom("utils", "lsf.str") 5 | export(cec) 6 | export(init.centers) 7 | export(mouseset) 8 | export(ball) 9 | S3method(print, cec) 10 | S3method(plot, cec) 11 | useDynLib(CEC, cec_r) 12 | useDynLib(CEC, cec_split_r) 13 | useDynLib(CEC, cec_init_centers_r) 14 | -------------------------------------------------------------------------------- /src/random.h: -------------------------------------------------------------------------------- 1 | #ifndef CEC_RANDOM_H 2 | #define CEC_RANDOM_H 3 | 4 | #include 5 | 6 | namespace cec { 7 | class random { 8 | public: 9 | typedef std::mt19937 rand_gen; 10 | typedef rand_gen::result_type result_type; 11 | 12 | static rand_gen create_generator(); 13 | 14 | static void set_seed(result_type seed) noexcept; 15 | 16 | private: 17 | static unsigned long seed; 18 | }; 19 | } 20 | #endif //CEC_RANDOM_H 21 | -------------------------------------------------------------------------------- /src/cec_starter.cpp: -------------------------------------------------------------------------------- 1 | #include "cec_starter.h" 2 | 3 | namespace cec { 4 | unique_ptr cec_starter::start(const clustering_input &ip) { 5 | const mat &x = ip.x; 6 | const vector> &models = ip.models; 7 | int k = models.size(); 8 | best.reset(); 9 | for (int i = 0; i < starts; i++) 10 | best(cec.start(x, closest.init(x, init->init(x, k)), models)); 11 | return best(); 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /src/m.h: -------------------------------------------------------------------------------- 1 | #ifndef CEC_M_H 2 | #define CEC_M_H 3 | 4 | #include 5 | #include 6 | 7 | namespace cec { 8 | namespace m { 9 | const double PI = 3.14159265358979323846; 10 | const double E = 2.7182818284590452354; 11 | const double QNAN = std::numeric_limits::quiet_NaN(); 12 | const double INF = std::numeric_limits::infinity(); 13 | using std::log; 14 | using std::isnan; 15 | } 16 | } 17 | #endif //CEC_M_H 18 | -------------------------------------------------------------------------------- /src/cec_r.h: -------------------------------------------------------------------------------- 1 | #ifndef CEC_R_H 2 | #define CEC_R_H 3 | 4 | #include 5 | #include 6 | 7 | extern "C" { 8 | SEXP cec_r(SEXP x_r, SEXP centers_param_r, SEXP control_param_r, SEXP models_param_r); 9 | SEXP cec_init_centers_r(SEXP x_r, SEXP k_r, SEXP method_r); 10 | SEXP cec_split_r(SEXP x_r, SEXP centers_param_r, SEXP control_param_r, SEXP models_param_r, 11 | SEXP split_param_r); 12 | void R_init_CEC(DllInfo *dllInfo); 13 | } 14 | 15 | #endif /* CEC_R_H */ 16 | -------------------------------------------------------------------------------- /man/run.cec.tests.Rd: -------------------------------------------------------------------------------- 1 | \name{run.cec.tests} 2 | \alias{run.cec.tests} 3 | 4 | \title{ 5 | CEC package tests. 6 | } 7 | \description{ 8 | This function is used to run \code{cec} package "unit test"-like system. 9 | The set of tests is located in \code{inst/cec_tests} directory and it consists of .R files 10 | defining each test case. This is also used for R CMD check. 11 | } 12 | 13 | \usage{ 14 | run.cec.tests() 15 | } 16 | 17 | \value{No return value. Stops with error when a test fails.} 18 | \concept{unit testing} 19 | -------------------------------------------------------------------------------- /src/r_params.h: -------------------------------------------------------------------------------- 1 | #ifndef CEC_PARAMS_R_H 2 | #define CEC_PARAMS_R_H 3 | 4 | #include "params.h" 5 | #include "r_ext_ptr.h" 6 | #include 7 | 8 | namespace cec { 9 | namespace r { 10 | r_ext_ptr get_centers_param(SEXP centers_param_r); 11 | 12 | r_ext_ptr get_control_param(SEXP control_param_r); 13 | 14 | r_ext_ptr get_models_param(SEXP models_param_r, int n); 15 | 16 | r_ext_ptr get_split_param(SEXP split_param_r); 17 | } 18 | } 19 | 20 | #endif //CEC_PARAMS_R_H 21 | -------------------------------------------------------------------------------- /src/variable_starter.cpp: -------------------------------------------------------------------------------- 1 | #include "variable_starter.h" 2 | 3 | namespace cec { 4 | unique_ptr 5 | variable_starter::start(const mat &x, vector> m_specs) { 6 | best_results_collector best; 7 | for (auto &&k : centers_number) { 8 | vector> models_specs_subset(m_specs.begin(), m_specs.begin() + k); 9 | try { 10 | best(cl_starter(x, models_specs_subset)); 11 | } catch (clustering_exception &ce) {} 12 | } 13 | return best(); 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /R/ellipse.R: -------------------------------------------------------------------------------- 1 | # draws ellipse for the given mean vector and covariance matrix 2 | ellipse <- function (mean, cov, npoints = 250) 3 | { 4 | E <- eigen((cov)) 5 | eve <- E$vec 6 | eva <- E$val 7 | r <- seq(-pi, pi, len=npoints) 8 | Xa <- 2 * sqrt(eva[1]) * cos(r) 9 | Ya <- 2 * sqrt(eva[2]) * sin(r) 10 | mm <- c(rep(mean[1], npoints), rep(mean[2], npoints)) 11 | means.multiplied <- matrix(mm, nrow = length(Ya), ncol = 2) 12 | pts <- cbind(Xa,Ya); 13 | pts <- pts %*% eve 14 | pts[, 1] <- pts[, 1] * -1 15 | pts <- pts + means.multiplied 16 | pts 17 | } 18 | -------------------------------------------------------------------------------- /man/print.cec.Rd: -------------------------------------------------------------------------------- 1 | \name{print.cec} 2 | \alias{print.cec} 3 | 4 | \title{ 5 | Print CEC. 6 | } 7 | 8 | \description{ 9 | Presents a structure of the \code{cec} results object in the form of text. 10 | } 11 | 12 | \usage{ 13 | \method{print}{cec}(x, ...) 14 | } 15 | 16 | \arguments{ 17 | 18 | \item{x}{ 19 | Result of the \code{cec} function. 20 | } 21 | 22 | \item{\dots}{ 23 | Ignored. 24 | } 25 | } 26 | 27 | \value{Returns the summary of the \code{cec} function result as text.} 28 | 29 | \seealso{ 30 | \code{\link{plot.cec}} 31 | } 32 | 33 | \examples{ 34 | ## See the examples of function cec. 35 | } 36 | -------------------------------------------------------------------------------- /src/models/diagonal.h: -------------------------------------------------------------------------------- 1 | #ifndef DIAGONAL_H 2 | #define DIAGONAL_H 3 | 4 | #include "cov_utils.h" 5 | #include "model.h" 6 | 7 | namespace cec { 8 | class diagonal: public model { 9 | public: 10 | explicit diagonal(int n) 11 | : ce_constant(n * std::log(2.0 * m::PI * m::E)) {} 12 | 13 | double cross_entropy(const covariance &cov) const noexcept override { 14 | double diag = diagonal_product(cov); 15 | return (ce_constant + std::log(diag)) / 2; 16 | } 17 | 18 | private: 19 | const double ce_constant; 20 | }; 21 | } 22 | #endif /* DIAGONAL_H */ 23 | 24 | -------------------------------------------------------------------------------- /src/models/model.h: -------------------------------------------------------------------------------- 1 | #ifndef CEC_MODEL_H 2 | #define CEC_MODEL_H 3 | 4 | #include "../vec.h" 5 | #include "../m.h" 6 | #include "../cov.h" 7 | 8 | namespace cec { 9 | class model { 10 | public: 11 | explicit model() = default; 12 | 13 | virtual ~model() = default; 14 | 15 | virtual double cross_entropy(const covariance &cov) const noexcept = 0; 16 | 17 | inline double energy(const covariance &cov, int m) const noexcept { 18 | double p = cov.card() / (double) m; 19 | return p * (-m::log(p) + cross_entropy(cov)); 20 | } 21 | }; 22 | } 23 | 24 | #endif //CEC_MODEL_H 25 | -------------------------------------------------------------------------------- /man/ball.Rd: -------------------------------------------------------------------------------- 1 | \name{ball} 2 | \alias{ball} 3 | 4 | \title{ 5 | Ball 6 | } 7 | 8 | \description{ 9 | Generates points that form a ball with uniform density. 10 | } 11 | 12 | \usage{ 13 | ball(n, r, dim) 14 | } 15 | 16 | \arguments{ 17 | 18 | \item{n}{ 19 | Number of points to generate. 20 | } 21 | 22 | \item{r}{ 23 | Radius of the ball. 24 | } 25 | 26 | \item{dim}{ 27 | Dimension of the points. 28 | } 29 | 30 | } 31 | \value{ 32 | Matrix of points with \code{n} rows and \code{dim} cols. 33 | } 34 | 35 | \seealso{ 36 | \code{\link{mouseset}} 37 | } 38 | 39 | \examples{ 40 | M = ball(4000, 0.9) 41 | plot(M, cex = 0.5, pch = 19) 42 | } 43 | 44 | \keyword{datagen} 45 | -------------------------------------------------------------------------------- /R/print.cec.R: -------------------------------------------------------------------------------- 1 | print.cec <- function(x, ...) 2 | { 3 | cat("CEC clustering result: \n") 4 | cat("\nProbability vector:\n") 5 | print(x$probability) 6 | cat("\nMeans of clusters:\n") 7 | print(x$centers) 8 | cat("\nCost function:\n") 9 | print(x$cost) 10 | cat("\nNumber of clusters:\n") 11 | print(x$nclusters) 12 | cat("\nNumber of iterations:\n") 13 | print(x$iterations) 14 | cat("\nComputation time:\n") 15 | print(x$time) 16 | cat("\nAvailable components:\n") 17 | print(c("data", "cluster", "probabilities", "centers", "cost.function", "nclusters", "iterations", "covariances", "covariances.model", "time" )) 18 | } 19 | -------------------------------------------------------------------------------- /src/models/spherical.h: -------------------------------------------------------------------------------- 1 | #ifndef SPHERICAL_H 2 | #define SPHERICAL_H 3 | 4 | #include "cov_utils.h" 5 | #include "model.h" 6 | 7 | namespace cec { 8 | class spherical: public model { 9 | public: 10 | explicit spherical(int n) 11 | : n(n), 12 | ce_constant(std::log(2.0 * m::PI * m::E / n)) {} 13 | 14 | double cross_entropy(const covariance &cov) const noexcept override { 15 | double tr = trace(cov); 16 | return (ce_constant + std::log(tr)) * n / 2; 17 | } 18 | 19 | private: 20 | const int n; 21 | const double ce_constant; 22 | }; 23 | } 24 | #endif /* SPHERICAL_H */ 25 | 26 | -------------------------------------------------------------------------------- /src/common.h: -------------------------------------------------------------------------------- 1 | #ifndef CEC_STD_COMMON_H 2 | #define CEC_STD_COMMON_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | namespace cec { 9 | using std::vector; 10 | using std::string; 11 | using std::unique_ptr; 12 | using std::shared_ptr; 13 | using std::make_shared; 14 | 15 | #if defined(__cplusplus) && __cplusplus >= 201402L || defined(__cpp_lib_make_unique) 16 | using std::make_unique; 17 | #else 18 | template 19 | std::unique_ptr make_unique(Args&& ...args) { 20 | return std::unique_ptr(new T(std::forward(args)...)); 21 | } 22 | #endif 23 | } 24 | 25 | #endif //CEC_STD_COMMON_H 26 | -------------------------------------------------------------------------------- /src/models/all.h: -------------------------------------------------------------------------------- 1 | #ifndef ALL_H 2 | #define ALL_H 3 | 4 | #include "cov_utils.h" 5 | #include "model.h" 6 | #include "../m.h" 7 | 8 | namespace cec { 9 | class all: public model { 10 | public: 11 | explicit all(int n) 12 | : det_calc(n), 13 | ce_constant(n * std::log(2.0 * m::PI * m::E)) {} 14 | 15 | double cross_entropy(const covariance &cov) const noexcept override { 16 | double det = det_calc.determinant(cov); 17 | return (ce_constant + m::log(det)) / 2; 18 | } 19 | 20 | private: 21 | determinant_calculator det_calc; 22 | const double ce_constant; 23 | }; 24 | } 25 | #endif /* ALL_H */ 26 | 27 | -------------------------------------------------------------------------------- /src/models/fixed_radius.h: -------------------------------------------------------------------------------- 1 | #ifndef FIXED_RADIUS_H 2 | #define FIXED_RADIUS_H 3 | 4 | #include "cov_utils.h" 5 | #include "model.h" 6 | 7 | namespace cec { 8 | class fixed_radius: public model { 9 | public: 10 | explicit fixed_radius(int n, double r) 11 | : r(r), 12 | ce_constant(n * m::log(2.0 * m::PI * r) / 2.0) {} 13 | 14 | double cross_entropy(const covariance &cov) const noexcept override { 15 | double tr = trace(cov); 16 | return ce_constant + tr / (2.0 * r); 17 | } 18 | 19 | private: 20 | const double r; 21 | const double ce_constant; 22 | }; 23 | } 24 | #endif /* FIXED_RADIUS_H */ 25 | 26 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: CEC 2 | Title: Cross-Entropy Clustering 3 | Version: 0.10.3 4 | Date: 2021-09-20 5 | Author: Konrad Kamieniecki [aut, cre], Przemyslaw Spurek [ctb] 6 | Maintainer: Konrad Kamieniecki 7 | Description: Clustering that splits data into Gaussian type clusters. The implementation allows the simultaneous use of various type Gaussian mixture models, performs the reduction of unnecessary clusters and it's able to discover new groups. Based on Spurek, P. and Tabor, J. (2014) . 8 | ByteCompile: true 9 | URL: https://github.com/azureblue/cec 10 | Encoding: UTF-8 11 | NeedsCompilation: yes 12 | SystemRequirements: C++11 13 | License: GPL-3 14 | Imports: graphics, methods, stats, utils 15 | Packaged: 2021-09-20 21:50:01 UTC; konrad 16 | -------------------------------------------------------------------------------- /inst/cec_tests/variable.centers.test.R: -------------------------------------------------------------------------------- 1 | testname <- "Variable centers number" 2 | 3 | setup <- function() { 4 | set.seed(1234567) 5 | X1 = matrix(rnorm(1000), 500, 2) 6 | X2 = rbind(X1, X1 + 5) 7 | X3 = rbind(X2, X1 + 10) 8 | } 9 | 10 | test.should.use.1.cluster <- function() { 11 | C = cec(X1, 1:3, "sp", keep.removed=T, iter.max=0, card.min=4, nstart=10) 12 | CEC:::checkNumericEquals(1, nrow(C$centers)) 13 | } 14 | 15 | test.should.use.2.cluster <- function() { 16 | C = cec(X2, 1:3, "sp", keep.removed=T, iter.max=0, card.min=4, nstart=10) 17 | CEC:::checkNumericEquals(2, nrow(C$centers)) 18 | } 19 | 20 | test.should.use.3.cluster <- function() { 21 | C = cec(X3, 1:3, "sp", keep.removed=T, iter.max=0, card.min=4, nstart=5) 22 | CEC:::checkNumericEquals(3, nrow(C$centers)) 23 | } 24 | -------------------------------------------------------------------------------- /inst/cec_tests/covariance.calculation.test.R: -------------------------------------------------------------------------------- 1 | testname <- "Covariance calculation" 2 | setup <- function() 3 | { 4 | B <- as.matrix(read.table(system.file("cec_tests", "ball1.data", package="CEC"))) 5 | centers <- as.matrix(read.table(system.file("cec_tests", "centers2.data", package="CEC"))) 6 | } 7 | 8 | test.covariances.before.first.iteraion <- function() 9 | { 10 | M <- matrix(c(-1, 102, 141, -1, 104, 2, -1, -1, 12, 4), 5, 2) 11 | cov <- cov.mle(M) 12 | C <- cec(M, centers=1, iter.max=0) 13 | checkNumericMatrixEquals(cov, C$covariances[[1]], msg="Covariances") 14 | } 15 | 16 | test.covariances.after.point.movements.between.clusters <- function() 17 | { 18 | cov <- cov.mle(B) 19 | C <- cec(B, centers=centers, type="sp") 20 | checkNumericMatrixEquals(cov, C$covariances[[1]], msg="Covariances") 21 | } 22 | -------------------------------------------------------------------------------- /src/variable_starter.h: -------------------------------------------------------------------------------- 1 | #ifndef CEC_VARIABLE_STARTER_H 2 | #define CEC_VARIABLE_STARTER_H 3 | 4 | #include "cec_starter.h" 5 | #include "parallel_starter.h" 6 | 7 | namespace cec { 8 | class variable_starter { 9 | public: 10 | using clustering_function = std::function 11 | (const mat&, const vector>&)>; 12 | 13 | variable_starter(clustering_function &&cl_starter, vector centers_number) 14 | : cl_starter(std::move(cl_starter)), 15 | centers_number(centers_number) {} 16 | 17 | unique_ptr 18 | start(const mat &x, vector> m_specs); 19 | 20 | private: 21 | clustering_function cl_starter; 22 | vector centers_number; 23 | }; 24 | } 25 | 26 | #endif //CEC_VARIABLE_STARTER_H 27 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | v0.10.3 2 | ------- 3 | - fixing gcc-11 issues 4 | - fixing other CRAN issues 5 | 6 | v0.10.2 7 | ------- 8 | - adding fixed mean model 9 | - adding data set: threeGaussians 10 | - fixing compilation issues on some platforms 11 | 12 | v0.10.1 13 | ------ 14 | - rewriting all C the code in C++11 15 | - adding split method 16 | - adding threads 17 | 18 | v0.9.4 19 | ------ 20 | - adding README.md 21 | - lots of refactoring 22 | - small fixes 23 | 24 | v0.9.3 25 | ------ 26 | - giving up support of -1 iterations (fixing memcheck problems) 27 | - changing the way initial centers vector is handled: for each start, length(centers) clusterings are performed 28 | - adding two datasets: fourGaussians and mixShapes 29 | 30 | v0.9.2 31 | ------ 32 | - checking input data for NA values (session crushing) 33 | - changing 'ZERO_EPSILON' to '1.0e-32' 34 | - lots of refactoring 35 | 36 | v0.9.1 37 | ------ 38 | Initial Release. 39 | -------------------------------------------------------------------------------- /man/init.centers.Rd: -------------------------------------------------------------------------------- 1 | \name{init.centers} 2 | \alias{init.centers} 3 | \title{ 4 | Center initialization 5 | } 6 | \description{ 7 | Creates a matrix of \code{k} points (centers) based on a given matrix of points. 8 | One of two method can be used: Kmeans++ centers initialization method or a random choice of data points. 9 | } 10 | \usage{ 11 | init.centers(x, k, method = c("kmeans++", "random")) 12 | } 13 | \arguments{ 14 | 15 | \item{x}{ 16 | Dataset as a matrix of n-dimensional points. 17 | } 18 | 19 | \item{k}{ 20 | Number of points (centers) to generate. 21 | } 22 | 23 | \item{method}{ 24 | Generation method. Possible values are: "kmeans++", "random.points". 25 | } 26 | 27 | } 28 | 29 | \value{ 30 | Matrix points (centers) with \code{k} rows. 31 | } 32 | 33 | \examples{ 34 | m = matrix(runif(3000), 1000, 3) 35 | init.centers(m, 3, method = "km") 36 | } 37 | 38 | \keyword{ ~centers } 39 | \keyword{ ~initialization } 40 | -------------------------------------------------------------------------------- /man/CEC-package.Rd: -------------------------------------------------------------------------------- 1 | \name{CEC-package} 2 | \alias{CEC-package} 3 | \docType{package} 4 | \title{ 5 | Cross-Entropy Clustering 6 | } 7 | \description{ 8 | CEC divides data into Gaussian type clusters. The implementation allows the simultaneous use of various type Gaussian mixture models, performs the reduction of unnecessary clusters and it's able to discover new groups. Based on Spurek, P. and Tabor, J. (2014) \code{cec}. 9 | } 10 | \details{ 11 | \tabular{ll}{ 12 | Package: \tab CEC\cr 13 | Type: \tab Package\cr 14 | Version: \tab 0.10.2\cr 15 | Date: \tab 2018-07-26\cr 16 | License: \tab GPL-3 \cr 17 | } 18 | } 19 | \author{ 20 | Konrad Kamieniecki 21 | } 22 | \seealso{ 23 | \code{\link{cec}} 24 | } 25 | 26 | \keyword{cluster} 27 | \keyword{models} 28 | \keyword{multivariate} 29 | \keyword{package} 30 | \concept{entropy} 31 | \concept{gaussian} 32 | \concept{gaussian mixture models} 33 | \concept{kmeans} 34 | -------------------------------------------------------------------------------- /inst/cec_tests/clustering.test.R: -------------------------------------------------------------------------------- 1 | testname <- "Clustering" 2 | setup <- function() 3 | { 4 | data("fourGaussians", package = "CEC") 5 | centers <- as.matrix(read.table(system.file("cec_tests", "four.gaussians.centers.data", package="CEC"))) 6 | expected <- dget(system.file("cec_tests", "four.gaussians.result.dp", package="CEC")) 7 | } 8 | 9 | test.clustering.four.gaussians <- function() 10 | { 11 | CEC <- cec(fourGaussians, centers) 12 | CEC:::checkNumericVectorEquals(expected$cluster, CEC$cluster, msg="Clustering vector") 13 | CEC:::checkNumericVectorEquals(expected$cost, CEC$cost, msg="Energy") 14 | CEC:::checkNumericMatrixEquals(expected$centers, CEC$centers, msg="Centers") 15 | CEC:::checkNumericMatrixEquals(fourGaussians, CEC$data, msg="Data") 16 | CEC:::checkNumericVectorEquals(expected$probability, CEC$probability, msg="Probability") 17 | CEC:::checkNumericVectorEquals(expected$nclusters, CEC$nclusters, msg="Number of clusters") 18 | CEC:::checkNumericVectorEquals(expected$iterations, CEC$iterations, msg="Iterations") 19 | } 20 | -------------------------------------------------------------------------------- /src/models/fixed_mean.h: -------------------------------------------------------------------------------- 1 | #ifndef MEAN_H 2 | #define MEAN_H 3 | 4 | #include "cov_utils.h" 5 | #include "model.h" 6 | #include "../m.h" 7 | 8 | namespace cec { 9 | class fixed_mean: public model { 10 | public: 11 | explicit fixed_mean(int n, const row &fixed_mean) 12 | : det_calc(n), 13 | mahalanobis_dist_calc(n), 14 | cov_inv(n, n), 15 | mean(fixed_mean), 16 | ce_constant(n * std::log(2.0 * m::PI * m::E)) {} 17 | 18 | double cross_entropy(const covariance &cov) const noexcept override { 19 | double det = det_calc.determinant(cov); 20 | if (!invert(cov, cov_inv)) 21 | return m::QNAN; 22 | double md2 = mahalanobis_dist_calc.mahalanobis2(cov_inv, cov.mean(), mean); 23 | return (m::log(1 + md2) + ce_constant + m::log(det)) / 2; 24 | } 25 | 26 | private: 27 | determinant_calculator det_calc; 28 | mahalanobis_dist_calculator mahalanobis_dist_calc; 29 | mutable mat cov_inv; 30 | const vec mean; 31 | const double ce_constant; 32 | }; 33 | } 34 | #endif /* MEAN_H */ 35 | -------------------------------------------------------------------------------- /R/model.covariance.R: -------------------------------------------------------------------------------- 1 | model.mean <- function(type, center, param) 2 | { 3 | if (length(which(is.na(center))) > 0) 4 | matrix(NA, 1, ncol(center)) 5 | else if (type == resolve.type("mean")) 6 | param$mean 7 | else center 8 | } 9 | 10 | model.covariance <- function(type, cov, mean, param) 11 | { 12 | if (length(which(is.na(cov))) > 0) 13 | matrix(NA, nrow(cov), ncol(cov)) 14 | else if (type == resolve.type("covariance")) 15 | param$cov 16 | else if (type == resolve.type("fixedr")) 17 | diag(ncol(cov)) * param$r 18 | else if (type == resolve.type("spherical")) 19 | diag(ncol(cov)) * sum(diag(ncol(cov)) * cov) / ncol(cov) 20 | else if (type == resolve.type("diagonal")) 21 | cov * diag(ncol(cov)) 22 | else if (type == resolve.type("eigenvalues")) 23 | { 24 | V <- eigen(cov)$vec 25 | D <- diag(sort(param$eigenvalues, decreasing=T)) 26 | V %*% D %*% t(V) 27 | } 28 | else if (type == resolve.type("mean")) 29 | { 30 | m = param$mean 31 | mean_diff = m - mean 32 | cov + (mean_diff %*% t(mean_diff)) 33 | } 34 | else if (type == resolve.type("all")) 35 | cov 36 | } -------------------------------------------------------------------------------- /man/mouseset.Rd: -------------------------------------------------------------------------------- 1 | \name{mouseset} 2 | \alias{mouseset} 3 | 4 | \title{ 5 | Mouse set 6 | } 7 | 8 | \description{ 9 | Creates a matrix of \code{dim}-dimensional points that form a "mouse-like" set with uniform density. 10 | } 11 | 12 | \usage{ 13 | mouseset(n = 4000, r.head = 2, r.left.ear = 1.1, r.right.ear = 1.1, left.ear.dist = 2.5, 14 | right.ear.dist = 2.5, dim = 2) 15 | } 16 | 17 | \arguments{ 18 | 19 | \item{n}{ 20 | Number of points to generate. 21 | } 22 | 23 | \item{r.head}{ 24 | Radius of mouse head. 25 | } 26 | 27 | \item{r.left.ear}{ 28 | Radius of mouse left ear. 29 | } 30 | 31 | \item{r.right.ear}{ 32 | Radius of mouse right ear. 33 | } 34 | 35 | \item{left.ear.dist}{ 36 | Distance between the center of the head and the center the left ear. 37 | } 38 | 39 | \item{right.ear.dist}{ 40 | Distance between the center of the head and the center the right ear. 41 | } 42 | 43 | \item{dim}{ 44 | Dimension of points. 45 | } 46 | } 47 | 48 | \value{ 49 | Matrix of points with \code{n} rows and \code{dim} cols. 50 | } 51 | 52 | \seealso{ 53 | \code{\link{ball}} 54 | } 55 | 56 | \examples{ 57 | M = mouseset(n=7000, r.head=2, r.left.ear=1.1, r.right.ear=1.1, left.ear.dist=2.5, 58 | right.ear.dist=2.5, dim=2) 59 | plot(M, cex = 0.5, pch = 19) 60 | } 61 | 62 | \keyword{datagen} 63 | -------------------------------------------------------------------------------- /inst/cec_tests/split.test.R: -------------------------------------------------------------------------------- 1 | testname <- "Split method" 2 | 3 | setup <- function() { 4 | set.seed(12345678) 5 | data("threeGaussians", package = "CEC") 6 | data("fourGaussians", package = "CEC") 7 | data("mixShapes", package = "CEC") 8 | 9 | mixShapesReduced = mixShapes[seq(1, nrow(mixShapes), 2),] 10 | } 11 | 12 | test.should.split.to.4.cluster <- function() { 13 | expected.cost = 2.530237 14 | tolerance = 0.001 15 | C = cec(fourGaussians, nstart = 5) 16 | CEC:::checkNumericEquals(4, C$nclusters) 17 | CEC:::checkNumericEquals(expected.cost, C$cost, msg = "cost", tolerance = tolerance) 18 | } 19 | 20 | test.should.split.to.7.cluster <- function() { 21 | expected.cost = 10.16551 22 | tolerance = 0.001 23 | C = cec(mixShapesReduced, 2, nstart = 2, split = T) 24 | CEC:::checkNumericEquals(7, C$nclusters) 25 | CEC:::checkNumericEquals(expected.cost, C$cost, msg = "cost", tolerance = tolerance) 26 | } 27 | 28 | test.should.limit.split.to.4.cluster <- function() { 29 | C = cec(mixShapesReduced, 1, nstart = 2, split = T, split.limit = 4) 30 | CEC:::checkNumericEquals(4, C$nclusters) 31 | } 32 | 33 | test.should.split.to.3.cluster.fixed.mean <- function() { 34 | C = cec(threeGaussians,, "mean", param = c(0, 0), nstart = 8) 35 | CEC:::checkNumericEquals(3, C$nclusters) 36 | CEC:::checkNumericEquals(1.726595, C$cost, tolerance = 0.00001) 37 | } 38 | -------------------------------------------------------------------------------- /src/models/fixed_covariance.h: -------------------------------------------------------------------------------- 1 | #ifndef FIXED_COVARIANCE_H 2 | #define FIXED_COVARIANCE_H 3 | 4 | #include "cov_utils.h" 5 | #include "model.h" 6 | #include "../exceptions.h" 7 | 8 | namespace cec { 9 | class fixed_covariance: public model { 10 | public: 11 | explicit fixed_covariance(int n, mat cov) 12 | : cov_inv(inv(cov)), 13 | tmp(n, n), 14 | ce_constant(std::log(std::pow(2.0 * m::PI, n) * det(cov)) / 2.0) {} 15 | 16 | double cross_entropy(const covariance &cov) const noexcept override { 17 | multiply(cov_inv, cov, tmp); 18 | double tr = trace(tmp); 19 | return ce_constant + tr / 2; 20 | } 21 | 22 | private: 23 | const mat cov_inv; 24 | mutable mat tmp; 25 | const double ce_constant; 26 | 27 | static mat inv(const mat &cov) { 28 | mat dst(cov); 29 | if (!invert(cov, dst)) 30 | throw new invalid_model_parameter("invalid covariance (not positive definite)"); 31 | return dst; 32 | } 33 | 34 | static double det(const mat &cov) { 35 | double det = determinant_calculator(cov.n).determinant(cov); 36 | if (std::isnan(det)) 37 | throw new invalid_model_parameter("invalid covariance (not positive definite)"); 38 | return det; 39 | } 40 | }; 41 | } 42 | #endif /* FIXED_COVARIANCE_H */ 43 | 44 | -------------------------------------------------------------------------------- /src/models/cov_utils.h: -------------------------------------------------------------------------------- 1 | #ifndef CEC_COV_UTILS_H 2 | #define CEC_COV_UTILS_H 3 | 4 | #include 5 | 6 | #include "../vec.h" 7 | 8 | namespace cec { 9 | 10 | double diagonal_product(const mat &cov); 11 | 12 | double determinant(const mat &cov, mat &tmp); 13 | 14 | bool invert(const mat &cov, mat &dst); 15 | 16 | void multiply(const mat &a, const mat &b, mat &dst); 17 | 18 | double trace(const mat &cov); 19 | 20 | class eigenvalues_calculator { 21 | public: 22 | explicit eigenvalues_calculator(const int n) 23 | : tmp(n, n), 24 | workspace(WORKSPACE_SIZE_MUL * n) {} 25 | 26 | bool eigenvalues(const mat &cov, double *res) const noexcept; 27 | 28 | private: 29 | mutable mat tmp; 30 | mutable vec workspace; 31 | static const int WORKSPACE_SIZE_MUL = 130; 32 | }; 33 | 34 | class determinant_calculator { 35 | public: 36 | explicit determinant_calculator(const int n) 37 | : tmp(n, n) {} 38 | 39 | double determinant(const mat &cov) const noexcept; 40 | 41 | private: 42 | mutable mat tmp; 43 | }; 44 | 45 | class mahalanobis_dist_calculator { 46 | public: 47 | explicit mahalanobis_dist_calculator(const int n) 48 | : tmp(n) {} 49 | 50 | double mahalanobis2(const mat &cov_inv, const row &mean, const row &x) const; 51 | 52 | private: 53 | mutable vec tmp; 54 | }; 55 | } 56 | #endif //CEC_COV_UTILS_H 57 | -------------------------------------------------------------------------------- /R/plot.cec.R: -------------------------------------------------------------------------------- 1 | plot.cec <- function(x, col, cex = 0.5, pch = 19, cex.centers = 1, pch.centers = 8, ellipses.lwd = 4, ellipses = TRUE, model = TRUE, xlab, ylab, ...) 2 | { 3 | if (ncol (x $ data) != 2 ) 4 | stop("plotting available only for 2-dimensional data") 5 | 6 | if (!hasArg(col)) col = x$cluster 7 | 8 | if (!is.null(colnames(x$data))) 9 | { 10 | xl <- colnames(x$data)[1] 11 | yl <- colnames(x$data)[2] 12 | } 13 | else 14 | { 15 | xl <- "x" 16 | yl <- "y" 17 | } 18 | 19 | if (hasArg(xlab)) xl <- xlab 20 | if (hasArg(ylab)) yl <- ylab 21 | 22 | plot(x$data, col=col, cex = cex, pch = pch, xlab = xl, ylab = yl, ...) 23 | 24 | if (model == T) { 25 | covs <- x$covariances.model 26 | means <- x$means.model 27 | } else { 28 | covs <- x$covariances 29 | means <- x$centers 30 | } 31 | 32 | points(x$means.model, cex = cex.centers, pch = pch.centers) 33 | if (ellipses) 34 | { 35 | for (i in 1:nrow(means)) 36 | if (! is.na(means[i, 1])) 37 | { 38 | err = FALSE 39 | tryCatch( 40 | { 41 | cov <- covs[[i]] 42 | pts <- ellipse(means[i, ], cov) 43 | lines(pts, lwd = ellipses.lwd) 44 | }, 45 | finally = {}) 46 | } 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /inst/cec_tests/energy.calculation.test.mouseset1.R: -------------------------------------------------------------------------------- 1 | testname <- "Energy calculation (mouseset1)" 2 | setup <- function() 3 | { 4 | B <- as.matrix(read.table(system.file("cec_tests", "mouse1.data", package="CEC"))) 5 | } 6 | 7 | test.type.covariance <- function() 8 | { 9 | given.cov = matrix(c(2,1,1,3), 2,2) 10 | 11 | expected.energy <- 3.540174056 12 | 13 | CE <- cec(B, centers=1, type="cov", param = given.cov, iter.max=0) 14 | 15 | CEC:::checkNumericVectorEquals(expected.energy, CE$cost, msg="Energy") 16 | } 17 | 18 | test.type.fixedr <- function() 19 | { 20 | r <- 1.5 21 | 22 | expected.energy <- 3.416637007 23 | 24 | CE <- cec(B, centers=1, type="fix", param = 1.5, iter.max=0) 25 | 26 | CEC:::checkNumericVectorEquals(expected.energy, CE$cost, msg="Energy") 27 | } 28 | 29 | test.type.spherical <- function() 30 | { 31 | expected.energy <- 3.403158062 32 | 33 | CE <- cec(B, centers=1, type="sp", iter.max=0) 34 | 35 | CEC:::checkNumericVectorEquals(expected.energy, CE$cost, msg="Energy") 36 | } 37 | 38 | 39 | test.type.diagonal <- function() 40 | { 41 | expected.energy <- 3.396500695 42 | 43 | CE <- cec(B, centers=1, type="diag", iter.max=0) 44 | 45 | CEC:::checkNumericVectorEquals(expected.energy, CE$cost, msg="Energy") 46 | } 47 | 48 | test.type.all <- function() 49 | { 50 | expected.energy <- 3.396472329 51 | 52 | CE <- cec(B, centers=1, type="all", iter.max=0) 53 | 54 | CEC:::checkNumericVectorEquals(expected.energy, CE$cost, msg="Energy") 55 | } 56 | -------------------------------------------------------------------------------- /src/models/fixed_eigenvalues.h: -------------------------------------------------------------------------------- 1 | #ifndef FIXED_EIGENVALUES_H 2 | #define FIXED_EIGENVALUES_H 3 | 4 | #include "cov_utils.h" 5 | #include "model.h" 6 | 7 | namespace cec { 8 | class fixed_eigenvalues: public model { 9 | public: 10 | explicit fixed_eigenvalues(int n, std::vector values) 11 | : n(n), 12 | given_values(std::move(values)), 13 | eigenvalues_calc(n), 14 | tmp_values(n), 15 | ce_constant(std::log(std::pow(2.0 * m::PI, n) 16 | * product(fixed_eigenvalues::given_values)) / 2.0) {} 17 | 18 | double cross_entropy(const covariance &cov) const noexcept override { 19 | if (!eigenvalues_calc.eigenvalues(cov, tmp_values.data())) 20 | return m::QNAN; 21 | double values_ratio_sum = 0; 22 | for (int i = 0; i < n; i++) 23 | values_ratio_sum += tmp_values[i] / given_values[i]; 24 | return ce_constant + values_ratio_sum / 2.0; 25 | } 26 | 27 | private: 28 | const int n; 29 | const std::vector given_values; 30 | const eigenvalues_calculator eigenvalues_calc; 31 | mutable std::vector tmp_values; 32 | const double ce_constant; 33 | 34 | static double product(const std::vector &values) { 35 | double prod = 1; 36 | for (auto &&v : values) 37 | prod *= v; 38 | return prod; 39 | } 40 | }; 41 | } 42 | #endif /* FIXED_EIGENVALUES_H */ 43 | 44 | -------------------------------------------------------------------------------- /src/r_result.cpp: -------------------------------------------------------------------------------- 1 | #include "r_result.h" 2 | #include "r_utils.h" 3 | 4 | using cec::r::put; 5 | 6 | SEXP cec::create_R_result(const clustering_results &out) { 7 | int m = out.assignment.size(); 8 | int k = out.centers.m; 9 | 10 | SEXP energy_vector; 11 | SEXP clusters_number_vector; 12 | SEXP assignment_vector; 13 | SEXP covariance_list; 14 | SEXP centers_matrix; 15 | SEXP iterations; 16 | 17 | PROTECT(energy_vector = put(out.energy)); 18 | PROTECT(clusters_number_vector = put(out.cluster_number)); 19 | PROTECT(assignment_vector = put(out.assignment)); 20 | PROTECT(covariance_list = allocVector(VECSXP, k)); 21 | PROTECT(iterations = put(out.iterations)); 22 | PROTECT(centers_matrix = put(out.centers)); 23 | 24 | int *assignment_vector_data = INTEGER(assignment_vector); 25 | for (int i = 0; i < m; i++) { 26 | assignment_vector_data[i]++; 27 | } 28 | for (int i = 0; i < k; i++) { 29 | SEXP covariance; 30 | PROTECT(covariance = put(out.covariances[i])); 31 | SET_VECTOR_ELT(covariance_list, i, covariance); 32 | } 33 | 34 | SEXP ret; 35 | PROTECT(ret = allocList(6)); 36 | SEXP ret_s = ret; 37 | 38 | SETCAR(ret, assignment_vector); 39 | SET_TAG(ret, install("cluster")); 40 | ret = CDR(ret); 41 | SETCAR(ret, centers_matrix); 42 | SET_TAG(ret, install("centers")); 43 | ret = CDR(ret); 44 | SETCAR(ret, energy_vector); 45 | SET_TAG(ret, install("energy")); 46 | ret = CDR(ret); 47 | SETCAR(ret, clusters_number_vector); 48 | SET_TAG(ret, install("nclusters")); 49 | ret = CDR(ret); 50 | SETCAR(ret, covariance_list); 51 | SET_TAG(ret, install("covariances")); 52 | ret = CDR(ret); 53 | SETCAR(ret, iterations); 54 | SET_TAG(ret, install("iterations")); 55 | 56 | UNPROTECT(k + 7); 57 | return ret_s; 58 | } 59 | -------------------------------------------------------------------------------- /man/plot.cec.Rd: -------------------------------------------------------------------------------- 1 | \name{plot.cec} 2 | \alias{plot.cec} 3 | 4 | \title{ 5 | Plot CEC. 6 | } 7 | 8 | \description{ 9 | Presents the results of \code{cec} function in the form of a plot. 10 | Colors of data points depend of the cluster they belong to. 11 | Ellipses are drawn with regards to the covariance (either model or sample) of each cluster. 12 | } 13 | 14 | \usage{ 15 | \method{plot}{cec}(x, col, cex = 0.5, pch = 19, cex.centers = 1, pch.centers = 8, 16 | ellipses.lwd = 4, ellipses = TRUE, model = TRUE, xlab, ylab, ...) 17 | } 18 | 19 | \arguments{ 20 | 21 | \item{x}{ 22 | The result of \code{cec} function. 23 | } 24 | 25 | \item{col}{ 26 | Use this argument to change default colors of points in the clusters. 27 | } 28 | 29 | \item{cex}{ 30 | Basically the size of the points, see \link{points}. 31 | } 32 | 33 | \item{pch}{ 34 | See \link{points}. 35 | } 36 | 37 | \item{cex.centers}{ 38 | The same as \code{cex} parameter, except that it's related to the centers' means. 39 | } 40 | 41 | \item{pch.centers}{ 42 | The same as \code{pch} parameter, except that it's related to the centers' means. 43 | } 44 | 45 | \item{ellipses.lwd}{ 46 | Width of ellipses, \link{points}. 47 | } 48 | 49 | \item{ellipses}{ 50 | If this parameter is TRUE, ellipses will be drawn. 51 | } 52 | 53 | \item{model}{ 54 | If this parameter is TRUE, the model (expected) covariance will be used for each cluster insted of sample covariance (MLE) of the points in the cluster, when drawing ellipses. 55 | } 56 | 57 | \item{xlab}{ 58 | See \link{plot}. 59 | } 60 | 61 | \item{ylab}{ 62 | See \link{plot}. 63 | } 64 | 65 | \item{\dots}{ 66 | Arguments are passed to \code{plot} function when drawing data points. 67 | } 68 | } 69 | 70 | \value{No return value.} 71 | 72 | \seealso{ 73 | \code{\link{print.cec}} 74 | } 75 | 76 | \examples{ 77 | ## See the examples of function cec. 78 | } 79 | 80 | \keyword{hplot} 81 | 82 | -------------------------------------------------------------------------------- /src/split_starter.h: -------------------------------------------------------------------------------- 1 | #ifndef CEC_SPLIT_STARTER_H 2 | #define CEC_SPLIT_STARTER_H 3 | 4 | #include "starter.h" 5 | #include "cec_starter.h" 6 | 7 | namespace cec { 8 | 9 | class split_starter : public clustering_processor { 10 | public: 11 | struct parameters { 12 | cec_parameters start_params; 13 | const model_spec &model_sp; 14 | const centers_init_spec &init; 15 | int split_tries; 16 | int max_k; 17 | int max_depth; 18 | 19 | parameters(const cec_parameters &start_params, const model_spec &model_sp, 20 | const centers_init_spec &init, int split_tries, int max_k, int max_depth) 21 | : start_params(start_params), 22 | model_sp(model_sp), 23 | init(init), 24 | split_tries(split_tries), 25 | max_k(max_k), 26 | max_depth(max_depth) {} 27 | }; 28 | 29 | explicit split_starter(const parameters ¶ms) 30 | : splitter(cec_starter::parameters(params.start_params, params.init, 31 | params.split_tries)), 32 | cec(params.start_params), 33 | m_spec(params.model_sp), 34 | max_k(params.max_k), 35 | max_depth(params.max_depth), 36 | try_split_models(model_spec::create_models(m_spec, 2)) {} 37 | 38 | unique_ptr 39 | start(const unique_ptr &cl_res, 40 | const clustering_input &input_params) override; 41 | 42 | private: 43 | cec_starter splitter; 44 | cross_entropy_clustering cec; 45 | const model_spec &m_spec; 46 | int max_k; 47 | int max_depth; 48 | vector> try_split_models; 49 | unique_ptr try_split_cluster(const mat &x_mat); 50 | }; 51 | } 52 | 53 | #endif //CEC_SPLIT_STARTER_H 54 | -------------------------------------------------------------------------------- /R/utils.R: -------------------------------------------------------------------------------- 1 | ball <- function(n = 4000, r = 1, dim = 2){ 2 | M <- matrix(0,n,dim) 3 | count <- 0; 4 | rr <- r ^ 2 5 | while (count < n) 6 | { 7 | p <- runif (dim, -r, r) 8 | if (sum(p ^ 2) <= rr) M[count <- count + 1,] <- p 9 | } 10 | M 11 | } 12 | 13 | nballvolume <- function (r, n) 14 | { 15 | k <- as.integer(n / 2) 16 | if (n %% 2 == 0) 17 | { 18 | pi ^ k / factorial(k) * r ^ n 19 | } 20 | else 21 | { 22 | 2 * factorial(k) * (4 * pi) ^ k / factorial(n) * r ^ n 23 | } 24 | } 25 | 26 | mouseset <- function (n=4000, r.head=2, r.left.ear=1.1, r.right.ear=1.1, left.ear.dist=2.5, right.ear.dist=2.5, dim=2) 27 | { 28 | vh <- nballvolume (r.head, dim) 29 | vl <- nballvolume (r.left.ear, dim) 30 | vr <- nballvolume (r.right.ear, dim) 31 | 32 | if (dim < 2) stop ("Illegal argument: dim < 2") 33 | pos.h <- rep(0, dim) 34 | 35 | pos.l <- pos.h 36 | pos.r <- pos.h 37 | 38 | l.offset <- left.ear.dist / sqrt(2) 39 | r.offset <- right.ear.dist / sqrt(2) 40 | 41 | pos.l[1] <- pos.l[1] - l.offset 42 | pos.l[2] <- pos.l[2] + l.offset 43 | 44 | pos.r[1] <- pos.r[1] + r.offset 45 | pos.r[2] <- pos.r[2] + r.offset 46 | 47 | hh = r.head ^ 2 48 | ll = r.left.ear ^ 2 49 | rr = r.right.ear ^ 2 50 | 51 | centers <- rbind(pos.h, pos.l, pos.r) 52 | rs <- c(r.head, r.left.ear, r.right.ear) 53 | rrs <- c(hh, ll, rr) 54 | 55 | M = matrix(0, n, dim) 56 | 57 | count <- 0 58 | 59 | while(count < n) 60 | { 61 | gen <- min (1000, n - count) 62 | hits <- 0 63 | s <- sample(x = c(1, 2, 3), size=gen, prob=c(vh, vl, vr), replace=T) 64 | for (i in s) 65 | { 66 | r <- rs[i] 67 | random.p <- runif(dim, -r, +r) 68 | p <- centers[i,] + random.p 69 | if (sum(random.p ^ 2) < rrs[i]) 70 | if (i == 1) M[count <- count + 1,] <- p 71 | else if (i == 2) 72 | { 73 | if (sum((p - pos.h) ^ 2) > hh && sum((p - pos.r) ^ 2) > rr) M[count <- count + 1,] <- p 74 | } 75 | else if (i == 3) 76 | { 77 | if (sum((p - pos.h) ^ 2) > hh && sum((p - pos.l) ^ 2) > ll) M[count <- count + 1,] <- p 78 | } 79 | } 80 | } 81 | M 82 | } 83 | -------------------------------------------------------------------------------- /src/starter.h: -------------------------------------------------------------------------------- 1 | #ifndef CROSS_ENTROPY_CLUSTERING_H 2 | #define CROSS_ENTROPY_CLUSTERING_H 3 | 4 | #include 5 | 6 | #include "vec.h" 7 | #include "common.h" 8 | #include "models/model.h" 9 | 10 | namespace cec { 11 | class clustering_results; 12 | 13 | struct cec_parameters { 14 | cec_parameters(int max_iter, int min_card) 15 | : max_iter(max_iter), 16 | min_card(min_card) {} 17 | 18 | int max_iter; 19 | int min_card; 20 | }; 21 | 22 | class points_split { 23 | public: 24 | static vector split_points(const mat &points, const vector &assignment, int k); 25 | 26 | const mat &points() const { 27 | return pts; 28 | } 29 | 30 | const vector &mapping() const { 31 | return map; 32 | } 33 | 34 | points_split(mat points, vector mapping) 35 | : pts(std::move(points)), 36 | map(std::move(mapping)) {} 37 | 38 | private: 39 | mat pts; 40 | vector map; 41 | }; 42 | 43 | class cross_entropy_clustering { 44 | public: 45 | explicit cross_entropy_clustering(const cec_parameters ¶ms) 46 | : params(params) {} 47 | 48 | unique_ptr 49 | start(const mat &x, const vector &initial_assignment, 50 | const vector> &models); 51 | 52 | private: 53 | cec_parameters params; 54 | }; 55 | 56 | class clustering_results { 57 | friend class cross_entropy_clustering; 58 | 59 | public: 60 | mat centers; 61 | vector assignment; 62 | int cluster_number; 63 | int iterations; 64 | double energy; 65 | vector covariances; 66 | 67 | clustering_results(const clustering_results &res) = default; 68 | 69 | clustering_results(clustering_results &&res) = default; 70 | 71 | private: 72 | clustering_results(mat centers, vector assignment, int cluster_number, 73 | int iterations, double energy, vector covariances) 74 | : centers(std::move(centers)), 75 | assignment(std::move(assignment)), 76 | cluster_number(cluster_number), 77 | iterations(iterations), 78 | energy(energy), 79 | covariances(std::move(covariances)) {} 80 | }; 81 | 82 | 83 | } 84 | 85 | #endif //CROSS_ENTROPY_CLUSTERING_H 86 | -------------------------------------------------------------------------------- /src/init.h: -------------------------------------------------------------------------------- 1 | #ifndef CEC_INIT_H 2 | #define CEC_INIT_H 3 | 4 | #include 5 | 6 | #include "vec.h" 7 | #include "random.h" 8 | #include "common.h" 9 | 10 | namespace cec { 11 | 12 | using std::vector; 13 | using std::unique_ptr; 14 | using std::shared_ptr; 15 | 16 | class centers_init { 17 | public: 18 | virtual ~centers_init() = default; 19 | 20 | virtual mat init(const mat &x, int k) = 0; 21 | }; 22 | 23 | class random_init : public centers_init { 24 | public: 25 | random_init() 26 | : gen(random::create_generator()) {} 27 | 28 | mat init(const mat &x, int k) override; 29 | 30 | private: 31 | random::rand_gen gen; 32 | }; 33 | 34 | class fixed_init : public centers_init { 35 | public: 36 | explicit fixed_init(mat c_mat) 37 | : c_mat(std::move(c_mat)) {} 38 | 39 | mat init(const mat &x, int k) override; 40 | 41 | private: 42 | mat c_mat; 43 | }; 44 | 45 | class kmeanspp_init : public centers_init { 46 | public: 47 | mat init(const mat &x, int k) override; 48 | 49 | explicit kmeanspp_init() 50 | : gen(random::create_generator()) {} 51 | 52 | private: 53 | random::rand_gen gen; 54 | vector dists; 55 | vector sums; 56 | }; 57 | 58 | class centers_init_spec { 59 | public: 60 | virtual ~centers_init_spec() = default; 61 | 62 | virtual unique_ptr create() const = 0; 63 | }; 64 | 65 | class random_init_spec : public centers_init_spec { 66 | public: 67 | unique_ptr create() const override; 68 | }; 69 | 70 | class fixed_init_spec : public centers_init_spec { 71 | public: 72 | const mat c_mat; 73 | 74 | explicit fixed_init_spec(mat c_mat) 75 | : c_mat(std::move(c_mat)) {} 76 | 77 | unique_ptr create() const override; 78 | }; 79 | 80 | class kmeanspp_init_spec : public centers_init_spec { 81 | public: 82 | unique_ptr create() const override; 83 | }; 84 | 85 | class assignment_init { 86 | public: 87 | virtual ~assignment_init() = default; 88 | 89 | virtual std::vector init(const mat &x, const mat &c) = 0; 90 | }; 91 | 92 | class closest_assignment : public assignment_init { 93 | public: 94 | std::vector init(const mat &x, const mat &c) override; 95 | }; 96 | } 97 | 98 | #endif //CEC_INIT_H 99 | -------------------------------------------------------------------------------- /src/cluster.h: -------------------------------------------------------------------------------- 1 | #ifndef CEC_CLUSTER_H 2 | #define CEC_CLUSTER_H 3 | 4 | #include "vec.h" 5 | #include "models/model.h" 6 | 7 | namespace cec { 8 | class deferred_update_covariance : public covariance { 9 | public: 10 | using covariance::operator=; 11 | 12 | explicit deferred_update_covariance(const covariance &initial) 13 | : covariance(initial), 14 | tmp_point(initial.m), 15 | tmp_cov(initial) {} 16 | 17 | void add_point_tmp(const row &point) { 18 | tmp_cov = *this; 19 | tmp_cov.add_point(point); 20 | } 21 | 22 | void rem_point_tmp(const row &point) { 23 | tmp_cov = *this; 24 | tmp_cov.rem_point(point); 25 | } 26 | 27 | void apply_change() { 28 | (*this) = tmp_cov; 29 | } 30 | 31 | const covariance &tmp_covariance() const { 32 | return tmp_cov; 33 | } 34 | 35 | private: 36 | vec tmp_point; 37 | covariance tmp_cov; 38 | }; 39 | 40 | class cluster { 41 | public: 42 | cluster(const model &mod, const covariance &initial_covariance, const int m) 43 | : m(m), 44 | mod(mod), 45 | cov(initial_covariance), 46 | eng(mod.energy(cov, m)), 47 | tmp_eng(eng) {} 48 | 49 | double add_point(const row &point) { 50 | cov.add_point_tmp(point); 51 | tmp_eng = tmp_energy(); 52 | return tmp_eng - eng; 53 | } 54 | 55 | double rem_point(const row &point) { 56 | cov.rem_point_tmp(point); 57 | tmp_eng = tmp_energy(); 58 | return tmp_eng - eng; 59 | } 60 | 61 | const row &mean() const { 62 | return cov.mean(); 63 | } 64 | 65 | int card() const { 66 | return cov.card(); 67 | } 68 | 69 | const mat &covariance() const { 70 | return cov; 71 | } 72 | 73 | void apply_change() { 74 | cov.apply_change(); 75 | eng = tmp_eng; 76 | } 77 | 78 | double energy() { 79 | return mod.energy(cov, m); 80 | } 81 | 82 | private: 83 | double tmp_energy() { 84 | return mod.energy(cov.tmp_covariance(), m); 85 | } 86 | 87 | const int m; 88 | const model &mod; 89 | deferred_update_covariance cov; 90 | double eng; 91 | double tmp_eng; 92 | }; 93 | } 94 | 95 | #endif //CEC_CLUSTER_H 96 | -------------------------------------------------------------------------------- /inst/cec_tests/energy.calculation.test.ball1.R: -------------------------------------------------------------------------------- 1 | testname <- "Energy calculation (ball1)" 2 | setup <- function() 3 | { 4 | B <- as.matrix(read.table(system.file("cec_tests", "ball1.data", package="CEC"))) 5 | C <- as.matrix(read.table(system.file("cec_tests", "centers1.data", package="CEC"))) 6 | } 7 | 8 | 9 | test.type.covariance <- function() 10 | { 11 | given.cov = matrix(c(2,1,1,3), 2,2) 12 | 13 | expected.energy <- 2.766927173 14 | 15 | CE <- cec(B, centers=1, type="cov", param = given.cov, iter.max=0) 16 | 17 | CEC:::checkNumericVectorEquals(expected.energy, CE$cost, msg="Energy") 18 | } 19 | 20 | test.type.fixedr <- function() 21 | { 22 | r <- 1.5 23 | 24 | expected.energy <- 2.410818718 25 | 26 | CE <- cec(B, centers=1, type="fix", param = 1.5, iter.max=0) 27 | 28 | CEC:::checkNumericVectorEquals(expected.energy, CE$cost, msg="Energy") 29 | } 30 | 31 | test.type.spherical <- function() 32 | { 33 | expected.energy <- 1.456430201 34 | 35 | CE <- cec(B, centers=1, type="sp", iter.max=0) 36 | 37 | CEC:::checkNumericVectorEquals(expected.energy, CE$cost, msg="Energy") 38 | } 39 | 40 | 41 | test.type.diagonal <- function() 42 | { 43 | cov <- cov.mle(B) 44 | 45 | expected.energy <- 1.45637452 46 | 47 | CE <- cec(B, centers=1, type="diag", iter.max=0) 48 | 49 | CEC:::checkNumericVectorEquals(expected.energy, CE$cost, msg="Energy") 50 | } 51 | 52 | test.type.eigenvalues <- function() 53 | { 54 | evals <- c(0.1, 0.22) 55 | 56 | expected.energy <- 1.734310397 57 | 58 | CE <- cec(B, centers=1, type="eigen", param=evals, iter.max=0) 59 | 60 | CEC:::checkNumericVectorEquals(expected.energy, CE$cost, msg="Energy") 61 | } 62 | 63 | test.type.all <- function() 64 | { 65 | expected.energy <- 1.455903678 66 | 67 | CE <- cec(B, centers=1, type="all", iter.max=0) 68 | 69 | CEC:::checkNumericVectorEquals(expected.energy, CE$cost, msg="Energy") 70 | } 71 | 72 | test.type.mean <- function() 73 | { 74 | expected.energy <- 1.455960581 75 | 76 | CE <- cec(B, 1, type="mean", param=c(0, 0), iter.max=0) 77 | 78 | CEC:::checkNumericVectorEquals(expected.energy, CE$cost, msg="Energy") 79 | } 80 | 81 | 82 | 83 | #################################################################################################################### 84 | 85 | test.type.spherical.cluster.removing <- function() 86 | { 87 | expected.energy <- 1.456430201 88 | 89 | CE <- cec(B, C, type="sp", iter.max=20) 90 | 91 | CEC:::checkNumericVectorEquals(expected.energy, CE$cost, msg="Energy") 92 | } 93 | 94 | 95 | -------------------------------------------------------------------------------- /src/cec_starter.h: -------------------------------------------------------------------------------- 1 | #include "starter.h" 2 | #include "init.h" 3 | #include "params.h" 4 | 5 | #ifndef CEC_STARTER_H 6 | #define CEC_STARTER_H 7 | 8 | namespace cec { 9 | struct clustering_input { 10 | const mat &x; 11 | const vector> ⊧ 12 | 13 | public: 14 | clustering_input(const mat &x, const vector> &models) 15 | : x(x), 16 | models(models) {} 17 | }; 18 | 19 | class best_results_collector { 20 | public: 21 | void operator()(unique_ptr &&cr) { 22 | if (!cr) 23 | return; 24 | if (!best || cr->energy < best->energy) 25 | best = std::move(cr); 26 | } 27 | 28 | unique_ptr operator()() noexcept { 29 | return std::move(best); 30 | } 31 | 32 | void reset() { 33 | best.reset(nullptr); 34 | } 35 | 36 | explicit operator bool() { 37 | return !!best; 38 | } 39 | 40 | private: 41 | unique_ptr best; 42 | }; 43 | 44 | 45 | class clustering_starter { 46 | public: 47 | virtual ~clustering_starter() = default; 48 | virtual unique_ptr start(const clustering_input &input_params) = 0; 49 | }; 50 | 51 | class clustering_processor { 52 | public: 53 | virtual ~clustering_processor() = default; 54 | virtual unique_ptr 55 | start(const unique_ptr &cl_res, const clustering_input &input_param) = 0; 56 | }; 57 | 58 | class cec_starter : public clustering_starter { 59 | public: 60 | struct parameters { 61 | const cec_parameters start_params; 62 | const centers_init_spec &init; 63 | int starts; 64 | 65 | parameters(cec_parameters start_params, 66 | const centers_init_spec &init, 67 | int starts = 1) 68 | : start_params(start_params), 69 | init(init), 70 | starts(starts) {} 71 | }; 72 | 73 | explicit cec_starter(const parameters ¶ms) 74 | : starts(params.starts), 75 | cec(params.start_params), 76 | closest(), 77 | init(params.init.create()) {} 78 | 79 | unique_ptr 80 | start(const clustering_input &ip) override; 81 | 82 | private: 83 | const int starts; 84 | best_results_collector best; 85 | cross_entropy_clustering cec; 86 | closest_assignment closest; 87 | unique_ptr init; 88 | }; 89 | } 90 | 91 | #endif 92 | -------------------------------------------------------------------------------- /src/init.cpp: -------------------------------------------------------------------------------- 1 | #include "init.h" 2 | #include "random.h" 3 | 4 | std::vector cec::closest_assignment::init(const cec::mat &x, const cec::mat &c) { 5 | int m = x.m; 6 | int k = c.m; 7 | std::vector asgn(m); 8 | for (int i = 0; i < m; i++) { 9 | double b_dist = std::numeric_limits::max(); 10 | const row &point = x[i]; 11 | int b_row = -1; 12 | for (int j = 0; j < k; j++) { 13 | double dist = row::dist_sq(point, c[j]); 14 | if (dist < b_dist) { 15 | b_dist = dist; 16 | b_row = j; 17 | } 18 | } 19 | asgn[i] = b_row; 20 | } 21 | return asgn; 22 | } 23 | 24 | cec::mat cec::random_init::init(const mat &x, int k) { 25 | mat c_mat(k, x.n); 26 | std::uniform_int_distribution unif_int(0, x.m - 1); 27 | for (int i = 0; i < k; i++) 28 | c_mat[i] = x[unif_int.operator()(gen)]; 29 | return c_mat; 30 | } 31 | 32 | cec::mat cec::kmeanspp_init::init(const mat &x, int k) { 33 | int m = x.m; 34 | int n = x.n; 35 | dists.resize(m); 36 | sums.resize(m); 37 | mat c(k, n); 38 | std::uniform_int_distribution unif_int(0, x.m - 1); 39 | c[0] = x[unif_int(gen)]; 40 | dists[0] = 0.0; 41 | sums[0] = 0.0; 42 | 43 | sums[0] = dists[0] = row::dist_sq(x[0], c[0]); 44 | 45 | for (int i = 1; i < m; i++) { 46 | double dist = row::dist_sq(x[i], c[0]); 47 | dists[i] = dist; 48 | sums[i] = sums[i - 1] + dist; 49 | } 50 | 51 | std::uniform_real_distribution unif_real; 52 | for (int i = 1; i < k; i++) { 53 | double upper = sums[m - 1]; 54 | double n_sum = upper == 0.0 55 | ? 0.0 56 | : unif_real(gen, std::uniform_real_distribution::param_type(0.0, upper)); 57 | auto range = std::equal_range(sums.begin(), sums.end(), n_sum); 58 | int idx_from = range.first - sums.begin(); 59 | int idx_to = range.second - sums.begin(); 60 | idx_to = std::min(idx_to, m - 1); 61 | int idx = unif_int(gen, std::uniform_int_distribution::param_type(idx_from, idx_to)); 62 | c[i] = x[idx]; 63 | sums[0] = dists[0] = std::min(dists[0], row::dist_sq(x[0], c[i])); 64 | for (int j = 1; j < m; j++) { 65 | dists[j] = std::min(dists[j], row::dist_sq(x[j], c[i])); 66 | sums[j] = sums[j - 1] + dists[j]; 67 | } 68 | } 69 | return c; 70 | } 71 | 72 | cec::mat cec::fixed_init::init(const cec::mat &x, int k) { 73 | mat res(k, x.n); 74 | for (int i = 0; i < k; ++i) 75 | res[i] = c_mat[i]; 76 | return res; 77 | } 78 | 79 | std::unique_ptr cec::random_init_spec::create() const { 80 | return make_unique(); 81 | } 82 | 83 | std::unique_ptr cec::kmeanspp_init_spec::create() const { 84 | return make_unique(); 85 | } 86 | 87 | std::unique_ptr cec::fixed_init_spec::create() const { 88 | return make_unique(c_mat); 89 | } 90 | -------------------------------------------------------------------------------- /inst/cec_tests/energy.calculation.test.various.data.sets.R: -------------------------------------------------------------------------------- 1 | testname <- "energy calculation (various data sets)" 2 | setup <- function() 3 | { 4 | two.gausses.4d = as.matrix(read.table(system.file("cec_tests", "two.gausses.4d.data", package="CEC"))) 5 | 6 | mouse3d <- as.matrix(read.table(system.file("cec_tests", "mouse3d.data", package="CEC"))) 7 | C <- as.matrix(read.table(system.file("cec_tests", "centers3d.data", package="CEC"))) 8 | C4 <- as.matrix(read.table(system.file("cec_tests", "centers43d.data", package="CEC"))) 9 | } 10 | 11 | test.type.covariance <- function() 12 | { 13 | given.cov = matrix(c(0.770118878, 0.005481129, -0.005991149, 0.005481129, 0.766972716, 0.008996509, -0.005991149, 0.008996509, 0.821481768), 3, 3) 14 | 15 | expected.energy <- 4.365855156 16 | 17 | CE <- cec(mouse3d, centers=C, type="cov", param = given.cov, iter.max=20) 18 | 19 | CEC:::checkNumericVectorEquals(expected.energy, CE$cost, msg="Energy") 20 | } 21 | 22 | test.type.fixedr.mixture <- function() 23 | { 24 | r <- c(0.2, 0.3, 0.4) 25 | 26 | expected.energy <- 4.853461033 27 | 28 | CE <- cec(mouse3d, centers=C, type=c("fi", "fi", "fi"), param = r) 29 | 30 | CEC:::checkNumericVectorEquals(expected.energy, CE$cost, msg="Energy") 31 | } 32 | 33 | 34 | test.type.spherical.one.cluster.removed <- function() 35 | { 36 | expected.energy <- 4.179257781 37 | expected.number.of.clusters <- 3 38 | 39 | CE <- cec(mouse3d, C4, type="sp") 40 | 41 | CEC:::checkNumericVectorEquals(expected.number.of.clusters, CE$nclusters, msg="Number of clusters") 42 | CEC:::checkNumericVectorEquals(expected.energy, CE$cost, msg="Energy") 43 | } 44 | 45 | test.type.diagonal.spherical.mixture <- function() 46 | { 47 | expected.energy <- 4.177793598 48 | expected.number.of.clusters <- 3 49 | 50 | CE <- cec(mouse3d, C, type=c("diag", "diag", "sp")) 51 | 52 | CEC:::checkNumericVectorEquals(expected.number.of.clusters, CE$nclusters, msg="Number of clusters") 53 | CEC:::checkNumericVectorEquals(expected.energy, CE$cost, msg="Energy") 54 | } 55 | 56 | test.type.eigenvalues.all.fixedr.mixture <- function() 57 | { 58 | evals1 <- c(0.8240634, 0.7739987, 0.7595220) 59 | evals2 <- c(0.7240634, 0.5739987, 0.3595220) 60 | r <- 1.0 61 | 62 | expected.energy <- 4.323007035 63 | expected.number.of.clusters <- 3 64 | 65 | CE <- cec(mouse3d, C4, type=c("all", "eigen", "fixedr", "eigen"), param=list(evals1, r, evals2)) 66 | 67 | CEC:::checkNumericVectorEquals(expected.number.of.clusters, CE$nclusters, msg="Number of clusters") 68 | CEC:::checkNumericVectorEquals(expected.energy, CE$cost, msg="Energy") 69 | } 70 | 71 | test.type.mean.two.gaussians <- function() 72 | { 73 | centers = matrix(c(2, 4, 2, 4, 2, 4, 2, 4), 2, 4) 74 | means.param = list(c(0, 0, 0, 0), c(5, 5, 5, 5)) 75 | 76 | expected.energy = 6.142651451 77 | 78 | cec = cec(two.gausses.4d, centers, c("mean", "mean"), param=means.param) 79 | 80 | CEC:::checkNumericVectorEquals(expected.energy, cec$cost, msg="Energy") 81 | } -------------------------------------------------------------------------------- /src/models/cov_utils.cpp: -------------------------------------------------------------------------------- 1 | #include "cov_utils.h" 2 | #include "../m.h" 3 | 4 | #include 5 | 6 | const double ZERO_EPSILON = 1.0e-32; 7 | 8 | static inline double handle_zero(double d) { 9 | if (d < ZERO_EPSILON) 10 | return ZERO_EPSILON; 11 | return d; 12 | } 13 | 14 | static inline double handle_cholesky_nan(double d) { 15 | if (cec::m::isnan(d)) 16 | return handle_zero(0); 17 | return handle_zero(d); 18 | } 19 | 20 | static bool cholesky(const cec::mat &cov, cec::mat &dst) { 21 | int n = cov.n; 22 | int info; 23 | dst = cov; 24 | dpotrf_("U", &n, dst.data(), &n, &info); 25 | return info == 0; 26 | } 27 | 28 | double cec::diagonal_product(const mat &cov) { 29 | int n = cov.n; 30 | double res = 1.0; 31 | for (int i = 0; i < n; i++) 32 | res *= cov[i][i]; 33 | return res; 34 | } 35 | 36 | double cec::trace(const mat &cov) { 37 | int n = cov.n; 38 | double tr = 0; 39 | for (int i = 0; i < n; i++) 40 | tr += cov[i][i]; 41 | return tr; 42 | } 43 | 44 | void cec::multiply(const mat &a, const mat &b, mat &dst) { 45 | int n = a.n; 46 | if (n < 8) { 47 | dst.fill(0.0); 48 | for (int i = 0; i < n; i++) 49 | for (int j = 0; j < n; j++) 50 | for (int k = 0; k < n; k++) 51 | dst[i][j] += a[k][i] * b[j][k]; 52 | return; 53 | } 54 | double zero = 0; 55 | double one = 1; 56 | dsymm_("L", "L", &n, &n, &one, a.data(), &n, b.data(), &n, &zero, dst.data(), &n); 57 | } 58 | 59 | bool cec::invert(const mat &cov, mat &dst) { 60 | int n = cov.n; 61 | int info; 62 | if (!cholesky(cov, dst)) 63 | return false; 64 | dpotri_("U", &n, dst.data(), &n, &info); 65 | for (int i = 0; i < n; i++) 66 | for (int j = i + 1; j < n; j++) 67 | dst[i][j] = dst[j][i]; 68 | return info == 0; 69 | } 70 | 71 | bool cec::eigenvalues_calculator::eigenvalues(const cec::mat &cov, double *res) const noexcept { 72 | int n = cov.n; 73 | int info; 74 | tmp = cov; 75 | dsyev_("N", "U", &n, tmp.data(), &n, res, workspace.data(), &workspace.size, &info); 76 | return info == 0; 77 | } 78 | 79 | double cec::determinant_calculator::determinant(const cec::mat &cov) const noexcept { 80 | if (cov.n == 1) 81 | return cov[0][0]; 82 | if (cov.n == 2) 83 | return cov[0][0] * cov[1][1] - cov[0][1] * cov[1][0]; 84 | if (!cholesky(cov, tmp)) 85 | return m::QNAN; 86 | double prod = diagonal_product(tmp); 87 | return handle_cholesky_nan(prod * prod); 88 | } 89 | 90 | double 91 | cec::mahalanobis_dist_calculator::mahalanobis2(const mat &cov_inv, const row &mean, const row &x) const { 92 | int n = cov_inv.n; 93 | for (int i = 0; i < n; i++) 94 | tmp[i] = x[i] - mean[i]; 95 | double res = 0; 96 | for (int i = 0; i < n; i++) { 97 | double acc = 0.0; 98 | for (int j = 0; j < n; j++) 99 | acc += tmp[j] * cov_inv[j][i]; 100 | res += acc * tmp[i]; 101 | } 102 | return res; 103 | } 104 | -------------------------------------------------------------------------------- /src/r_ext_ptr.h: -------------------------------------------------------------------------------- 1 | #ifndef R_EXT_PTR_H 2 | #define R_EXT_PTR_H 3 | 4 | #include 5 | #include 6 | 7 | namespace cec { 8 | namespace r { 9 | template 10 | class r_ext_ptr { 11 | public: 12 | r_ext_ptr() { 13 | r_ptr = PROTECT(R_MakeExternalPtr(nullptr, NULL, NULL)); 14 | R_RegisterCFinalizerEx(r_ptr, r_ext_ptr::finalize, TRUE); 15 | } 16 | 17 | r_ext_ptr(r_ext_ptr &r_p) = delete; 18 | 19 | r_ext_ptr(r_ext_ptr &&r_p) noexcept { 20 | (*this) = std::move(r_p); 21 | } 22 | 23 | bool operator==(const r_ext_ptr &r_p) const { 24 | return get() == r_p.get(); 25 | } 26 | 27 | bool operator!=(const r_ext_ptr &r_p) const { 28 | return !(r_p == *this); 29 | } 30 | 31 | explicit operator bool() { 32 | return get() != nullptr; 33 | } 34 | 35 | r_ext_ptr &operator=(r_ext_ptr &r_p) = delete; 36 | 37 | r_ext_ptr &operator=(r_ext_ptr &&r_p) noexcept { 38 | r_ptr = r_p.r_ptr; 39 | r_p.r_ptr = NULL; 40 | return *this; 41 | } 42 | 43 | virtual ~r_ext_ptr() { 44 | if (r_ptr == NULL) 45 | return; 46 | finalize(r_ptr); 47 | UNPROTECT_PTR(r_ptr); 48 | } 49 | 50 | template 51 | void init(Args &&...args) { 52 | finalize(r_ptr); 53 | T *t_ptr = new T(std::forward(args)...); 54 | 55 | // assume this will always return... 56 | R_SetExternalPtrAddr(r_ptr, t_ptr); 57 | } 58 | 59 | void reset(T *ptr) { 60 | finalize(r_ptr); 61 | R_SetExternalPtrAddr(r_ptr, ptr); 62 | } 63 | 64 | const T *get() const { 65 | return (T *) R_ExternalPtrAddr(r_ptr); 66 | } 67 | 68 | const T *operator->() const { 69 | return get(); 70 | } 71 | 72 | const T &operator*() const { 73 | return *get(); 74 | } 75 | 76 | T *get() { 77 | return (T *) R_ExternalPtrAddr(r_ptr); 78 | } 79 | 80 | T *operator->() { 81 | return get(); 82 | } 83 | 84 | T &operator*() { 85 | return *get(); 86 | } 87 | 88 | private: 89 | SEXP r_ptr = NULL; 90 | 91 | static void finalize(SEXP r_ptr) { 92 | T *ptr = (T *) R_ExternalPtrAddr(r_ptr); 93 | if (ptr == nullptr) 94 | return; 95 | delete ptr; 96 | R_ClearExternalPtr(r_ptr); 97 | } 98 | }; 99 | 100 | template 101 | static r_ext_ptr make_r_ext(Args &&... args) { 102 | r_ext_ptr ptr; 103 | ptr.init(std::forward(args)...); 104 | return ptr; 105 | } 106 | } 107 | } 108 | 109 | #endif //R_EXT_PTR_H 110 | -------------------------------------------------------------------------------- /src/params.cpp: -------------------------------------------------------------------------------- 1 | #include "params.h" 2 | #include "exceptions.h" 3 | #include "models/all.h" 4 | #include "models/spherical.h" 5 | #include "models/diagonal.h" 6 | #include "models/fixed_radius.h" 7 | #include "models/fixed_covariance.h" 8 | #include "models/fixed_eigenvalues.h" 9 | #include "models/fixed_mean.h" 10 | 11 | namespace cec { 12 | init_method parse_init_method(const string &method) { 13 | if (method == "none") 14 | return init_method::NONE; 15 | if (method == "kmeanspp") 16 | return init_method::KMEANSPP; 17 | if (method == "random") 18 | return init_method::RANDOM; 19 | throw invalid_init_method(method); 20 | } 21 | 22 | model_type parse_model_type(const string &name) { 23 | if (name == "all") 24 | return model_type::ALL; 25 | if (name == "covariance") 26 | return model_type::COVARIANCE; 27 | if (name == "diagonal") 28 | return model_type::DIAGONAL; 29 | if (name == "eigenvalues") 30 | return model_type::EIGENVALUES; 31 | if ((name == "radius") || (name == "fixed_r") || (name == "fixedr")) 32 | return model_type::FIXED_R; 33 | if (name == "mean") 34 | return model_type::MEAN; 35 | if (name == "spherical") 36 | return model_type::SPHERICAL; 37 | throw invalid_model_name(name); 38 | } 39 | 40 | vector> 41 | model_spec::create_models(vector> specs) { 42 | int size = specs.size(); 43 | vector> models(size); 44 | for (int i = 0; i < size; ++i) 45 | models[i] = specs[i]->create_model(); 46 | return models; 47 | } 48 | 49 | vector> model_spec::create_models(const model_spec &spec, int n) { 50 | vector> models(n); 51 | for (int i = 0; i < n; ++i) 52 | models[i] = spec.create_model(); 53 | return models; 54 | } 55 | 56 | shared_ptr centers_param::get_centers_init() { 57 | switch (init_m) { 58 | case init_method::NONE: 59 | return make_shared(centers_mat); 60 | case init_method::RANDOM: 61 | return make_shared(); 62 | default: 63 | return make_shared(); 64 | } 65 | } 66 | 67 | unique_ptr model_all_spec::create_model() const { 68 | return make_unique(n); 69 | } 70 | 71 | unique_ptr model_spherical_spec::create_model() const { 72 | return make_unique(n); 73 | } 74 | 75 | unique_ptr model_diagonal_spec::create_model() const { 76 | return make_unique(n); 77 | } 78 | 79 | unique_ptr model_fixed_radius_spec::create_model() const { 80 | return make_unique(n, r); 81 | } 82 | 83 | unique_ptr model_covariance_spec::create_model() const { 84 | return make_unique(n, g_cov); 85 | } 86 | 87 | unique_ptr model_eigenvalues_spec::create_model() const { 88 | return make_unique(n, values); 89 | } 90 | 91 | unique_ptr model_mean_spec::create_model() const { 92 | vec mean_vec(n); 93 | for (int i = 0; i < n; i++) 94 | mean_vec[i] = mean[i]; 95 | return make_unique(n, mean_vec); 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /src/cov.h: -------------------------------------------------------------------------------- 1 | #ifndef CEC_COV_H 2 | #define CEC_COV_H 3 | 4 | #include "vec.h" 5 | 6 | namespace cec { 7 | 8 | class mean: public vec { 9 | public: 10 | explicit mean(const mat &sample) 11 | : mean(sample.n) { 12 | for (auto &&p : sample) add_point(p); 13 | update(); 14 | } 15 | 16 | explicit mean(int n) 17 | : vec(n), 18 | acc(n) { 19 | acc.fill(0); 20 | } 21 | 22 | mean(const mean &initial) = default; 23 | 24 | mean(mean &&initial) noexcept = default; 25 | 26 | mean &operator=(const mean &m) = default; 27 | 28 | void add_point(const row &point) { 29 | acc += point; 30 | car++; 31 | } 32 | 33 | void rem_point(const row &point) { 34 | acc -= point; 35 | car--; 36 | } 37 | 38 | void update() { 39 | row::operator=(acc); 40 | (*this) /= car; 41 | } 42 | 43 | int card() const { 44 | return car; 45 | } 46 | 47 | private: 48 | using row::operator=; 49 | int car = 0; 50 | vec acc; 51 | }; 52 | 53 | class covariance: public mat { 54 | public: 55 | covariance &operator=(const covariance &cov) = default; 56 | 57 | covariance(const covariance &) = default; 58 | 59 | covariance(covariance &&) noexcept = default; 60 | 61 | const cec::mean& mean() const { 62 | return mn; 63 | } 64 | 65 | int card() const { 66 | return mn.card(); 67 | } 68 | 69 | static covariance estimate(const mat &sample) { 70 | cec::mean mn(sample); 71 | int n = sample.n; 72 | mat acc(n, n); 73 | acc.fill(0); 74 | vec t_vec(n); 75 | for (auto &&p : sample) { 76 | t_vec = p; 77 | t_vec -= mn; 78 | acc += mat::outer_product(t_vec); 79 | } 80 | acc /= sample.m; 81 | return covariance(acc, mn); 82 | } 83 | 84 | void add_point(const row &point) { 85 | int card = mn.card(); 86 | double card_n = card + 1; 87 | cov_change(point, card / card_n, card / (card_n * card_n), *this); 88 | mn.add_point(point); 89 | mn.update(); 90 | } 91 | 92 | void rem_point(const row &point) { 93 | int card = mn.card(); 94 | double card_n = card - 1; 95 | cov_change(point, card / card_n, -card / (card_n * card_n), *this); 96 | mn.rem_point(point); 97 | mn.update(); 98 | } 99 | 100 | protected: 101 | cec::mean mn; 102 | 103 | covariance(mat initial, cec::mean mn) 104 | : mat(std::move(initial)), 105 | mn(std::move(mn)) {} 106 | 107 | inline void cov_change(const row &point, double cov_mul, 108 | double new_cov_point_mul, mat &dst) { 109 | for (int j = 0; j < n; j++) 110 | for (int k = 0; k < n; k++) 111 | dst[j][k] = (*this)[j][k] * cov_mul 112 | + (mn[j] - point[j]) 113 | * (mn[k] - point[k]) 114 | * new_cov_point_mul; 115 | } 116 | }; 117 | } 118 | #endif //CEC_COV_H 119 | -------------------------------------------------------------------------------- /src/r_params.cpp: -------------------------------------------------------------------------------- 1 | #include "r_params.h" 2 | #include "r_utils.h" 3 | 4 | namespace cec { 5 | namespace r { 6 | r_ext_ptr get_centers_param(SEXP centers_param_r) { 7 | r_wrapper r_par(centers_param_r); 8 | init_method im = parse_init_method(r_par["init.method"].get()); 9 | auto centers = (im == init_method::NONE) ? r_par["mat"].get>() : make_r_ext(0, 0); 10 | auto var_centers = r_par["var.centers"].get>>(); 11 | return make_r_ext(im, *centers, *var_centers); 12 | } 13 | 14 | r_ext_ptr get_control_param(SEXP control_param_r) { 15 | r_wrapper r_par(control_param_r); 16 | return make_r_ext( 17 | r_par["starts"].get(), 18 | r_par["max.iters"].get(), 19 | r_par["min.card"].get(), 20 | r_par["threads"].get() 21 | ); 22 | } 23 | 24 | r_ext_ptr get_models_param(SEXP models_param_r, int n) { 25 | r_wrapper r_models(models_param_r); 26 | int len = r_models.size(); 27 | auto specs = make_r_ext>>(); 28 | for (int i = 0; i < len; i++) { 29 | r_wrapper model_r = r_models[i]; 30 | model_type type = parse_model_type(model_r["type"].get()); 31 | r_wrapper params_r = model_r["params"]; 32 | switch (type) { 33 | case model_type::ALL: 34 | specs->push_back(make_shared(n)); 35 | break; 36 | case model_type::SPHERICAL: 37 | specs->push_back(make_shared(n)); 38 | break; 39 | case model_type::DIAGONAL: 40 | specs->push_back(make_shared(n)); 41 | break; 42 | case model_type::FIXED_R: 43 | specs->push_back(make_shared(n, params_r["r"].get())); 44 | break; 45 | case model_type::COVARIANCE: { 46 | auto cov = params_r["cov"].get>(); 47 | specs->push_back(make_shared(n, *cov)); 48 | break; 49 | } 50 | case model_type::EIGENVALUES: { 51 | auto evals = params_r["eigenvalues"].get>>(); 52 | specs->push_back(make_shared(n, *evals)); 53 | break; 54 | } 55 | case model_type::MEAN: { 56 | auto mean = params_r["mean"].get>>(); 57 | specs->push_back(make_shared(n, *mean)); 58 | } 59 | } 60 | } 61 | return make_r_ext(std::move(*specs)); 62 | } 63 | 64 | r_ext_ptr get_split_param(SEXP split_param_r) { 65 | r_wrapper r_par(split_param_r); 66 | return make_r_ext ( 67 | r_par["limit"].get(), 68 | r_par["depth"].get(), 69 | r_par["tries"].get(), 70 | r_par["initial.starts"].get() 71 | ); 72 | } 73 | } 74 | } 75 | 76 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # CLion IDE project 2 | #set(ENV{Path} "") 3 | cmake_minimum_required(VERSION 3.5) 4 | project(cec) 5 | 6 | set(LINUX_R_INCLUDE_PATH "/usr/share/R/include") 7 | set(LINUX_R "R") 8 | set(LINUX_CEC_LIB "/home/konrad/R/x86_64-pc-linux-gnu-library/3.4/CEC/libs/CEC.so") 9 | 10 | set(WINDOWS_R_INCLUDE_PATH "K:/R/R-3.4.3/include") 11 | set(WINDOWS_R_LINK_DIRECTORY_PATH "K:/R/R-3.4.3/bin/x64") 12 | set(WINDOWS_R "K:/R/R-3.4.3/bin/x64/R.exe") 13 | set(WINDOWS_CEC_LIB "K:/R/R-3.4.3/library/CEC/libs/x64/CEC.dll") 14 | 15 | if (WIN32) 16 | include_directories(${WINDOWS_R_INCLUDE_PATH}) 17 | link_directories(${WINDOWS_R_LINK_DIRECTORY_PATH}) 18 | link_libraries(m R Rlapack Rblas) 19 | set(R ${WINDOWS_R}) 20 | set(CEC_LIB ${WINDOWS_CEC_LIB}) 21 | else (WIN32) 22 | include_directories(${LINUX_R_INCLUDE_PATH}) 23 | link_libraries(m R lapack blas pthread) 24 | set(R ${LINUX_R}) 25 | set(CEC_LIB ${LINUX_CEC_LIB}) 26 | endif (WIN32) 27 | 28 | set(CMAKE_CXX_STANDARD 11) 29 | set(CMAKE_VERBOSE_MAKEFILE TRUE) 30 | 31 | set(SOURCE_FILES 32 | src/cec_r.cpp 33 | src/cec_r.h 34 | src/cec_starter.cpp 35 | src/cec_starter.h 36 | src/cluster.h 37 | src/common.h 38 | src/exceptions.h 39 | src/init.cpp 40 | src/init.h 41 | src/m.h 42 | src/models/all.h 43 | src/models/cov_utils.cpp 44 | src/models/cov_utils.h 45 | src/models/fixed_covariance.h 46 | src/models/diagonal.h 47 | src/models/fixed_eigenvalues.h 48 | src/models/fixed_radius.h 49 | src/models/fixed_mean.h 50 | src/models/model.h 51 | src/models/spherical.h 52 | src/parallel_starter.h 53 | src/params.cpp 54 | src/params.h 55 | src/r_ext_ptr.h 56 | src/r_params.cpp 57 | src/r_params.h 58 | src/r_result.cpp 59 | src/r_result.h 60 | src/r_utils.h 61 | src/random.cpp 62 | src/random.h 63 | src/split_starter.cpp 64 | src/split_starter.h 65 | src/starter.cpp 66 | src/starter.h 67 | src/variable_starter.cpp 68 | src/variable_starter.h 69 | src/vec.h) 70 | 71 | set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -march=core2") 72 | 73 | set(CMAKE_CXX_FLAGS " ${CMAKE_CXX_FLAGS} -pipe -std=c++11 -pedantic -Wall") 74 | 75 | set(CMAKE_CXX_FLAGS_PROFILE "${CMAKE_CXX_FLAGS_RELEASE} -pg") 76 | set(CMAKE_EXE_LINKER_FLAGS_PROFILE "-pg") 77 | 78 | add_library(cec SHARED ${SOURCE_FILES}) 79 | #add_executable(cec_test ${SOURCE_FILES} src/test.cpp) 80 | 81 | set(MAKEVARS_PKG_LIBS "$(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS)") 82 | 83 | file(WRITE src/Makevars "CXX_STD = CXX11" "\n") 84 | file(APPEND src/Makevars "PKG_LIBS = " ${MAKEVARS_PKG_LIBS} "\n") 85 | 86 | string(REPLACE ";" " " SOURCE_FILES_STRING "${SOURCE_FILES}") 87 | string(REPLACE "src/" "" SOURCE_FILES_STRING ${SOURCE_FILES_STRING}) 88 | string(REGEX REPLACE "[^ ]+\\.h" "" SOURCE_FILES_STRING ${SOURCE_FILES_STRING}) 89 | string(REPLACE ".cpp" ".o" SOURCE_FILES_STRING ${SOURCE_FILES_STRING}) 90 | string(REPLACE ".c" ".o" SOURCE_FILES_STRING ${SOURCE_FILES_STRING}) 91 | string(REGEX REPLACE " +" " " SOURCE_FILES_STRING ${SOURCE_FILES_STRING}) 92 | 93 | file(APPEND src/Makevars "OBJECTS = " ${SOURCE_FILES_STRING} "\n") 94 | 95 | add_custom_target(install_CEC 96 | COMMAND ${R} CMD INSTALL --clean ${CMAKE_SOURCE_DIR} 97 | ) 98 | 99 | add_custom_target(dist_lib 100 | COMMAND ${CMAKE_COMMAND} -E copy $ ${CEC_LIB} 101 | ) 102 | 103 | add_dependencies(dist_lib cec) 104 | -------------------------------------------------------------------------------- /src/exceptions.h: -------------------------------------------------------------------------------- 1 | #ifndef CEC_EXCEPTIONS_H 2 | #define CEC_EXCEPTIONS_H 3 | 4 | #include 5 | #include 6 | #include "common.h" 7 | #include "vec.h" 8 | #include "cluster.h" 9 | 10 | namespace cec { 11 | class cec_exception : public std::exception { 12 | public: 13 | const string &info() { 14 | return info_msg; 15 | } 16 | 17 | protected: 18 | string info_msg; 19 | 20 | explicit cec_exception(string info) 21 | : info_msg(std::move(info)) {} 22 | 23 | public: 24 | const char *what() const noexcept override { 25 | return "cec exception"; 26 | } 27 | }; 28 | 29 | class clustering_exception : public cec_exception { 30 | public: 31 | explicit clustering_exception(string info) 32 | : cec_exception(std::move(info)) {} 33 | 34 | const char *what() const noexcept override { 35 | return "clustering failed"; 36 | } 37 | }; 38 | 39 | class invalid_covariance : public clustering_exception { 40 | public: 41 | explicit invalid_covariance(mat cov) 42 | : clustering_exception("invalid covariance: probably not positive definite"), 43 | cov(std::move(cov)) {} 44 | 45 | const mat &covariance() const { 46 | return cov; 47 | } 48 | 49 | private: 50 | mat cov; 51 | }; 52 | 53 | 54 | class all_clusters_removed : public clustering_exception { 55 | public: 56 | all_clusters_removed() 57 | : clustering_exception("all clusters have been removed") {} 58 | }; 59 | 60 | class not_implemented : public cec_exception { 61 | public: 62 | explicit not_implemented(const string &info) 63 | : cec_exception(info) {} 64 | 65 | const char *what() const noexcept override { 66 | return "not implemented"; 67 | } 68 | }; 69 | 70 | class invalid_init_method : public cec_exception { 71 | public: 72 | explicit invalid_init_method(string info) 73 | : cec_exception(std::move(info)) {} 74 | 75 | const char *what() const noexcept override { 76 | return "invalid center method"; 77 | } 78 | }; 79 | 80 | class invalid_model_name : public cec_exception { 81 | public: 82 | explicit invalid_model_name(string name) 83 | : cec_exception(std::move(name)) {} 84 | 85 | const char *what() const noexcept override { 86 | return "invalid model name"; 87 | } 88 | }; 89 | 90 | class missing_parameter : public cec_exception { 91 | public: 92 | explicit missing_parameter(std::string name) 93 | : cec_exception(std::move(name)) {} 94 | 95 | const char *what() const noexcept override { 96 | return "missing parameter"; 97 | } 98 | }; 99 | 100 | class invalid_parameter_type : public cec_exception { 101 | public: 102 | explicit invalid_parameter_type(string name) 103 | : cec_exception(std::move(name)) {} 104 | 105 | const char *what() const noexcept override { 106 | return "invalid parameter type"; 107 | } 108 | }; 109 | 110 | class invalid_model_parameter : public cec_exception { 111 | public: 112 | explicit invalid_model_parameter(string desc) 113 | : cec_exception(std::move(desc)) {} 114 | 115 | const char *what() const noexcept override { 116 | return "invalid model parameter"; 117 | } 118 | }; 119 | } 120 | #endif //CEC_EXCEPTIONS_H 121 | -------------------------------------------------------------------------------- /R/cec.params.R: -------------------------------------------------------------------------------- 1 | # maps clustering type to int 2 | resolve.type <- function(type) 3 | { 4 | types <- c("covariance", "fixedr", "spherical", "diagonal", "eigenvalues", "mean", "all") 5 | match.arg(type, types) 6 | } 7 | 8 | # prepares clustering parameters for C function 9 | create.cec.params.for.models <- function(k, n, type.arg, param.arg) 10 | { 11 | models <- replicate(k, list()) 12 | types <- vapply(type.arg, resolve.type, "") 13 | 14 | params <- NULL 15 | if (hasArg(param.arg)) 16 | params <- param.arg 17 | if (length(types) == 1) { 18 | types <- rep(types, k) 19 | if (hasArg(param.arg)) { 20 | params <- rep(list(unlist(param.arg)), k) 21 | params <- params[!params %in% list(NULL, NA)] 22 | } 23 | } 24 | 25 | if (k != length(types)) 26 | stop("Illegal argument: illegal length of \"type\" vector.") 27 | 28 | idx <- 0 29 | 30 | for (i in 1:length(types)) 31 | { 32 | type = types[i] 33 | models[[i]]$type = type 34 | models[[i]]$params = list() 35 | if (type == resolve.type("covariance")) 36 | { 37 | idx <- idx + 1 38 | 39 | if (length(params) < idx) 40 | stop("Illegal argument: illegal param length.") 41 | 42 | cov <- params[[idx]] 43 | 44 | if (!is.array(cov)) stop("Illegal argument: illegal parameter for \"covariance\" type.") 45 | if (ncol(cov) != n) stop("Illegal argument: illegal parameter for \"covariance\" type.") 46 | if (nrow(cov) != n) stop("Illegal argument: illegal parameter for \"covariance\" type.") 47 | 48 | if (!try.chol(cov)) 49 | stop("Illegal argument: illegal parameter for \"covariance\" type - matrix must be positive-definite.") 50 | 51 | cov.inv = solve(cov) 52 | models[[i]]$params <- list(cov = cov, cov.inv = cov.inv) 53 | } 54 | else if (type == resolve.type("fixed")) 55 | { 56 | idx <- idx + 1 57 | 58 | if (length(params) < idx) 59 | stop("Illegal argument: illegal param length.") 60 | 61 | r = params[[idx]] 62 | if (length(r) != 1) stop("Illegal argument: illegal parameter for \"fixedr\" type.") 63 | if (!is.numeric(r)) stop("Illegal argument: illegal parameter for \"fixedr\" type.") 64 | if (!r > 0) stop("Illegal argument: illegal parameter for \"fixedr\" type.") 65 | models[[i]]$params = list(r = r) 66 | } 67 | else if (type == resolve.type("eigenvalues")) 68 | { 69 | idx <- idx + 1 70 | 71 | if (length(params) < idx) 72 | stop("Illegal argument: illegal param length.") 73 | 74 | evals <- params[[idx]] 75 | 76 | if (length(evals) != n) stop("Illegal argument: illegal parameter for \"eigenvalues\" type: invalid length.") 77 | if (!all(evals != 0)) stop("Illegal argument: illegal parameter for \"eigenvalues\" type: all values must be greater than 0.") 78 | models[[i]]$params = list(eigenvalues = sort(evals)) 79 | } 80 | else if (type == resolve.type("mean")) 81 | { 82 | idx <- idx + 1 83 | 84 | if (length(params) < idx) 85 | stop("Illegal argument: illegal param length.") 86 | 87 | mean <- params[[idx]] 88 | 89 | if (length(mean) != n) stop("Illegal argument: illegal parameter for \"mean\" type: invalid length.") 90 | models[[i]]$params = list(mean = mean) 91 | } 92 | } 93 | models 94 | } 95 | 96 | try.chol <- function(mat) 97 | { 98 | ifelse("try-error" %in% class(try(chol(mat), silent=TRUE)), F, T) 99 | } 100 | -------------------------------------------------------------------------------- /src/split_starter.cpp: -------------------------------------------------------------------------------- 1 | #include "split_starter.h" 2 | #include "exceptions.h" 3 | 4 | namespace cec { 5 | unique_ptr 6 | split_starter::try_split_cluster(const mat &x_mat) { 7 | try { 8 | const unique_ptr &single_res 9 | = cec.start(x_mat, vector(x_mat.m, 0), 10 | model_spec::create_models(m_spec)); 11 | 12 | if (!single_res) 13 | return unique_ptr(); 14 | 15 | unique_ptr split_res = splitter.start(clustering_input(x_mat, try_split_models)); 16 | 17 | if (split_res && split_res->cluster_number == 2 && 18 | split_res->energy < single_res->energy) 19 | return split_res; 20 | 21 | return unique_ptr(); 22 | } catch (clustering_exception &ce) { 23 | return unique_ptr(); 24 | } 25 | } 26 | 27 | unique_ptr 28 | split_starter::start(const unique_ptr &cl_res, 29 | const clustering_input &input_params) { 30 | 31 | if (cl_res->cluster_number >= max_k) 32 | return make_unique(*cl_res); 33 | 34 | const mat &x = input_params.x; 35 | int n = x.n; 36 | int k = cl_res->centers.m; 37 | vector cluster(x.m); 38 | vector moved(k, true); 39 | 40 | auto current_res = make_unique(*cl_res); 41 | 42 | for (int split_level = 0; split_level < max_depth; split_level++) { 43 | cluster = current_res->assignment; 44 | k = current_res->centers.m; 45 | const vector &split = points_split::split_points(x, cluster, k); 46 | mat split_res_c_mat = mat(2, n); 47 | int k_s = 0; 48 | int next_k = std::min(max_k, k * 2); 49 | vector is_split(next_k); 50 | bool split_flag = false; 51 | 52 | for (int i = 0; i < k; i++) { 53 | if (k_s >= max_k) 54 | break; 55 | int split_m = split[i].points().m; 56 | if (split_m == 0) 57 | continue; 58 | bool split_success = false; 59 | 60 | const mat &split_x_mat = split[i].points(); 61 | const vector mapping = split[i].mapping(); 62 | vector split_assignment(split_x_mat.m); 63 | if (moved[i]) { 64 | auto const &split_res = try_split_cluster(split_x_mat); 65 | if (split_res) { 66 | split_success = true; 67 | split_res_c_mat = split_res->centers; 68 | split_assignment = split_res->assignment; 69 | } 70 | } 71 | split_flag = split_flag || split_success; 72 | if (k_s == max_k - 1) 73 | split_success = false; 74 | 75 | if (split_success) { 76 | is_split[k_s] = true; 77 | is_split[k_s + 1] = true; 78 | 79 | for (int p = 0; p < split_m; p++) { 80 | if (split_assignment[p] == 0) 81 | cluster[mapping[p]] = k_s; 82 | else 83 | cluster[mapping[p]] = k_s + 1; 84 | } 85 | k_s += 2; 86 | 87 | } else { 88 | is_split[k_s] = false; 89 | for (int p = 0; p < split_m; p++) 90 | cluster[mapping[p]] = k_s; 91 | k_s++; 92 | } 93 | } 94 | 95 | bool need_another_split = false; 96 | if (split_flag) { 97 | try { 98 | current_res = cec.start(x, cluster, model_spec::create_models(m_spec, k_s)); 99 | } catch (clustering_exception &ce) { 100 | return current_res; 101 | } 102 | 103 | for (int i = 0; i < x.m; i++) { 104 | if (cluster[i] != current_res->assignment[i]) { 105 | moved[cluster[i]] = true; 106 | moved[current_res->assignment[i]] = true; 107 | } 108 | } 109 | for (int i = 0; i < k_s; i++) { 110 | if (is_split[i]) 111 | moved[i] = true; 112 | need_another_split = (need_another_split || moved[i]); 113 | } 114 | } 115 | if (!need_another_split) 116 | break; 117 | } 118 | return current_res; 119 | } 120 | } 121 | -------------------------------------------------------------------------------- /R/tests.R: -------------------------------------------------------------------------------- 1 | run.cec.tests <- function() 2 | { 3 | errors <- 0 4 | 5 | # test files 6 | tests <- list.files(system.file("cec_tests", package="CEC")) 7 | for (test in tests) 8 | { 9 | if (grepl(".R", test, perl=T)) 10 | { 11 | testenv <- new.env() 12 | local( 13 | { 14 | # just to trick R CMD check... 15 | testname <- NULL 16 | setup <- NULL 17 | }, 18 | testenv 19 | ) 20 | source(system.file("cec_tests", test, package="CEC"), local=testenv) 21 | errors <- errors + local( 22 | { 23 | local.errors <- 0 24 | cat(paste("Test:",testname, "\n")) 25 | fs <- lsf.str() 26 | 27 | # execute setup function if exists 28 | if ("setup" %in% fs) eval(expr=body(setup), envir=testenv) 29 | for (fn in fs) 30 | { 31 | # test cases 32 | if (grepl("test.", fn)) 33 | { 34 | cat(paste("---- ", fn)) 35 | fbody = body(eval(parse(text=fn))) 36 | 37 | # evaluate test case function and catch (and count) errors 38 | local.errors <- local.errors + tryCatch( 39 | { 40 | eval(expr=fbody, envir=testenv) 41 | cat(": OK\n") 42 | 0 43 | }, 44 | error = function(er) { 45 | cat(": FAILED\n") 46 | warning(er$message, immediate.=T, call.=F) 47 | 1 48 | } 49 | ) 50 | } 51 | } 52 | local.errors 53 | }, 54 | envir=testenv) 55 | } 56 | } 57 | if (errors > 0) 58 | { 59 | stop("One or more tests failed.") 60 | } 61 | } 62 | printmsg <- function(msg) 63 | { 64 | if (!is.null(msg)) 65 | paste(msg, ":") 66 | else "" 67 | } 68 | 69 | checkNumericVectorEquals <- function(ex, ac, msg=NULL, tolerance = .Machine$double.eps ^ 0.5) 70 | { 71 | if (length(ex) != length(ac)) stop (paste(printmsg(msg),"Vectors have different length.")) 72 | for (i in seq(1, length(ex))) 73 | if (!isTRUE(all.equal.numeric(ex[i], ac[i], tolerance=tolerance))) 74 | stop (paste(printmsg(msg), "Vectors differ at index:", i, ", expected:", ex[i], ", actuall:",ac[i])) 75 | 76 | } 77 | 78 | checkNumericEquals <- function(ex, ac, msg=NULL, tolerance = .Machine$double.eps ^ 0.5) 79 | { 80 | if(!is.numeric(ex)) stop(paste(printmsg(msg), "Expression:",ex,"is not numeric type.")) 81 | if(!is.numeric(ac)) stop(paste(printmsg(msg), "Expression:",ac,"is not numeric type.")) 82 | if (!isTRUE(all.equal.numeric(ex, ac, tolerance=tolerance))) 83 | stop (paste(printmsg(msg), "Numeric values are different: expected:", ex, ", actuall:",ac, ", difference:", abs(ex - ac))) 84 | } 85 | 86 | checkEquals <- function(ex, ac, msg=NULL) 87 | { 88 | if (!isTRUE(identical(ex, ac))) 89 | stop (paste(printmsg(msg), "Values are not identical: expected:", ex, ", actuall:",ac)) 90 | } 91 | 92 | checkTrue <- function(exp, msg=NULL) 93 | { 94 | if (!is.logical(exp)) 95 | { 96 | stop(paste(printmsg(msg), "Expression has not logical type.")) 97 | } 98 | if (!isTRUE(exp)) 99 | { 100 | stop(paste(printmsg(msg), "Expression is not TRUE.")) 101 | } 102 | } 103 | 104 | checkNumericMatrixEquals <- function(ex, ac, msg=NULL, tolerance = .Machine$double.eps ^ 0.5) 105 | { 106 | 107 | if (nrow(ex) != nrow(ac)) stop (paste(printmsg(msg),"Matrices have different dimensions.")) 108 | if (ncol(ex) != ncol(ac)) stop (paste(printmsg(msg),"Matrices have different dimensions.")) 109 | 110 | for (i in seq(1, nrow(ex))) 111 | for (j in seq(1, ncol(ex))) 112 | if (!isTRUE(all.equal.numeric(ex[i, j], ac[i, j], tolerance=tolerance))) 113 | stop (paste(printmsg(msg), "Matrices differ at row:", i, " col:", j, ": expected:", ex[i, j], ", actuall:",ac[i, j])) 114 | 115 | } 116 | 117 | # Maximum likelihood estimate of covariance matrix. 118 | cov.mle <- function(M) 119 | { 120 | mean <- colMeans(M) 121 | mat <- matrix(0,ncol(M),ncol(M)) 122 | for (i in seq(1, nrow(M))) 123 | { 124 | v <- M[i,] 125 | mat <- mat + (t(t(v - mean)) %*% t(v - mean)) 126 | } 127 | mat <- mat / nrow(M) 128 | mat 129 | } 130 | -------------------------------------------------------------------------------- /src/starter.cpp: -------------------------------------------------------------------------------- 1 | #include "starter.h" 2 | #include "cov.h" 3 | #include "cluster.h" 4 | #include "exceptions.h" 5 | 6 | std::unique_ptr 7 | cec::cross_entropy_clustering::start(const mat &x, const vector &initial_assignment, 8 | const vector> &models) { 9 | 10 | int m = x.m; 11 | int k = models.size(); 12 | int n = x.n; 13 | int min_card = params.min_card; 14 | int max_iter = params.max_iter; 15 | vector assignment = initial_assignment; 16 | double energy_sum = 0; 17 | 18 | vector> clusters(k); 19 | vector split = points_split::split_points(x, assignment, k); 20 | 21 | for (int i = 0; i < k; i++) { 22 | const mat &cluster_split = split[i].points(); 23 | if (cluster_split.m >= min_card) { 24 | covariance cov = covariance::estimate(cluster_split); 25 | clusters[i].reset(new cluster(*models[i], cov, m)); 26 | } 27 | } 28 | 29 | for (int i = 0; i < k; i++) { 30 | if (!clusters[i]) 31 | continue; 32 | 33 | double energy = clusters[i]->energy(); 34 | 35 | if (m::isnan(energy)) 36 | throw invalid_covariance(clusters[i]->covariance()); 37 | 38 | energy_sum += energy; 39 | } 40 | 41 | int removed_after_assignment = std::count(clusters.begin(), clusters.end(), 42 | unique_ptr()); 43 | if (removed_after_assignment == k) 44 | throw all_clusters_removed(); 45 | 46 | bool handle_removed_flag = removed_after_assignment != 0; 47 | 48 | int iter = (handle_removed_flag ? -1 : 0); 49 | for (; iter < max_iter; iter++) { 50 | bool transfer_flag = false; 51 | bool removed_last_iteration_flag = false; 52 | 53 | for (int i = 0; i < m; i++) { 54 | const int cl_num = assignment[i]; 55 | unique_ptr &cl_src = clusters[cl_num]; 56 | 57 | if (handle_removed_flag && cl_src) 58 | continue; 59 | 60 | double rem_energy_gain = cl_src ? cl_src->rem_point(x[i]) : 0; 61 | double best_gain = cl_src ? 0 : m::INF; 62 | 63 | if (m::isnan(rem_energy_gain)) 64 | throw invalid_covariance(cl_src->covariance()); 65 | 66 | int dst_cl_num = -1; 67 | 68 | for (int j = 0; j < k; j++) { 69 | if (j == cl_num || !clusters[j]) 70 | continue; 71 | 72 | cluster &cl_dst = *clusters[j]; 73 | double add_energy_gain = cl_dst.add_point(x[i]); 74 | 75 | if (m::isnan(add_energy_gain)) 76 | throw invalid_covariance(cl_dst.covariance()); 77 | 78 | double gain = add_energy_gain + rem_energy_gain; 79 | 80 | if (gain < best_gain) { 81 | dst_cl_num = j; 82 | best_gain = gain; 83 | } 84 | } 85 | 86 | if (dst_cl_num != -1) { 87 | assignment[i] = dst_cl_num; 88 | clusters[dst_cl_num]->apply_change(); 89 | if (cl_src) { 90 | cl_src->apply_change(); 91 | if (cl_src->card() < min_card) { 92 | removed_last_iteration_flag = true; 93 | energy_sum -= cl_src->energy(); 94 | clusters[cl_num].reset(nullptr); 95 | } 96 | } 97 | energy_sum += best_gain; 98 | transfer_flag = true; 99 | } 100 | } 101 | 102 | if (!transfer_flag) 103 | break; 104 | 105 | if (removed_last_iteration_flag) { 106 | handle_removed_flag = true; 107 | iter--; 108 | } else 109 | handle_removed_flag = false; 110 | } 111 | 112 | mat centers(k, n); 113 | vector cov_mats(k, mat(n, n)); 114 | 115 | for (int i = 0; i < k; i++) { 116 | if (!clusters[i]) { 117 | centers[i].fill(m::QNAN); 118 | cov_mats[i].fill(m::QNAN); 119 | continue; 120 | } 121 | centers[i] = clusters[i]->mean(); 122 | cov_mats[i] = clusters[i]->covariance(); 123 | } 124 | int final_k = k - std::count(clusters.begin(), clusters.end(), unique_ptr()); 125 | return unique_ptr( 126 | new clustering_results(centers, assignment, final_k, iter + 1, energy_sum, cov_mats)); 127 | } 128 | 129 | std::vector 130 | cec::points_split::split_points(const mat &points, const vector &assignment, int k) { 131 | vector sizes(k, 0); 132 | vector indices(k, 0); 133 | for (auto &&cl : assignment) 134 | sizes[cl]++; 135 | vector split; 136 | for (auto &&size : sizes) { 137 | split.emplace_back(mat(size, points.n), vector(size)); 138 | } 139 | int m = points.m; 140 | for (int i = 0; i < m; i++) { 141 | int cl = assignment[i]; 142 | int idx = indices[cl]; 143 | split[cl].pts[idx] = points[i]; 144 | split[cl].map[idx] = i; 145 | indices[cl]++; 146 | } 147 | return split; 148 | } 149 | -------------------------------------------------------------------------------- /src/r_utils.h: -------------------------------------------------------------------------------- 1 | #ifndef CEC_R_UTILS_H 2 | #define CEC_R_UTILS_H 3 | 4 | #include "vec.h" 5 | #include "exceptions.h" 6 | #include "r_ext_ptr.h" 7 | 8 | #include 9 | #include 10 | 11 | namespace cec { 12 | 13 | namespace r { 14 | template 15 | inline T get(SEXP sexp); 16 | 17 | template<> 18 | inline const char* get(SEXP sexp) { 19 | if (!isString(sexp)) 20 | throw invalid_parameter_type("string vector"); 21 | return CHAR(STRING_ELT(sexp, 0)); 22 | } 23 | 24 | template<> 25 | inline int get(SEXP sexp) { 26 | if (TYPEOF(sexp) != INTSXP || LENGTH(sexp) != 1) 27 | throw invalid_parameter_type("single integer"); 28 | return INTEGER(sexp)[0]; 29 | } 30 | 31 | template<> 32 | inline double get(SEXP sexp) { 33 | if (TYPEOF(sexp) != REALSXP || LENGTH(sexp) != 1) 34 | throw invalid_parameter_type("single real"); 35 | return REAL(sexp)[0]; 36 | } 37 | 38 | template<> 39 | inline r_ext_ptr get(SEXP sexp) { 40 | if (!isMatrix(sexp)) 41 | throw invalid_parameter_type("matrix"); 42 | int m = Rf_nrows(sexp); 43 | int n = Rf_ncols(sexp); 44 | double *r_ma_data = REAL(sexp); 45 | auto ma = make_r_ext(m, n); 46 | for (int i = 0; i < m; i++) 47 | for (int j = 0; j < n; j++) 48 | (*ma)[i][j] = r_ma_data[j * m + i]; 49 | 50 | return ma; 51 | } 52 | 53 | template<> 54 | inline r_ext_ptr> get(SEXP sexp) { 55 | if (TYPEOF(sexp) != REALSXP) 56 | throw invalid_parameter_type("real vector"); 57 | return make_r_ext>(REAL(sexp), REAL(sexp) + LENGTH(sexp)); 58 | } 59 | 60 | template<> 61 | inline r_ext_ptr> get(SEXP sexp) { 62 | if (TYPEOF(sexp) != INTSXP) 63 | throw invalid_parameter_type("integer vector"); 64 | return make_r_ext>(INTEGER(sexp), INTEGER(sexp) + LENGTH(sexp)); 65 | } 66 | 67 | inline SEXP put(const mat &ma) { 68 | int m = ma.m; 69 | int n = ma.n; 70 | const double *m_data = ma.data(); 71 | 72 | SEXP r_ma; 73 | 74 | PROTECT(r_ma = allocMatrix(REALSXP, m, n)); 75 | 76 | double *r_data = REAL(r_ma); 77 | for (int i = 0; i < m; i++) 78 | for (int j = 0; j < n; j++) 79 | r_data[j * m + i] = m_data[i * n + j]; 80 | 81 | UNPROTECT(1); 82 | 83 | return r_ma; 84 | } 85 | 86 | inline SEXP put(int val) { 87 | SEXP ve; 88 | PROTECT(ve = allocVector(INTSXP, 1)); 89 | INTEGER(ve)[0] = val; 90 | UNPROTECT(1); 91 | 92 | return ve; 93 | } 94 | 95 | inline SEXP put(double val) { 96 | SEXP ve; 97 | PROTECT(ve = allocVector(REALSXP, 1)); 98 | REAL(ve)[0] = val; 99 | UNPROTECT(1); 100 | 101 | return ve; 102 | } 103 | 104 | inline SEXP put(vector val) { 105 | SEXP ve; 106 | PROTECT(ve = allocVector(INTSXP, val.size())); 107 | std::copy(val.begin(), val.end(), INTEGER(ve)); 108 | UNPROTECT(1); 109 | 110 | return ve; 111 | } 112 | 113 | class r_wrapper { 114 | public: 115 | explicit r_wrapper(SEXP sexp) 116 | : sexp(sexp) {} 117 | 118 | int size() { 119 | return LENGTH(sexp); 120 | } 121 | 122 | r_wrapper operator[](const char *name) { 123 | return r_wrapper(get_named(sexp, name)); 124 | } 125 | 126 | r_wrapper operator[](const int idx) { 127 | return r_wrapper(get_n(sexp, idx)); 128 | } 129 | 130 | template 131 | T get() { 132 | return cec::r::get(sexp); 133 | } 134 | 135 | private: 136 | SEXP sexp; 137 | 138 | SEXP get_named(SEXP list, const char *name) { 139 | SEXP elementNames = GET_NAMES(list); 140 | if (!isString(elementNames)) 141 | throw invalid_parameter_type("named elements"); 142 | int len = LENGTH(elementNames); 143 | for (int i = 0; i < len; i++) { 144 | if (strcmp(name, CHAR(STRING_ELT(elementNames, i)))) 145 | continue; 146 | SEXP res = VECTOR_ELT(list, i); 147 | if (!res || isNull(res)) 148 | break; 149 | return res; 150 | } 151 | throw missing_parameter(name); 152 | } 153 | 154 | SEXP get_n(SEXP vec, int idx) { 155 | int len = LENGTH(vec); 156 | if (idx >= len) 157 | throw missing_parameter("out of range: " + std::to_string(idx)); 158 | SEXP el = VECTOR_ELT(vec, idx); 159 | return el; 160 | } 161 | }; 162 | } 163 | } 164 | #endif /* CEC_R_UTILS_H */ 165 | 166 | -------------------------------------------------------------------------------- /src/params.h: -------------------------------------------------------------------------------- 1 | #ifndef CEC_PARAMS_H 2 | #define CEC_PARAMS_H 3 | 4 | #include "vec.h" 5 | #include "models/model.h" 6 | #include "init.h" 7 | #include "common.h" 8 | 9 | namespace cec { 10 | enum class init_method { 11 | NONE, 12 | KMEANSPP, 13 | RANDOM 14 | }; 15 | 16 | enum class model_type { 17 | ALL, 18 | COVARIANCE, 19 | DIAGONAL, 20 | EIGENVALUES, 21 | FIXED_R, 22 | MEAN, 23 | SPHERICAL 24 | }; 25 | 26 | init_method parse_init_method(const string &method); 27 | 28 | model_type parse_model_type(const string &name); 29 | 30 | class centers_param { 31 | public: 32 | const init_method init_m; 33 | const mat centers_mat; 34 | const vector var_centers; 35 | 36 | centers_param(init_method init_m, mat centers_mat, vector var_centers) 37 | : init_m(init_m), 38 | centers_mat(std::move(centers_mat)), 39 | var_centers(std::move(var_centers)) {} 40 | 41 | shared_ptr get_centers_init(); 42 | }; 43 | 44 | class control_param { 45 | public: 46 | int starts; 47 | int max_iter; 48 | int min_card; 49 | int threads; 50 | 51 | control_param(int starts, int max_iter, int min_card, int threads) 52 | : starts(starts), 53 | max_iter(max_iter), 54 | min_card(min_card), 55 | threads(threads) {} 56 | }; 57 | 58 | class split_param { 59 | public: 60 | int max_k; 61 | int max_depth; 62 | int tries; 63 | int initial_starts; 64 | 65 | split_param(int max_k, int max_depth, int tries, int initial_starts) 66 | : max_k(max_k), 67 | max_depth(max_depth), 68 | tries(tries), 69 | initial_starts(initial_starts) {} 70 | }; 71 | 72 | class model_spec { 73 | public: 74 | const model_type type; 75 | 76 | explicit model_spec(const model_type type) 77 | : type(type) {} 78 | 79 | virtual ~model_spec() = default; 80 | 81 | virtual unique_ptr create_model() const = 0; 82 | 83 | static vector> create_models(vector> specs); 84 | static vector> create_models(const model_spec &spec, int n = 1); 85 | }; 86 | 87 | 88 | class model_all_spec : public model_spec { 89 | public: 90 | const int n; 91 | 92 | explicit model_all_spec(int n) 93 | : model_spec(model_type::ALL), 94 | n(n) {} 95 | 96 | unique_ptr create_model() const override; 97 | }; 98 | 99 | class model_spherical_spec : public model_spec { 100 | public: 101 | const int n; 102 | 103 | explicit model_spherical_spec(int n) 104 | : model_spec(model_type::ALL), 105 | n(n) {} 106 | 107 | unique_ptr create_model() const override; 108 | }; 109 | 110 | class model_diagonal_spec : public model_spec { 111 | public: 112 | const int n; 113 | 114 | explicit model_diagonal_spec(int n) 115 | : model_spec(model_type::ALL), 116 | n(n) {} 117 | 118 | unique_ptr create_model() const override; 119 | }; 120 | 121 | class model_fixed_radius_spec : public model_spec { 122 | public: 123 | const int n; 124 | const double r; 125 | 126 | explicit model_fixed_radius_spec(int n, double r) 127 | : model_spec(model_type::FIXED_R), 128 | n(n), 129 | r(r) {} 130 | 131 | unique_ptr create_model() const override; 132 | }; 133 | 134 | class model_covariance_spec : public model_spec { 135 | public: 136 | const int n; 137 | const mat g_cov; 138 | 139 | explicit model_covariance_spec(int n, mat g_cov) 140 | : model_spec(model_type::FIXED_R), 141 | n(n), 142 | g_cov(std::move(g_cov)) {} 143 | 144 | unique_ptr create_model() const override; 145 | }; 146 | 147 | class model_eigenvalues_spec : public model_spec { 148 | public: 149 | const int n; 150 | const vector values; 151 | 152 | explicit model_eigenvalues_spec(int n, vector values) 153 | : model_spec(model_type::EIGENVALUES), 154 | n(n), 155 | values(std::move(values)) {} 156 | 157 | unique_ptr create_model() const override; 158 | }; 159 | 160 | class model_mean_spec : public model_spec { 161 | public: 162 | const int n; 163 | const vector mean; 164 | 165 | explicit model_mean_spec(int n, vector mean) 166 | : model_spec(model_type::MEAN), 167 | n(n), 168 | mean(std::move(mean)) {} 169 | 170 | unique_ptr create_model() const override; 171 | }; 172 | 173 | class models_param { 174 | public: 175 | const vector> specs; 176 | 177 | explicit models_param(vector> specs) 178 | : specs(std::move(specs)) {} 179 | }; 180 | 181 | 182 | } 183 | #endif //CEC_PARAMS_H 184 | -------------------------------------------------------------------------------- /src/cec_r.cpp: -------------------------------------------------------------------------------- 1 | #include "params.h" 2 | #include "starter.h" 3 | #include "cec_starter.h" 4 | #include "variable_starter.h" 5 | 6 | #include "r_result.h" 7 | #include "r_utils.h" 8 | #include "r_params.h" 9 | #include "cec_r.h" 10 | 11 | #include 12 | 13 | using namespace cec; 14 | using namespace cec::r; 15 | using std::exception; 16 | 17 | static void seed_from_r() { 18 | GetRNGstate(); 19 | double r = unif_rand(); 20 | PutRNGstate(); 21 | unsigned long seed; 22 | memcpy(&seed, &r, sizeof(seed)); 23 | random::set_seed(seed); 24 | } 25 | 26 | extern "C" 27 | SEXP cec_r(SEXP x_r, SEXP centers_param_r, SEXP control_param_r, SEXP models_param_r) { 28 | seed_from_r(); 29 | const char *ex_what = nullptr; 30 | r_ext_ptr start_results; 31 | try { 32 | auto x = get>(x_r); 33 | int n = x->n; 34 | 35 | r_ext_ptr centers_par = get_centers_param(centers_param_r); 36 | r_ext_ptr control_par = get_control_param(control_param_r); 37 | r_ext_ptr models_par = get_models_param(models_param_r, n); 38 | 39 | const shared_ptr ¢ers_init_ptr = centers_par->get_centers_init(); 40 | const centers_init_spec &init_spec = *centers_init_ptr; 41 | 42 | cec_starter::parameters starter_params({control_par->max_iter, control_par->min_card}, init_spec); 43 | 44 | parallel_starter ps(control_par->threads, control_par->starts); 45 | 46 | auto cl_function = [&](const mat &x, const vector> &specs) { 47 | multiple_starts_task task(starter_params, x, specs); 48 | return ps.start(task); 49 | }; 50 | 51 | variable_starter var_start(std::move(cl_function), centers_par->var_centers); 52 | 53 | unique_ptr results = var_start.start(*x, models_par->specs); 54 | 55 | if (!results) 56 | throw clustering_exception("all starts failed"); 57 | 58 | start_results.reset(results.release()); 59 | 60 | } catch (exception &ex) { 61 | ex_what = ex.what(); 62 | } 63 | 64 | if (ex_what) 65 | error(ex_what); 66 | 67 | try { 68 | SEXP r_res; 69 | PROTECT(r_res = create_R_result(*start_results)); 70 | UNPROTECT(1); 71 | return r_res; 72 | } catch (std::exception &ex) { 73 | ex_what = ex.what(); 74 | } 75 | error(ex_what); 76 | } 77 | 78 | extern "C" 79 | SEXP cec_split_r(SEXP x_r, SEXP centers_param_r, SEXP control_param_r, SEXP models_param_r, 80 | SEXP split_param_r) { 81 | seed_from_r(); 82 | const char *ex_what = nullptr; 83 | r_ext_ptr start_results; 84 | try { 85 | auto x = get>(x_r); 86 | int n = x->n; 87 | 88 | r_ext_ptr centers_par = get_centers_param(centers_param_r); 89 | r_ext_ptr control_par = get_control_param(control_param_r); 90 | r_ext_ptr models_par = get_models_param(models_param_r, n); 91 | r_ext_ptr split_par = get_split_param(split_param_r); 92 | 93 | const shared_ptr ¢ers_init_ptr = centers_par->get_centers_init(); 94 | const centers_init_spec &init_spec = *centers_init_ptr; 95 | 96 | cec_parameters start_params(control_par->max_iter, control_par->min_card); 97 | 98 | cec_starter::parameters initial_cl_params(start_params, init_spec, 99 | split_par->initial_starts); 100 | 101 | split_starter::parameters split_params(start_params, *models_par->specs[0], init_spec, 102 | split_par->tries, split_par->max_k, 103 | split_par->max_depth); 104 | 105 | parallel_starter ps(control_par->threads, control_par->starts); 106 | 107 | auto cl_function = [&](const mat &x, const vector> &specs) { 108 | start_and_split_task task(initial_cl_params, split_params, x, specs); 109 | return ps.start(task); 110 | }; 111 | 112 | variable_starter var_start(cl_function, centers_par->var_centers); 113 | 114 | unique_ptr results = var_start.start(*x, models_par->specs); 115 | 116 | start_results.reset(results.release()); 117 | 118 | } catch (exception &ex) { 119 | ex_what = ex.what(); 120 | } 121 | 122 | if (ex_what) 123 | error(ex_what); 124 | 125 | try { 126 | SEXP r_res; 127 | PROTECT(r_res = create_R_result(*start_results)); 128 | UNPROTECT(1); 129 | return r_res; 130 | } catch (std::exception &ex) { 131 | ex_what = ex.what(); 132 | } 133 | error(ex_what); 134 | } 135 | 136 | extern "C" 137 | SEXP cec_init_centers_r(SEXP x_r, SEXP k_r, SEXP method_r) { 138 | seed_from_r(); 139 | const char *ex_what = nullptr; 140 | r_ext_ptr res; 141 | try { 142 | auto x = get>(x_r); 143 | int k = get(k_r); 144 | init_method im = parse_init_method(get(method_r)); 145 | switch (im) { 146 | case init_method::KMEANSPP: 147 | res.init(kmeanspp_init().init(*x, k)); 148 | break; 149 | case init_method::RANDOM: 150 | res.init(random_init().init(*x, k)); 151 | break; 152 | default: 153 | throw invalid_init_method("invalid init method"); 154 | } 155 | } catch (exception &ex) { 156 | ex_what = ex.what(); 157 | } 158 | 159 | if (ex_what) 160 | error(ex_what); 161 | 162 | try { 163 | SEXP r_res = put(*res); 164 | return r_res; 165 | } catch (exception &ex) { 166 | ex_what = ex.what(); 167 | error(ex_what); 168 | } 169 | } 170 | 171 | R_CallMethodDef methods[] = { 172 | {"cec_r", (DL_FUNC) &cec_r, 4}, 173 | {"cec_split_r", (DL_FUNC) &cec_split_r, 5}, 174 | {"cec_init_centers_r", (DL_FUNC) &cec_init_centers_r, 3}, 175 | {NULL, NULL, 0} 176 | }; 177 | 178 | extern "C" 179 | void R_init_CEC(DllInfo *dllInfo) { 180 | R_registerRoutines(dllInfo, NULL, methods, NULL, NULL); 181 | R_useDynamicSymbols(dllInfo, TRUE); 182 | } 183 | -------------------------------------------------------------------------------- /src/parallel_starter.h: -------------------------------------------------------------------------------- 1 | #ifndef CEC_PARALLEL_STARTER_H 2 | #define CEC_PARALLEL_STARTER_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include "starter.h" 8 | #include "cec_starter.h" 9 | #include "split_starter.h" 10 | #include "exceptions.h" 11 | 12 | namespace cec { 13 | 14 | class unique_models_input { 15 | public: 16 | unique_models_input(const mat &x, vector> &&models) 17 | : x(x), 18 | models(std::move(models)) {} 19 | 20 | unique_models_input(unique_models_input &) = delete; 21 | 22 | unique_models_input(unique_models_input &&) = default; 23 | 24 | clustering_input get() { 25 | return clustering_input(x, models); 26 | } 27 | 28 | private: 29 | const mat &x; 30 | vector> models; 31 | }; 32 | 33 | class parallel_starter { 34 | public: 35 | parallel_starter(int max_threads, int starts) 36 | : starts(starts) { 37 | if (max_threads == 0) 38 | max_threads = std::thread::hardware_concurrency(); 39 | if (max_threads == 0) 40 | max_threads = default_threads_number; 41 | 42 | parallel_starter::max_threads = std::min(starts, max_threads); 43 | } 44 | 45 | template 46 | unique_ptr start(Task &task) { 47 | using subtask = typename Task::subtask; 48 | int starts_per_thread = starts / max_threads; 49 | int remaining = starts - (starts_per_thread * max_threads); 50 | best_results_collector best; 51 | 52 | vector cl_tasks; 53 | vector>> cl_results; 54 | 55 | subtask my = task(starts_per_thread + (remaining-- > 0 ? 1 : 0)); 56 | try { 57 | for (int th = 1; th < max_threads; th++) { 58 | std::packaged_task()> 59 | pt_subtask(task(starts_per_thread + (remaining-- > 0 ? 1 : 0))); 60 | 61 | cl_results.emplace_back(pt_subtask.get_future()); 62 | cl_tasks.emplace_back(std::move(pt_subtask)); 63 | } 64 | 65 | best(my()); 66 | 67 | for (auto &&result : cl_results) 68 | try { 69 | best(result.get()); 70 | } catch (clustering_exception &ex) { 71 | //ignore for now... 72 | } 73 | 74 | } catch (std::exception &ex) { 75 | join_all_threads(cl_tasks); 76 | throw; 77 | } 78 | 79 | join_all_threads(cl_tasks); 80 | 81 | return best(); 82 | } 83 | 84 | private: 85 | int max_threads; 86 | int starts; 87 | static const int default_threads_number = 4; 88 | 89 | static void join_all_threads(vector &threads) { 90 | for (auto &&th : threads) 91 | th.join(); 92 | } 93 | }; 94 | 95 | class mp_start_subtask { 96 | public: 97 | mp_start_subtask(mp_start_subtask &&) = default; 98 | 99 | mp_start_subtask(mp_start_subtask &) = delete; 100 | 101 | mp_start_subtask(unique_ptr c_starter, 102 | vector> c_procs, unique_models_input &&uniqe_m_input, 103 | const int starts) 104 | : c_starter(std::move(c_starter)), 105 | c_procs(std::move(c_procs)), 106 | uniqe_m_input(std::move(uniqe_m_input)), 107 | starts(starts) {}; 108 | 109 | unique_ptr operator()() { 110 | best_results_collector best; 111 | for (int i = 0; i < starts; i++) { 112 | unique_ptr res = c_starter->start(uniqe_m_input.get()); 113 | for (auto &&cp : c_procs) 114 | res = cp->start(res, uniqe_m_input.get()); 115 | 116 | best(std::move(res)); 117 | } 118 | return best(); 119 | } 120 | 121 | private: 122 | unique_ptr c_starter; 123 | vector> c_procs; 124 | unique_models_input uniqe_m_input; 125 | const int starts; 126 | }; 127 | 128 | class multiple_starts_task { 129 | public: 130 | using subtask = mp_start_subtask; 131 | 132 | explicit multiple_starts_task(cec_starter::parameters params, const mat &x, 133 | const vector> &model_specs) 134 | : params(params), 135 | x(x), 136 | model_specs(model_specs) {} 137 | 138 | subtask operator()(int starts) const { 139 | unique_ptr starter = make_unique(params); 140 | return mp_start_subtask(std::move(starter), 141 | vector>(), 142 | unique_models_input(x, model_spec::create_models(model_specs)), starts); 143 | } 144 | 145 | private: 146 | const cec_starter::parameters params; 147 | const mat &x; 148 | const vector> &model_specs; 149 | }; 150 | 151 | class start_and_split_task { 152 | public: 153 | using subtask = mp_start_subtask; 154 | 155 | explicit start_and_split_task(cec_starter::parameters init_cl_params, 156 | split_starter::parameters split_params, 157 | const mat &x, 158 | const vector> &model_specs 159 | ) 160 | : init_cl_params(init_cl_params), 161 | split_params(split_params), 162 | x(x), 163 | model_specs(model_specs) {} 164 | 165 | subtask operator()(int starts) { 166 | unique_ptr starter = make_unique(init_cl_params); 167 | vector> cl_procs(1); 168 | cl_procs[0] = make_unique(split_params); 169 | 170 | return mp_start_subtask( 171 | std::move(starter), 172 | std::move(cl_procs), 173 | unique_models_input(x, model_spec::create_models(model_specs)), 174 | starts); 175 | } 176 | 177 | private: 178 | cec_starter::parameters init_cl_params; 179 | split_starter::parameters split_params; 180 | const mat &x; 181 | const vector> &model_specs; 182 | }; 183 | } 184 | 185 | #endif //CEC_PARALLEL_STARTER_H 186 | -------------------------------------------------------------------------------- /src/vec.h: -------------------------------------------------------------------------------- 1 | #ifndef VEC_H 2 | #define VEC_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | namespace cec { 9 | class vec; 10 | class mat; 11 | class row { 12 | public: 13 | const int size; 14 | 15 | inline double &operator[](int idx) { 16 | return data_[idx]; 17 | } 18 | 19 | inline const double &operator[](int idx) const { 20 | return data_[idx]; 21 | } 22 | 23 | void fill(double value) { 24 | std::fill(data_, data_ + size, value); 25 | } 26 | 27 | void set(const row &r) { 28 | this->operator=(r); 29 | } 30 | 31 | inline row &operator=(const row &r) { 32 | std::copy(r.data_, r.data_ + r.size, data_); 33 | return *this; 34 | } 35 | 36 | void operator*=(double value) { 37 | for (int i = 0; i < size; i++) 38 | data_[i] *= value; 39 | } 40 | 41 | void operator/=(double value) { 42 | for (int i = 0; i < size; i++) 43 | data_[i] /= value; 44 | } 45 | 46 | void operator+=(const row &r) { 47 | for (int i = 0; i < size; i++) 48 | data_[i] += r[i]; 49 | } 50 | 51 | void operator-=(const row &r) { 52 | for (int i = 0; i < size; i++) 53 | data_[i] -= r[i]; 54 | } 55 | 56 | double *data() { 57 | return data_; 58 | } 59 | 60 | const double *data() const { 61 | return data_; 62 | } 63 | 64 | friend std::ostream &operator<<(std::ostream &os, const row &r) { 65 | os << "{"; 66 | for (int i = 0; i < r.size; i++) 67 | os << r[i] << (i == r.size - 1 ? "}" : ", "); 68 | return os; 69 | } 70 | 71 | static double dist_sq(const row &a, const row &b) { 72 | double acc = 0; 73 | for (int i = 0; i < a.size; i++) { 74 | double diff = b[i] - a[i]; 75 | acc += diff * diff; 76 | } 77 | return acc; 78 | } 79 | 80 | private: 81 | friend class vec; 82 | friend class mat; 83 | 84 | row(const row &r) = default; 85 | 86 | row(row &&r) noexcept = default; 87 | 88 | row(double *ptr, int n) 89 | : size(n), 90 | data_(ptr) {} 91 | 92 | 93 | row sub(int offset, int n) { 94 | return {data_ + offset, n}; 95 | } 96 | 97 | double *data_; 98 | }; 99 | 100 | template 101 | class storage { 102 | protected: 103 | explicit storage(int n) 104 | : ptr(new T[n]) {} 105 | 106 | storage(storage &&vm) noexcept = default; 107 | 108 | inline T *get_storage() { 109 | return ptr.get(); 110 | } 111 | private: 112 | std::unique_ptr ptr; 113 | }; 114 | 115 | class vec: private storage, public row { 116 | public: 117 | explicit vec(int n) 118 | : storage(n), 119 | row(get_storage(), n) {} 120 | 121 | explicit vec(const row &v) 122 | : vec(v.size) { 123 | operator=(v); 124 | } 125 | 126 | vec(const vec &v) 127 | : vec(v.size) { 128 | operator=(v); 129 | } 130 | 131 | vec(vec &&v) = default; 132 | 133 | using row::operator=; 134 | 135 | vec &operator=(const vec &v) { 136 | row::operator=(v); 137 | return *this; 138 | } 139 | }; 140 | 141 | class mat { 142 | public: 143 | const int m, n; 144 | private: 145 | vec data_vec; 146 | public: 147 | mat(int m, int n) 148 | : m(m), 149 | n(n), 150 | data_vec(m * n) {} 151 | 152 | mat(const mat &ma) 153 | : mat(ma.m, ma.n) { 154 | (*this) = ma; 155 | } 156 | 157 | mat(mat &&ma) noexcept = default; 158 | 159 | inline const row operator[](int idx) const { 160 | return const_cast(this)->operator[](idx); 161 | } 162 | 163 | inline row operator[](int idx) { 164 | return data_vec.sub(n * idx, n); 165 | } 166 | 167 | inline mat &operator=(const mat &ma) { 168 | data_vec.operator=(ma.data_vec); 169 | return *this; 170 | } 171 | 172 | inline mat &operator=(mat &&ma) noexcept { 173 | *this = ma; 174 | return *this; 175 | } 176 | 177 | double *data() { 178 | return data_vec.data(); 179 | } 180 | 181 | const double *data() const { 182 | return data_vec.data(); 183 | } 184 | 185 | void fill(double value) { 186 | data_vec.fill(value); 187 | } 188 | 189 | void operator*=(double value) { 190 | data_vec *= value; 191 | } 192 | 193 | void operator/=(double value) { 194 | data_vec /= value; 195 | } 196 | 197 | void operator+=(const mat &m) { 198 | data_vec += m.data_vec; 199 | } 200 | 201 | void operator-=(const mat &m) { 202 | data_vec -= m.data_vec; 203 | } 204 | 205 | friend std::ostream &operator<<(std::ostream &os, const mat &m) { 206 | for (int j = 0; j < m.m; j++) { 207 | os << (j == 0 ? '(' : ' '); 208 | for (int k = 0; k < m.n; k++) 209 | os << m[j][k] << (k < m.n - 1 ? ", " : ""); 210 | os << (j == m.m - 1 ? ')' : '\n'); 211 | } 212 | return os; 213 | } 214 | 215 | template 216 | class rows_iterator { 217 | friend class mat; 218 | 219 | public: 220 | using difference_type = long; 221 | using value_type = typename std::conditional::value, const row, row>::type; 222 | using pointer = value_type*; 223 | using reference = value_type&; 224 | using iterator_category = std::forward_iterator_tag; 225 | 226 | value_type 227 | operator*() { 228 | return ref[r]; 229 | } 230 | 231 | void operator++() { 232 | r++; 233 | } 234 | 235 | bool operator==(const rows_iterator &ri) { 236 | return r == ri.r; 237 | } 238 | 239 | bool operator!=(const rows_iterator &ri) { 240 | return !operator==(ri); 241 | } 242 | 243 | private: 244 | rows_iterator(M &ref, int r) 245 | : ref(ref), 246 | r(r) {} 247 | 248 | M &ref; 249 | int r; 250 | }; 251 | 252 | rows_iterator begin() { 253 | return {*this, 0}; 254 | } 255 | 256 | rows_iterator end() { 257 | return {*this, m}; 258 | } 259 | 260 | rows_iterator begin() const { 261 | return {*this, 0}; 262 | } 263 | 264 | rows_iterator end() const { 265 | return {*this, m}; 266 | } 267 | 268 | static mat outer_product(row &v) { 269 | int n = v.size; 270 | mat ma(n, n); 271 | for (int j = 0; j < n; j++) 272 | for (int k = 0; k < n; k++) 273 | ma[j][k] = v[j] * v[k]; 274 | return ma; 275 | } 276 | }; 277 | } 278 | 279 | #endif /* VEC_H */ 280 | -------------------------------------------------------------------------------- /R/cec.R: -------------------------------------------------------------------------------- 1 | cec <- function( 2 | x, 3 | centers, 4 | type = c("covariance", "fixedr", "spherical", "diagonal", "eigenvalues", "mean", "all"), 5 | iter.max = 25, 6 | nstart = 1, 7 | param, 8 | centers.init = c("kmeans++", "random"), 9 | card.min = "5%", 10 | keep.removed = FALSE, 11 | interactive = FALSE, 12 | threads = 1, 13 | split = FALSE, 14 | split.depth = 8, 15 | split.tries = 5, 16 | split.limit = 100, 17 | split.initial.starts = 1, 18 | readline = TRUE 19 | ) 20 | { 21 | # check arguments 22 | if (!hasArg(x)) stop("Missing requierd argument: 'x'.") 23 | 24 | if (!hasArg(centers)) { 25 | centers <- 1 26 | split = T 27 | } 28 | 29 | if (iter.max < 0) stop("Illegal argument: iter.max < 0.") 30 | if (!is.matrix(x)) stop("Illegal argument: 'x' is not a matrix.") 31 | if (ncol(x) < 1) stop("Illegal argument: ncol(x) < 1.") 32 | if (nrow(x) < 1) stop("Illegal argument: nrow(x) < 1.") 33 | 34 | if (!all(complete.cases(x))) stop("Illegal argument: 'x' contains NA values.") 35 | if (!all(complete.cases(centers))) stop("Illegal argument: 'centers' contains NA values.") 36 | 37 | var.centers = NULL 38 | centers.mat = NULL 39 | 40 | if (!is.matrix(centers)) 41 | { 42 | if (length(centers) > 1) 43 | var.centers <- centers 44 | else 45 | var.centers <- c(centers) 46 | 47 | for (i in centers) 48 | if (i < 1) stop("Illegal argument: 'centers' < 1") 49 | 50 | centers.initilized <- F 51 | } 52 | else 53 | { 54 | if (ncol(x) != ncol(centers)) stop("Illegal argument: ncol(x) != ncol(centers).") 55 | if (nrow(centers) < 1) stop("Illegal argument: nrow(centers) < 1.") 56 | var.centers <- c(nrow(centers)) 57 | centers.mat = centers 58 | centers.initilized <- T 59 | } 60 | 61 | if (!(attr(regexpr("[\\.0-9]+%{0,1}", perl=TRUE, text=card.min), "match.length") == nchar(card.min))) 62 | stop("Illegal argument: 'card.min' in wrong format.") 63 | 64 | if (centers.initilized) 65 | init.method.name = "none" 66 | else if (hasArg(centers.init)) 67 | init.method.name = switch (match.arg(centers.init), "kmeans++" = "kmeanspp", "random" = "random") 68 | else init.method.name = "kmeanspp" 69 | 70 | 71 | if (!hasArg(type)) 72 | type <- "all" 73 | 74 | if (length(type) > 1 && length(var.centers) > length(type)) 75 | stop("Illegal argument: 'type' with length > 1 should be equal or greater than the length of the vector of variable number of centers ('centers' as a vector).") 76 | 77 | # run interactive mode if requested 78 | if (interactive) { 79 | if (split == T) stop("Interactive mode not available in split mode") 80 | if (length(var.centers) > 1) stop("Interactive mode not available for variable centers") 81 | if (nstart > 1) stop("Interactive mode not available for multiple starts") 82 | return(cec.interactive(x, centers, type, iter.max, 1, param, centers.init, card.min, keep.removed, readline)) 83 | } 84 | 85 | n = ncol(x) 86 | m = nrow(x) 87 | 88 | if (substr(card.min, nchar(card.min), nchar(card.min)) == "%") 89 | card.min = as.integer(as.double(substr(card.min , 1, nchar(card.min) - 1)) * m / 100) 90 | else 91 | card.min = as.integer(card.min) 92 | 93 | # card.min must be greater than the dimension of the data 94 | card.min = max(card.min, n + 1) 95 | 96 | # prepare input for C function cec_r 97 | k <- max(var.centers) 98 | 99 | startTime <- proc.time() 100 | 101 | centers.r = list( 102 | init.method = init.method.name, 103 | var.centers = as.integer(var.centers), 104 | mat = centers.mat 105 | ) 106 | 107 | if (threads == "auto") 108 | threads = 0 109 | 110 | control.r = list( 111 | min.card = as.integer(card.min), 112 | max.iters = as.integer(iter.max), 113 | starts = as.integer(nstart), 114 | threads = as.integer(threads) 115 | ) 116 | 117 | models.r = create.cec.params.for.models(k, n, type, param) 118 | 119 | # perform the clustering by calling C function cec_r 120 | if (split) { 121 | for (i in 1:k) 122 | if (models.r[[i]]$type != models.r[[1]]$type) 123 | stop("mixing model types is currently not supported in split mode") 124 | 125 | models.r 126 | split.r = list( 127 | depth = as.integer(split.depth), 128 | limit = as.integer(split.limit), 129 | tries = as.integer(split.tries), 130 | initial.starts = as.integer(split.initial.starts) 131 | ) 132 | Z <- .Call(cec_split_r, x, centers.r, control.r, models.r, split.r) 133 | } 134 | else 135 | Z <- .Call(cec_r, x, centers.r, control.r, models.r) 136 | 137 | k.final <- nrow(Z$centers) 138 | 139 | # prepare the results 140 | 141 | execution.time = as.vector((proc.time() - startTime))[3] 142 | 143 | Z$centers[is.nan(Z$centers)] <- NA 144 | 145 | tab <- tabulate(Z$cluster) 146 | probability <- vapply(tab, function(c.card){c.card / m}, 0) 147 | 148 | #TODO: change this temporary hack 149 | model.one = models.r[[1]]; 150 | 151 | # change cluster assignment if keep.removed == F 152 | if (!keep.removed) 153 | { 154 | cluster.map = 1:k.final 155 | na.rows = which(is.na(Z$centers[, 1])) 156 | if (length(na.rows) > 0) 157 | { 158 | for (i in 1:length(na.rows)) 159 | for (j in na.rows[i]:k.final) 160 | cluster.map[j] <- cluster.map[j] - 1 161 | 162 | Z$cluster <- as.integer(vapply(Z$cluster,function(asgn) {as.integer(cluster.map[asgn])}, 0)) 163 | 164 | # in case of having single row - convert it to a matrix 165 | Z$centers <- matrix(Z$centers[-na.rows,],,n) 166 | Z$covariances <- Z$covariances[-na.rows] 167 | probability <- probability[-na.rows] 168 | models.r <- models.r[-na.rows] 169 | } 170 | } 171 | covs = length(Z$covariances) 172 | covariances.model = rep(list(NA), covs) 173 | means.model = Z$centers 174 | 175 | #TODO: change this temporary hack 176 | if (split) 177 | models.r = rep(list(model.one), covs) 178 | 179 | # obtain the covariances of the model 180 | for(i in 1:covs) 181 | covariances.model[[i]] = model.covariance(models.r[[i]]$type, Z$covariances[[i]], Z$centers[i, ], models.r[[i]]$params) 182 | 183 | for(i in 1:covs) 184 | means.model[i, ] = model.mean(models.r[[i]]$type, Z$centers[i, ], models.r[[i]]$params) 185 | 186 | structure(list( 187 | data = x, 188 | cluster = Z$cluster, 189 | centers = Z$centers, 190 | probability = probability, 191 | cost.function = Z$energy, 192 | nclusters = Z$nclusters, 193 | iterations = Z$iterations, 194 | time = execution.time, 195 | covariances = Z$covariances, 196 | covariances.model = covariances.model, 197 | means.model = means.model 198 | ), class = "cec"); 199 | } 200 | 201 | cec.interactive <- function( 202 | x, 203 | centers, 204 | type = c("covariance", "fixedr", "spherical", "diagonal", "eigenvalues", "all"), 205 | iter.max = 40, 206 | nstart = 1, 207 | param, 208 | centers.init = c("kmeans++", "random"), 209 | card.min = "5%", 210 | keep.removed = FALSE, 211 | readline = TRUE 212 | ) 213 | { 214 | par 215 | { 216 | old.ask = par()["ask"] 217 | on.exit(par(old.ask)) 218 | n = ncol(x) 219 | if (n != 2) 220 | stop("interactive mode available only for 2-dimensional data") 221 | i <- 0 222 | if (!is.matrix(centers)) centers <- init.centers(x, centers, centers.init) 223 | if (readline) 224 | { 225 | ignore = readline(prompt="After each iteration you may:\n - press for next iteration \n - write number (may be negative one) and press for next iterations \n - write 'q' and abort execution.\n Press .\n") 226 | par(ask = FALSE) 227 | } 228 | else 229 | { 230 | par(ask = TRUE) 231 | } 232 | while (TRUE) 233 | { 234 | Z <- cec(x, centers, type, i, 1, param, centers.init , card.min, keep.removed, F); 235 | 236 | if(i > Z$iterations | i>= iter.max) 237 | break 238 | 239 | desc = "" 240 | if (i == 0) 241 | desc = "(position of center means before first iteration)" 242 | 243 | cat("Iterations:", Z$iterations, desc, "cost function:", Z$cost," \n ") 244 | 245 | plot(Z, ellipses = TRUE) 246 | 247 | if (readline) 248 | { 249 | line = readline(prompt="Press OR write number OR write 'q':"); 250 | lineint = suppressWarnings(as.integer(line)) 251 | 252 | if (!is.na(lineint)) 253 | { 254 | i = i + lineint - 1 255 | if (i < 0) 256 | i = -1 257 | } 258 | else if (line == "q" | line == "quit") { 259 | break 260 | } 261 | } 262 | i = i + 1 263 | } 264 | plot(Z, ellipses="TRUE") 265 | if (readline) 266 | ignore = readline(prompt="Press :") 267 | 268 | Z 269 | } 270 | } -------------------------------------------------------------------------------- /man/cec.Rd: -------------------------------------------------------------------------------- 1 | \name{cec} 2 | \alias{cec} 3 | 4 | \title{ 5 | Cross-Entropy Clustering 6 | } 7 | 8 | \description{ 9 | Performs Cross-Entropy Clustering on a data matrix. 10 | } 11 | 12 | \usage{ 13 | cec(x, centers, type = c("covariance", "fixedr", "spherical", "diagonal", 14 | "eigenvalues", "mean", "all"), iter.max = 25, nstart = 1, param, 15 | centers.init = c("kmeans++", "random"), card.min = "5\%", keep.removed = FALSE, 16 | interactive = FALSE, threads = 1, split = FALSE, split.depth = 8, split.tries = 5, 17 | split.limit = 100, split.initial.starts = 1, readline = TRUE) 18 | } 19 | 20 | \arguments{ 21 | 22 | \item{x}{ 23 | Numeric matrix of data. 24 | } 25 | 26 | \item{centers}{ 27 | Either a matrix of initial centers or the number of initial centers (\code{k}, single 28 | number \code{cec(data, 4, ...)} or a vector for variable number of 29 | centers \code{cec(data, 3:10, ...)}). 30 | 31 | If \code{centers} is a vector, \code{length(centers)} clusterings will be performed for each 32 | start (\code{nstart} argument) and the total number of clusterings will be 33 | \code{length(centers) * nstart}. 34 | 35 | If \code{centers} is a number or a vector, initial centers will be generated using a method 36 | depending on the \code{centers.init} argument. 37 | } 38 | 39 | \item{type}{ 40 | Type (or types) of clustering (density family). This can be either a single value 41 | or a vector of length equal to the number of centers. Possible values are: "covariance", 42 | "fixedr", "spherical", "diagonal", "eigenvalues", "all" (default). 43 | 44 | Currently, if the \code{centers} argument is a vector, only signle type can be used. 45 | } 46 | 47 | \item{iter.max}{ 48 | Maximum number of iterations at each clustering. 49 | } 50 | 51 | \item{nstart}{ 52 | The number of clusterings to perform (with different initial centers). Only the best 53 | clustering (with the lowest cost) will be returned. Value grater then one is valid 54 | only if the \code{centers} argument is a number or a vector. 55 | 56 | If the \code{centers} argument is a vector, \code{length(centers)} clusterings will be performed 57 | for each start and the total number of clusterings will be \code{length(centers) * nstart}. 58 | 59 | If the split mode is on (\code{split = TRUE}), it's rarely desired use change this parameter as the whole procedure (initial clustering + split) will be performed \code{nstart} times. 60 | } 61 | 62 | \item{centers.init}{ 63 | Centers initialization method. Possible values are: "kmeans++" (default), "random". 64 | } 65 | 66 | \item{param}{ 67 | Parameter (or parameters) specific to a particular type of clustering. 68 | Not all types of clustering require parameter. Types that require parameter: 69 | "covariance" (matrix parameter), "fixedr" (numeric parameter), 70 | "eigenvalues" (vector parameter). This can be a vector or a list 71 | (when one of the parameters is a matrix or a vector). 72 | } 73 | 74 | \item{card.min}{ 75 | Minimal cluster cardinality. If cluster cardinality becomes less than card.min, cluster 76 | is removed. This argument can be either an integer number or a string ended with a percent 77 | sign (e.g. "5\%"). 78 | } 79 | 80 | \item{keep.removed}{ 81 | If this parameter is TRUE, removed clusters will be visible in the results as NA in centers matrix 82 | (as well as corresponding values in the list of covariances). 83 | } 84 | 85 | \item{interactive}{ 86 | Interactive mode. If TRUE, the result of clustering will be plotted after every iteration. 87 | } 88 | 89 | \item{threads}{ 90 | Specifies the number of threads to use or "auto" to use default number of threads (usually 91 | the number of available processing units/cores) when performing multiple starts (\code{nstart} parameter). 92 | 93 | The execution of a single start is always performed by a single thread, thus for \code{nstart = 1} only one thread will 94 | be used regardless of the value of this parameter. 95 | } 96 | 97 | \item{split}{ 98 | Enables split mode. This mode discovers new clusters after initial clustering, by trying to split single clusters 99 | into two to lower the cost function. 100 | 101 | For each start (\code{nstart}), initial clustering will be performed and then split. The number of starts in 102 | the initial clustering before split is driven by the \code{split.initial.starts} parameter. 103 | } 104 | 105 | \item{split.depth}{ 106 | Cluster subdivision depth used in split mode. Usually a value less than 10 is sufficient (when after each subdivision, 107 | new clusters have similar sizes). For some data, subdivisions may often produce a cluster (one of the two) that will 108 | not be split further, in that case a higher value of the \code{split.depth} is required. 109 | } 110 | 111 | \item{split.tries}{ 112 | The number of attempts that are made when trying to split a cluster in split mode. 113 | } 114 | 115 | \item{split.limit}{ 116 | Maximum number of centers to be discovered in split mode. 117 | } 118 | 119 | \item{split.initial.starts}{ 120 | The number of 'standard' starts performed before starting split. 121 | } 122 | 123 | \item{readline}{ 124 | Used only in the interactive mode. If \code{readline} is TRUE, at each iteration, before 125 | plotting it will wait for the user to press instead of standard "before plotting" 126 | (\code{par(ask = TRUE)}) waiting. 127 | } 128 | 129 | } 130 | \value{ 131 | Returns an object of class "cec" with available components: 132 | "data", "cluster", "probabilities", "centers", "cost.function", "nclusters", "iterations", "cost", "covariances", 133 | "covariances.model", "time". 134 | } 135 | 136 | \details{ 137 | In the context of implementation, Cross-Entropy Clustering (CEC) aims to partition \emph{m} points into \emph{k} 138 | clusters so as to minimize the cost function (energy \emph{\strong{E}} of the clustering) by switching the points between clusters. 139 | The presented method is based on the adapted Hartigan approach, where we reduce clusters which cardinalities decreased below some small prefixed level. 140 | 141 | The energy function \emph{\strong{E}} is given by: 142 | \deqn{E(Y_1,\mathcal{F}_1;...;Y_k,\mathcal{F}_k) = \sum\limits_{i=1}^{k} p(Y_i) \cdot (-ln(p(Y_i)) + H^{\times}(Y_i\|\mathcal{F}_i))}{ 143 | E(Y1, F1; ...; Yk, Fk) = \sum(p(Yi) * (-ln(p(Yi)) + H(Yi | Fi)))} 144 | where \emph{Yi} denotes the \emph{i}-th cluster, \emph{p(Yi)} is the ratio of the number of points in \emph{i}-th cluster to the total number points, \emph{\strong{H}(Yi|Fi)} is the value of cross-entropy, which represents the internal cluster energy function of data \emph{Yi} defined with respect to a certain Gaussian density family \emph{Fi}, which encodes the type of clustering we consider. 145 | 146 | The value of the internal energy function \emph{\strong{H}} depends on the covariance matrix (computed using maximum-likelihood method) and the mean (in case of the \emph{mean} model) of the points in the cluster. 147 | Seven implementations of \emph{\strong{H}} have been proposed (expressed as a type - model - of the clustering): 148 | 149 | \itemize{ 150 | \item"all" - All Gaussian densities. Data will form ellipsoids with arbitrary radiuses. 151 | \item"covariance" - Gaussian densities with a fixed given covariance. The shapes of clusters depend on the given covariance 152 | matrix (additional parameter). 153 | \item"fixedr" - Special case of "covariance", where the covariance matrix equals \emph{rI} for the given \emph{r} 154 | (additional parameter). The clustering will have a tendency to divide data into balls with approximate radius proportional to the square root of \emph{r}. 155 | \item"spherical" - Spherical (radial) Gaussian densities (covariance proportional to the identity). 156 | Clusters will have a tendency to form balls of arbitrary sizes. 157 | \item"diagonal" - Gaussian densities with diagonal covariane. Data will form ellipsoids with radiuses parallel to the coordinate axes. 158 | \item"eigenvalues" - Gaussian densities with covariance matrix having fixed eigenvalues (additional parameter). 159 | The clustering will try to divide the data into fixed-shaped ellipsoids rotated by an arbitrary angle. 160 | \item"mean" Gaussian densities with a fixed mean. Data will be covered with ellipsoids with fixed centers. 161 | } 162 | 163 | The implementation of \code{cec} function allows mixing of clustering types. 164 | } 165 | \author{ 166 | Konrad Kamieniecki, Jacek Tabor, Przemysław Spurek 167 | } 168 | 169 | \seealso{ 170 | \code{\link{CEC-package}.} 171 | } 172 | \examples{ 173 | # 174 | # Cross-Entropy Clustering 175 | # 176 | 177 | ## Example of clustering random data set of 3 Gaussians, 178 | ## 10 random initial centers and 7\% as minimal cluster size. 179 | 180 | m1 = matrix(rnorm(2000, sd=1), ncol=2) 181 | m2 = matrix(rnorm(2000, mean = 3, sd = 1.5), ncol = 2) 182 | m3 = matrix(rnorm(2000, mean = 3, sd = 1), ncol = 2) 183 | m3[,2] = m3[,2] - 5 184 | m = rbind(m1, m2, m3) 185 | oldpar = par(ask = TRUE) 186 | on.exit(par(oldpar)) 187 | plot(m, cex = 0.5, pch = 19) 188 | ## Clustering result: 189 | Z = cec(m, 10, iter.max = 100, card.min="7\%") 190 | plot(Z) 191 | # Result: 192 | Z 193 | ## Example of clustering mouse-like set using spherical Gaussian densities. 194 | m = mouseset(n=7000, r.head=2, r.left.ear=1.1, r.right.ear=1.1, left.ear.dist=2.5, 195 | right.ear.dist=2.5, dim=2) 196 | plot(m, cex = 0.5, pch = 19) 197 | ## Clustering result: 198 | Z = cec(m, 3, type="sp", iter.max = 100, nstart=4, card.min="5\%") 199 | plot(Z) 200 | # Result: 201 | Z 202 | 203 | ## Example of clustering data set "Tset" using "eigenvalues" clustering type. 204 | data(Tset) 205 | plot(Tset, cex = 0.5, pch = 19) 206 | centers = init.centers(Tset, 2) 207 | ## Clustering result: 208 | Z <- cec(Tset, 5, "eigenvalues", param=c(0.02,0.002), nstart=4) 209 | plot(Z) 210 | # Result: 211 | Z 212 | 213 | ## Example of using CEC split method starting with a single cluster. 214 | data(mixShapes) 215 | plot(mixShapes, cex = 0.5, pch = 19) 216 | ## Clustering result: 217 | Z <- cec(mixShapes, 1, split=TRUE) 218 | plot(Z) 219 | # Result: 220 | Z 221 | } 222 | \references{ 223 | Spurek, P. and Tabor, J. (2014) 224 | Cross-Entropy Clustering 225 | \emph{Pattern Recognition} \bold{47, 9} 3046--3059 226 | } 227 | 228 | \keyword{cluster} 229 | \keyword{models} 230 | \keyword{multivariate} 231 | \keyword{package} 232 | \concept{entropy} 233 | \concept{gaussian} 234 | \concept{gaussian mixture models} 235 | \concept{kmeans} 236 | 237 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | CEC 2 | === 3 | 4 | The R Package CEC performs clustering based on the cross–entropy clustering (CEC) method, 5 | which has been recently developed with the use of information theory. 6 | The main advantage of CEC is that it combines the speed and simplicity of k-means with the ability 7 | to use various Gaussian mixture models and reduce unnecessary clusters. 8 | 9 | The CEC package is a part of CRAN repository and it can be installed by the following command: 10 | 11 | ```R 12 | install.packages("CEC") 13 | library("CEC") 14 | ``` 15 | 16 | The basic usage comes down to the function `cec` with two arguments: input data (`x`) and the initial number of centers (`centers`): 17 | 18 | ```R 19 | cec(x = ..., centers = ...) 20 | ``` 21 | Below, a simple session with **R** is presented, where the component 22 | (waiting) of the Old Faithful data set is split into two clusters: 23 | 24 | ```R 25 | library("CEC") 26 | attach(faithful) 27 | cec <- cec(matrix(faithful$waiting), 2) 28 | print(cec) 29 | ``` 30 | 31 | As the main result, CEC returns data cluster membership `cec$cluster`. The following parameters of 32 | clusters can be obtained as well: 33 | 34 | - means (`cec$centers`) 35 | - covariances (`cec$covariances.model`) 36 | - probabilities (`cec$probability`) 37 | 38 | Additional information concerning the number of iterations, cost (energy) function and the number of clusters at each iteration are also available. 39 | 40 | Below, a session of **R** is presented which shows how to use the above parameters for plotting the data and the Gaussian models corresponding to the clusters. 41 | 42 | ```R 43 | hist(faithful$waiting, prob = T, main = "Time between Old Faithful eruptions", xlab = "Minutes", 44 | col = "lightgray", border = 0, ylim = c(0, 0.05)) 45 | 46 | for(i in c(1:2)) 47 | curve(cec$probability[i] * dnorm(x, mean = cec$centers[i], sd = sqrt(cec$covariances.model[[i]][1])), 48 | add = T, col = i + 1, lwd = 2) 49 | ``` 50 | ![](https://azureblue.github.io/cec/static/old.png) 51 | 52 | The CEC method, analogously to k-means, depends on the initial clusters memberships. Therefore, the initialization should be started a few times, which can be achieved using the `nstart` parameter. 53 | ```R 54 | cec <- cec(x = ..., centers = ..., nstart = ...) 55 | ``` 56 | 57 | **Multiple threads can be used to speed up clustering (when `nstart > 1` ).** 58 | It's driven by the `threads` parameter (more details in the package manual). 59 | ```R 60 | cec <- cec(..., nstart = 100, threads = 4) 61 | ``` 62 | 63 | The initial locations of the centers can be chosen either **randomly** or using the **k-means++** method and it's driven by the `centers.init` parameter which can take one of the two values: `"random"` or `"kmeans++"`. 64 | 65 | Two essential parameter, in the context of CEC method, are `card.min` and `iter.max` that express minimal cluster size - the number of points, below which, the cluster is removed and the maximum number of iterations at each start, respectively. 66 | 67 | One of the most important properties of the CEC algorithm is that it can be applied to various Gaussian models. The CEC package includes the implementation of six Gaussian models, which can be specified by the parameter `type`. 68 | 69 | Implemented Gaussian models (families) 70 | -------------------------------------- 71 | 72 | ### General Gaussian distributions 73 | **`type = "all"`** 74 | 75 | The general Gaussian CEC algorithm gives similar results to those obtained by the Gaussian Mixture Models. 76 | However, the CEC does not use the EM (Expectation Maximization) approach for minimization but a simple iteration process (Hartigan method). 77 | Consequently, larger data sets can be processed in shorter time. 78 | 79 | The clustering will have a tendency to divide the data into clusters in the shape of ellipses (ellipsoids in higher dimensions). 80 | 81 | ```R 82 | data("fourGaussians") 83 | 84 | cec <- cec(fourGaussians, centers = 10, type = "all", nstart = 20) 85 | 86 | plot(cec, asp = 1) 87 | ``` 88 | ![](https://azureblue.github.io/cec/static/all.png) 89 | 90 | ### Spherical CEC 91 | **`type = "spherical"`** 92 | 93 | The original distribution will be estimated by spherical (radial) densities, which will result in splitting the data into circle-like clusters of arbitrary sizes (balls in higher dimensions). 94 | 95 | ```R 96 | data("Tset") 97 | 98 | cec <- cec(x = Tset, centers = 10, type = "spherical", nstart = 5) 99 | 100 | plot(cec, asp = 1) 101 | ``` 102 | ![](https://azureblue.github.io/cec/static/spherical.png) 103 | 104 | ### Spherical CEC with fixed radius 105 | **`type = "fixedr"`** 106 | 107 | Similarly to the general spherical model, the dataset will be divided into clusters resembling full circles, but with the radius determined by the `param` argument. 108 | 109 | ```R 110 | data("Tset") 111 | 112 | cec <- cec(x = Tset, centers = 10, type = "fixedr", param = 0.01, nstart = 20) 113 | 114 | plot(cec, asp = 1) 115 | ``` 116 | ![](https://azureblue.github.io/cec/static/fixedr.png) 117 | 118 | ### Diagonal CEC 119 | **`type = "diagonal"`** 120 | 121 | In this case, the data will be described by ellipses for which the main semi-major axes are parallel to the axes of the coordinate system. 122 | 123 | ```R 124 | data("Tset") 125 | 126 | cec <- cec(x = Tset, centers = 10, type = "diagonal", nstart = 5) 127 | 128 | plot(cec, asp = 1) 129 | ``` 130 | ![](https://azureblue.github.io/cec/static/diagonal.png) 131 | 132 | ### Fixed covariance CEC 133 | **`type = "covariance"`** 134 | 135 | This model contains Gaussians with fixed covariance matrix. 136 | 137 | ```R 138 | data("Tset") 139 | 140 | cec <- cec(x = Tset, centers = 10, card.min = '10%', type = "covariance", 141 | param = matrix(c(0.04, 0, 0, 0.01), 2)) 142 | 143 | plot(cec, asp = 1) 144 | ``` 145 | ![](https://azureblue.github.io/cec/static/cov.png) 146 | 147 | In the above example, the following covariance matrix has been used, which results in covering the data by fixed shape ellipses: 148 | 149 | ![ 150 | 0.04 0.00 151 | 0.00 0.01 152 | ](https://azureblue.github.io/cec/static/covariance.png) 153 | 154 | 155 | ### Fixed eigenvalues CEC 156 | **`type = "eigenvalues"`** 157 | 158 | Model based on Gaussians with arbitrary fixed eigenvalues. 159 | 160 | ```R 161 | data("Tset") 162 | 163 | cec <- cec(x = Tset, centers = 10, type = "eigenvalues", param = c(0.01, 0.001), nstart = 5) 164 | 165 | plot(cec, asp = 1) 166 | ``` 167 | ![](https://azureblue.github.io/cec/static/eigen.png) 168 | 169 | In the above example, two eigenvalues: **λ₁=0.01** and **λ₂=0.001** are used, which results in covering the data with ellipses having fixed semi axes (corresponding to the eigenvalues). 170 | 171 | ### Fixed mean CEC 172 | **`type = "mean"`** 173 | 174 | Model based on Gaussians with fixed means. 175 | 176 | ```R 177 | data("Tset") 178 | data("threeGaussians") 179 | 180 | cec <- cec(Tset, 4, "mean", param = c(0.48, 0.48), nstart = 5) 181 | plot(cec, asp = 1) 182 | 183 | cec <- cec(threeGaussians,4, "mean", param = c(0, 0), nstart = 10) 184 | plot(cec) 185 | ``` 186 | ![](https://azureblue.github.io/cec/static/mean1.png) 187 | ![](https://azureblue.github.io/cec/static/mean2.png) 188 | 189 | 190 | A mix of the Gaussian models 191 | ---------------------------- 192 | 193 | One of the most powerful properties of the CEC algorithm is the possibility of mixing models. More precisely, the mixed models can be specified by giving a list of cluster types (and a list of parameters if needed). 194 | 195 | ```R 196 | cec(x = ..., centers = ..., type = c("all", "diagonal", ...), param = ...). 197 | ``` 198 | 199 | ```R 200 | data("mixShapes") 201 | 202 | cec <- cec(mixShapes, 7, iter.max = 3, 203 | type = c("fixedr", "fixedr", "eigen", "eigen", "eigen", "eigen", "eigen"), 204 | param = list(350, 350, c(9000, 8), c(9000, 8), c(9000, 8), c(9000, 8), c(9000, 8)), nstart = 500) 205 | 206 | plot(cec, asp = 1) 207 | ``` 208 | ![](https://azureblue.github.io/cec/static/mix.png) 209 | 210 | 211 | CEC Split 212 | --------- 213 | 214 | CEC Split method discovers new clusters after initial clustering, 215 | by recursively trying to split each cluster into two with lower cost function. 216 | 217 | 218 | To enable split method, the `split` argument must be set to `TRUE`. 219 | ```R 220 | cec <- cec(x = ..., centers = ..., split = T) 221 | ``` 222 | 223 | There are four parameters (with their default values) that control the split mode: 224 | `split.depth = 8` , `split.tries = 5`, `split.limit = 100`, `split.initial.starts = 1`. 225 | The description of those parameters is provided in the package manual. Using `nstart` parameter, 226 | the whole procedure, from start to end (initial clustering and splitting), can be 227 | repeated multiple times. In this, case we can also use multiple threads (`threads` parameter). 228 | 229 | An example usage is presented below: 230 | ```R 231 | data("fourGaussians") 232 | 233 | cec <- cec(fourGaussians, centers = 1, type = "all", split = T) 234 | 235 | plot(cec, asp = 1) 236 | ``` 237 | ![](https://azureblue.github.io/cec/static/split_4_gaussians.png) 238 | 239 | The split method will be used implicitly when `centers` argument is not provided. 240 | ```R 241 | data("mixShapes") 242 | 243 | cec <- cec(mixShapes) 244 | 245 | plot(cec, asp = 1) 246 | ``` 247 | ![](https://azureblue.github.io/cec/static/split_mix.png) 248 | 249 | The next two examples show clustering results using split method on a data set of 20 gaussians 250 | generated by the following code: 251 | ```R 252 | twenty.gaussians = matrix(c(0, 0), 0, 2) 253 | for (i in 0:(19)) { 254 | G = matrix(rnorm(400), 200, 2) 255 | G[,1] = G[,1] + i %% 5 * 8 + runif(1,-1, 1) 256 | G[,2] = G[,2] + i %/% 5 * 8 + runif(1,-1, 1) 257 | twenty.gaussians = rbind(twenty.gaussians, G) 258 | } 259 | ``` 260 | In the following example, the usage of general gaussian distributions (`type = 'all'`) doesn't require 261 | any modification of default split parameters. Only the `card.min` needs to be set to a much lower value. 262 | ```R 263 | cec <- cec(twenty.gaussians, card.min="1%") 264 | 265 | plot(cec, asp = 1) 266 | ``` 267 | ![](https://azureblue.github.io/cec/static/split_20_gaussians_all.png) 268 | 269 | Some data sets may require tuning of the split parameters. 270 | Using spherical densities (`type = 'spherical'`) on the same data, the `split.depth` 271 | needs to be increased significantly as well as `split.tries`. As in the previous example the `card.min` is changed. 272 | ``` 273 | cec <- cec(twenty.gaussians,, "sp", split.depth = 25, split.tries=15, card.min="1%") 274 | 275 | plot(cec, asp = 1) 276 | ``` 277 | ![](https://azureblue.github.io/cec/static/split_20_gaussians_spherical.png) 278 | 279 | -------------------------------------------------------------------------------- /inst/cec_tests/four.gaussians.result.dp: -------------------------------------------------------------------------------- 1 | structure(list(cluster = c(1L, 1L, 1L, 1L, 1L, 1L, 2 | 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3 | 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4 | 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 5 | 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 6 | 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 7 | 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 8 | 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 9 | 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 10 | 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 11 | 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 12 | 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 13 | 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 14 | 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 15 | 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 16 | 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 17 | 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 18 | 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 19 | 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 20 | 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 21 | 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 22 | 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 23 | 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 24 | 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 25 | 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 26 | 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 27 | 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 28 | 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 29 | 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 30 | 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 31 | 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 32 | 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 33 | 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 34 | 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 35 | 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 36 | 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 37 | 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 38 | 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 39 | 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 40 | 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 41 | 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 42 | 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 43 | 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 44 | 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 45 | 1L, 1L, 1L, 1L, 1L, 1L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 1L, 4L, 46 | 4L, 4L, 4L, 4L, 4L, 4L, 3L, 4L, 4L, 4L, 3L, 4L, 3L, 4L, 1L, 4L, 47 | 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 4L, 4L, 3L, 4L, 4L, 1L, 4L, 4L, 48 | 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 4L, 4L, 49 | 4L, 4L, 4L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 4L, 4L, 4L, 50 | 4L, 4L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 51 | 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 52 | 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 4L, 4L, 4L, 4L, 4L, 53 | 4L, 4L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 54 | 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 4L, 4L, 4L, 4L, 55 | 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 4L, 4L, 4L, 4L, 4L, 56 | 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 57 | 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 4L, 58 | 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 4L, 59 | 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 4L, 4L, 4L, 60 | 4L, 4L, 4L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 61 | 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 62 | 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 63 | 4L, 4L, 4L, 3L, 3L, 4L, 4L, 4L, 4L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 64 | 4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 65 | 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 66 | 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 67 | 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 68 | 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 69 | 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 70 | 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 71 | 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 72 | 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 73 | 3L, 3L, 3L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 74 | 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 75 | 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 76 | 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 77 | 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 3L, 3L, 3L, 3L, 3L, 78 | 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 79 | 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 80 | 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 81 | 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 82 | 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 83 | 3L, 3L, 3L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 3L, 3L, 84 | 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 85 | 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 86 | 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 87 | 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 88 | 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 89 | 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 90 | 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 91 | 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 92 | 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 93 | 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 94 | 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 95 | 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 96 | 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 97 | 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 98 | 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 99 | 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 100 | 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 101 | 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 102 | 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 103 | 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 104 | 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 105 | 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 106 | 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 107 | 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 108 | 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 109 | 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 110 | 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 3L, 3L, 111 | 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 112 | 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 113 | 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 114 | 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 115 | 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 116 | 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 117 | 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 118 | 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 119 | 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 120 | 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 121 | 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 122 | 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 123 | 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 124 | 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 125 | 3L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 126 | 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 2L, 2L, 2L, 127 | 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 128 | 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 129 | 3L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 3L, 2L, 130 | 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 131 | 3L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 132 | 3L, 2L, 2L, 2L, 3L, 2L, 3L, 2L, 3L, 2L, 3L, 2L, 2L, 2L, 3L, 2L, 133 | 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 134 | 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 135 | 2L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 136 | 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 137 | 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 2L, 2L, 138 | 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 139 | 2L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 3L, 2L, 140 | 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 141 | 2L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 142 | 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 143 | 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 3L, 2L, 3L, 144 | 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 3L, 2L, 2L, 145 | 2L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 2L, 2L, 2L, 2L, 146 | 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 3L, 2L, 3L, 2L, 147 | 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 2L, 2L, 148 | 2L, 3L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 149 | 2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 3L, 150 | 2L, 2L, 2L, 3L, 3L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 151 | 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 152 | 2L, 2L, 2L, 2L, 3L, 2L, 3L, 3L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 153 | 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 3L, 2L, 2L, 3L, 2L, 3L, 2L, 3L, 154 | 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 3L, 2L, 3L, 2L, 2L, 2L, 155 | 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 156 | 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 157 | 2L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 158 | 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 159 | 2L, 2L, 2L, 2L, 3L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 160 | 2L, 3L, 3L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 161 | 2L, 3L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 162 | 2L, 2L, 2L, 3L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 163 | 2L, 2L, 2L, 2L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 164 | 2L, 2L), centers = structure(c(1.84245135056162, -2.67684580955369, 165 | -1.15840735698224, 1.19516157191488, 1.91462031764866, -2.87739165221024, 166 | -1.05624166102196, 1.42358754604792), .Dim = c(4L, 2L)), probability = c(0.270769230769231, 167 | 0.193461538461538, 0.428461538461538, 0.107307692307692), cost.function = 2.5304669787963, 168 | nclusters = 4L, iterations = 11L, time = 0.0229999999999109, 169 | covariances = list(structure(c(2.03741754058808, -1.07562652980048, 170 | -1.07562652980048, 0.568029344322332), .Dim = c(2L, 2L)), 171 | structure(c(1.05017894591473, 0.39952859435606, 0.39952859435606, 172 | 0.621530934359661), .Dim = c(2L, 2L)), structure(c(4.74650412580127, 173 | 1.49180431362112, 1.49180431362112, 0.503851162161489 174 | ), .Dim = c(2L, 2L)), structure(c(4.00121342238377, -2.38360733918697, 175 | -2.38360733918697, 1.52301385413902), .Dim = c(2L, 2L 176 | ))), covariances.model = list(structure(c(2.03741754058808, 177 | -1.07562652980048, -1.07562652980048, 0.568029344322332), .Dim = c(2L, 178 | 2L)), structure(c(1.05017894591473, 0.39952859435606, 0.39952859435606, 179 | 0.621530934359661), .Dim = c(2L, 2L)), structure(c(4.74650412580127, 180 | 1.49180431362112, 1.49180431362112, 0.503851162161489), .Dim = c(2L, 181 | 2L)), structure(c(4.00121342238377, -2.38360733918697, -2.38360733918697, 182 | 1.52301385413902), .Dim = c(2L, 2L))), means.model = structure(c(1.84245135056162, 183 | -2.67684580955369, -1.15840735698224, 1.19516157191488, 1.91462031764866, 184 | -2.87739165221024, -1.05624166102196, 1.42358754604792), .Dim = c(4L, 185 | 2L))), .Names = c("cluster", "centers", "probability", 186 | "cost.function", "nclusters", "iterations", "time", "covariances", 187 | "covariances.model", "means.model"), class = "cec") 188 | -------------------------------------------------------------------------------- /inst/cec_tests/two.gausses.4d.data: -------------------------------------------------------------------------------- 1 | "V1" "V2" "V3" "V4" 2 | "1" -0.860532383000426 0.0655682349398269 -0.720376042955361 0.128903523809565 3 | "2" 2.09010146886274 1.81081385220877 0.795829445051545 -0.216534891146476 4 | "3" 0.566470165744258 -1.08720834086419 -2.43728276089261 -0.342870395334349 5 | "4" -0.419809952103439 -0.449397686613793 0.612167175347869 0.647193332554642 6 | "5" 0.956254885279354 0.0411239437039079 -0.882400148179033 -0.518643818027796 7 | "6" 0.159497008675263 0.0350156863152326 0.694994683233496 -0.829211067270737 8 | "7" 0.426359367614666 -0.515256408732593 0.800053725989837 -0.751883520621898 9 | "8" 1.42256285047719 1.50002391269176 1.85486284554688 1.1567647322227 10 | "9" -1.61020139182429 1.71169832626625 0.00913447005015524 -0.597976167260553 11 | "10" -0.131874825258349 -1.32507135584183 -0.0834747048915845 -0.852088972622546 12 | "11" -0.536410701482096 -0.0699171658571853 -0.650544201235641 1.000482450284 13 | "12" -0.70321600085378 -0.25335304363581 -0.659194922179516 2.69011975462535 14 | "13" 0.210900480415109 1.28443049363689 -1.43235155953496 0.1577643427379 15 | "14" 0.599468002190932 -0.769425228231766 -0.241030957057748 -0.525789587330167 16 | "15" -0.281127533632122 -0.00711374890116098 0.426069144813097 -1.41524866390276 17 | "16" 0.462888332878518 -1.62422321920841 0.956795749854279 -2.33438695676061 18 | "17" -1.04974088624154 -0.0153367546499704 -1.90817582663282 -1.11880430866905 19 | "18" 1.15637446899475 0.310901624564162 -0.0217264262523928 -2.66585634342909 20 | "19" -0.597885726853264 -0.389175835065157 0.029255538995534 -0.699378922142906 21 | "20" -0.693262637297017 0.764395151742298 -0.797232733512468 1.20471497335883 22 | "21" 0.539506689826985 -0.877767264439271 -1.06360601490258 0.6455949616323 23 | "22" -1.81764343417277 0.613587622362276 -0.818225793978127 -1.1580322917344 24 | "23" -1.47651001489696 0.717989062229101 1.05360559200996 -0.174492355525237 25 | "24" 0.915201351623415 0.811969193047697 -0.400172462973439 -1.19984563477839 26 | "25" -0.82921210147404 1.31192930333857 0.738586608388883 0.0231201758443822 27 | "26" -0.301173478542504 0.304865174699893 0.277322464245446 0.369780487187652 28 | "27" -0.695708482262271 0.18721313714058 -2.209282207036 -0.301971966654849 29 | "28" 2.14534157315428 0.603549097124866 0.607389380321091 0.874629621877083 30 | "29" 0.612765176335031 1.54168072057852 1.1723900366181 -0.317938136111876 31 | "30" -0.0115838532624324 1.58846898954954 0.511857614713291 0.469115722717497 32 | "31" -0.562798973375981 -1.18108217379765 0.833993190625396 -0.0176334029622061 33 | "32" 1.04309566930446 -0.0296615490361567 -0.928871702377988 -0.511901880741439 34 | "33" -1.07184250452536 1.1573893325546 -0.186026218072107 0.438073951288252 35 | "34" 1.29308644326958 0.590548533806093 -1.93396845444625 1.21069936502281 36 | "35" 0.294772932506191 -0.683494315776905 1.03683436424947 0.671173851553124 37 | "36" 0.776269386380039 -0.848905964607031 0.504863621084761 -0.398261671747236 38 | "37" 0.145713875454892 0.688395199782643 0.5302999152682 0.310296861657352 39 | "38" 1.12937746365079 -0.649337780278176 -1.21835870544266 -0.612965396423113 40 | "39" 1.94770628127703 -1.3285860105039 -0.943771177287442 -0.738218759107106 41 | "40" 1.61408286126575 -1.77672927308988 -0.451428292940471 -1.62197373244486 42 | "41" -0.996730542858198 0.0073052638131078 -1.28969114225324 0.196721435996766 43 | "42" 0.133970083386448 -1.8567362189555 -0.479519361788954 0.178783442723995 44 | "43" -1.02046963315084 -1.53222777963181 0.0900943773502709 -0.301285727266575 45 | "44" -0.837824556123588 0.0173434090939118 -0.299574224124141 -0.396289399151939 46 | "45" 0.283609271320063 -1.67573458639997 -0.483886869070716 -2.68335852100404 47 | "46" -0.967613158404023 -0.549826102915676 0.370924845220098 -0.356568435674802 48 | "47" 0.648445827215239 0.0646965138205773 0.596327043129538 -0.257156118923648 49 | "48" 0.184175720536139 0.478348550682119 0.114021136248451 -0.395636305746716 50 | "49" 0.751900916520829 -0.286273795997231 0.0204609399519075 -0.12761634357835 51 | "50" 0.650382069611906 2.13315471695754 0.134355983288358 1.59350255890686 52 | "51" 0.25756652253689 -0.860555726810751 -1.05413518644047 -0.556546530118016 53 | "52" -0.231925501875058 0.307932428223599 0.672836513156132 -0.314711777044376 54 | "53" -0.784051230231009 -1.38643645484367 0.966593713233166 -1.18127466132901 55 | "54" 1.06574519166241 -1.42601434874537 -1.28904733518383 0.66838018402193 56 | "55" -0.444883780794545 0.9382906848777 1.55150666289715 0.322642415530515 57 | "56" 1.02108892594205 0.653645841521292 -0.725512141855967 0.330223444898432 58 | "57" -0.949980435158305 -0.405123340712621 0.969671775797982 -2.07222230674939 59 | "58" 1.02890194924146 -0.734034920869188 0.610218378371988 -0.801778621353806 60 | "59" -0.389060532754434 0.550791782816945 0.685764387507292 0.25637539260533 61 | "60" -0.519321148696498 -1.62431737439266 -0.392306949367637 0.0887176777506972 62 | "61" -0.0613487886513537 -0.785041284171758 1.06915081592459 -0.298958734937174 63 | "62" 1.20203494939652 -1.18779369487345 0.719943063503609 1.57561869681412 64 | "63" 0.806212780321723 0.244747911489677 -0.0692535456100387 -1.4408284948181 65 | "64" -1.1270175989736 0.192434876507312 -0.551145402189259 -2.29698616404654 66 | "65" -1.40823936108396 -0.510746694220941 0.212307493176883 0.881641149832323 67 | "66" -1.32117853575454 0.00850659143941221 -0.516112431008125 -0.681849922800708 68 | "67" -1.04736769609764 -0.677210091876239 1.10814176114323 -0.693668722857398 69 | "68" 0.993219369321959 0.779862225271052 -0.726404657960109 -0.305581234600888 70 | "69" -0.318281296811162 -0.222967037482712 -0.563676368707216 0.872409679999507 71 | "70" 0.19037842463892 0.539467398637625 -0.387858539182939 0.13483814755583 72 | "71" -0.593709716252004 0.263950227680801 1.53105160926954 0.892488469156775 73 | "72" -0.316950090044094 -0.107040885379975 1.54180866240402 -0.666050768951622 74 | "73" -0.61258580420538 0.214868885362827 1.11036630074376 0.405429327502624 75 | "74" -0.575221331403095 -1.34001226230116 -0.836487068707119 -0.124724592195165 76 | "75" 0.315953276723457 0.361670078594581 0.460531926318416 0.353984527346007 77 | "76" -0.17084374800324 -0.0473606899307149 -0.883800080383053 -0.786361871924391 78 | "77" 2.2468253393337 1.14058832854332 -0.18943737414816 0.593052269882203 79 | "78" 1.98668350133981 0.358146156040156 0.545303797484994 0.587597043626154 80 | "79" -0.308079909338634 0.572335877299588 -0.228429943464813 -1.67807106750443 81 | "80" 0.692755726393474 -1.41725411454929 0.716600377316562 -0.674588766590444 82 | "81" -1.596039181999 -0.161978319791254 -1.18343006652431 0.0749441662185701 83 | "82" 0.376068311896427 1.28946977247886 -0.330968926778752 -0.1004782914374 84 | "83" -0.0145543967401529 1.42012931855742 -0.7588064594477 0.46242285558593 85 | "84" 0.159416721041476 0.292411689125593 -0.352525299852682 1.52673221705882 86 | "85" -1.541030000531 1.52542945267285 -0.514196875267898 -1.46692476199785 87 | "86" 0.70420447210643 -0.55660262104292 -0.545704685976844 1.10099312720021 88 | "87" 0.374927253912916 -1.06396121255162 1.54209455759528 0.303931273509544 89 | "88" 1.242562512772 0.0413549728721431 0.176797190523994 -0.276889179053808 90 | "89" 0.231229521773968 0.654420023103591 1.58955058821991 1.4180964572949 91 | "90" -1.18232600003211 1.06486226014102 0.719792505521219 0.913757211366128 92 | "91" -0.594459624118108 -1.47411194567592 -1.19622808646799 -0.106495215435347 93 | "92" 0.312375159454683 1.24705500085163 0.321677618390512 -0.165651105887591 94 | "93" 1.03548054663773 0.705390275727041 -0.605713225299888 0.383713091476684 95 | "94" -0.322589400141703 0.0816983904479671 0.197071375734212 1.25977141611116 96 | "95" 0.996655414916001 1.13541511751286 -0.628471167845546 -0.280784721149398 97 | "96" 0.0392740348282289 -0.748681506005131 0.293050575575877 0.350262415795282 98 | "97" 0.174264454877122 0.226986520010749 -0.883144774795541 -1.21343172593563 99 | "98" 0.539179399773602 -1.11184566363389 1.45099817142993 -1.28151924260821 100 | "99" -1.25450478152657 -0.822667608955933 -0.393280013691983 -0.831877370988791 101 | "100" 1.74069156665094 -0.724882672957753 -0.755927901707974 1.0429240076092 102 | "101" 5.26427559312731 4.29105347696634 3.8279107815715 5.72574629745318 103 | "102" 4.92616902106486 4.29626450627053 6.72571942848194 4.42400824485079 104 | "103" 3.85111269885778 5.88635850266496 5.95306301369686 4.6892408397004 105 | "104" 5.27745841079592 3.73450005137253 6.30702122247815 6.93732230646519 106 | "105" 4.57974567127042 3.35521180040917 4.15626727672127 5.4995511670429 107 | "106" 4.49993740511646 2.86359839335413 7.37851054370835 3.47064790450062 108 | "107" 3.46913028139379 4.70734310653204 5.27352236892651 5.51701223278609 109 | "108" 4.78696741632874 5.31869115486264 5.38756558351156 5.18301626385458 110 | "109" 4.97370532906656 2.5194223575922 7.2427427543419 3.82715414986573 111 | "110" 6.89182122511884 5.27194115720416 6.85533794959231 6.06094143457929 112 | "111" 2.91273532425107 5.86107125685502 4.14513945906339 5.05916407599514 113 | "112" 4.383421188639 3.86849923460143 6.35309360259187 5.57440004726018 114 | "113" 5.42302403414847 3.69028816541658 4.82086472625451 4.13991635893964 115 | "114" 6.23953397607547 4.58640454041846 3.34539701849344 3.30784195014295 116 | "115" 5.32666003190959 6.17622061555758 4.33516051793689 5.15133454254107 117 | "116" 4.28781541000449 5.99936295105589 6.07178431264716 4.04512139506385 118 | "117" 6.79366703758129 4.004520047467 4.87742252530497 5.39046035210483 119 | "118" 5.19887356812077 3.96729460552371 5.75277084436294 4.92616222969462 120 | "119" 3.54655793789715 5.49913800470408 3.8951450655692 5.27894749478808 121 | "120" 5.81417427828713 4.80728766193395 5.81430912225373 4.27904634525542 122 | "121" 5.37543683666099 4.33621962247011 3.82213542612225 4.60320232450868 123 | "122" 4.25498430336944 6.01935509452373 3.81408066989271 5.49176255849659 124 | "123" 5.01143372542217 2.90642832154405 5.15364528373232 4.75231813929927 125 | "124" 4.94776785119871 3.89872520836332 4.38436429243111 5.41976856193287 126 | "125" 4.05195563651337 5.87449865791668 2.49952881408247 4.41166063794686 127 | "126" 5.29807301678784 5.28023637739888 5.04023907851463 4.14844921942779 128 | "127" 4.572660820124 3.67715064295208 5.42348776704445 5.50902399894999 129 | "128" 3.69018338842715 4.71348492213568 5.38620095365177 5.79066918014268 130 | "129" 3.53224524238929 4.15546490075302 5.45801863802399 3.67562276169023 131 | "130" 4.22297248004081 4.55670400925806 4.45328986387427 5.03197711211244 132 | "131" 4.0508735458922 4.0646907758045 4.80599172673972 5.30892112425457 133 | "132" 3.81868583687887 4.86312089917848 3.75384783212297 6.12813297116818 134 | "133" 5.88810048978727 5.30464098460583 4.30604510739893 3.04042620433872 135 | "134" 5.9607646472607 3.40002196396226 5.70966455722258 4.66330109740811 136 | "135" 4.28003547829166 5.44805145446026 5.99435323677325 4.14101724341488 137 | "136" 5.03383547377009 6.27635059667282 3.79458884889911 3.85390577721611 138 | "137" 3.4209081276262 5.16197979648561 3.54895612645086 6.31335203411661 139 | "138" 4.042262214896 3.60317275186968 6.34994153551035 4.1366404855797 140 | "139" 3.9000770480466 5.1067355782217 4.08427311831559 4.5535900423228 141 | "140" 5.05615905424018 6.80136534661765 5.37294866461737 4.32939692931001 142 | "141" 5.98400458914607 4.46227160602924 5.56108975612475 5.6873983084553 143 | "142" 6.23328180092766 5.07884273799028 5.61567357132429 4.3039822335576 144 | "143" 5.87129605943995 3.42517746875179 4.71846967578143 3.97550386759629 145 | "144" 3.84496638077985 4.04487371693146 3.54289613121407 6.08343369830177 146 | "145" 5.38785601339899 5.25166751722479 4.90353864443429 5.2789544990476 147 | "146" 3.96973818592091 2.98254115327429 5.20318923166365 4.34904104912828 148 | "147" 5.0035041766482 4.57581374071752 4.554890035747 5.93375444120148 149 | "148" 6.19151767035614 4.11638413417223 5.12471606564863 6.5056755829917 150 | "149" 4.81420795380674 4.76844341967745 5.0124848886129 4.58356744781332 151 | "150" 4.19159476770923 5.77550471615892 3.86556024514571 4.67221935463939 152 | "151" 4.22341180995379 4.06655456851922 3.61940441737699 5.61156612599776 153 | "152" 3.81205584841995 3.90408140070042 5.157698827146 5.44811107971646 154 | "153" 4.81537764966501 4.90433978287173 5.1562117579516 3.73807077493975 155 | "154" 5.6954790390901 4.34394195371961 4.20407143704816 4.46619156092053 156 | "155" 6.65103137614419 4.85370142520933 6.04234999393646 5.59868303264714 157 | "156" 3.67962346426071 6.19859356849017 6.09936175518587 4.63449618027549 158 | "157" 3.88084881238602 3.72727972592639 6.1014705730857 6.61307054331332 159 | "158" 4.97350415685599 4.81378327213338 6.85919872157412 6.07911211102802 160 | "159" 5.47646548946994 6.07632485717861 5.35173938600062 6.70897304702398 161 | "160" 4.86406885815044 5.13505999747369 3.81676464889363 7.26686683211948 162 | "161" 4.51174227869986 3.17145706923323 5.06754141014158 5.77067345051899 163 | "162" 2.75893818122746 5.56368430701936 4.619344214913 2.62060413667283 164 | "163" 5.6637109441011 4.85706281857927 5.42987159873432 6.07502723901617 165 | "164" 5.77213423796754 6.66740327061798 4.47049884597082 4.37791237395069 166 | "165" 4.5936475518372 6.17050149580502 4.88334497331497 4.56616353068383 167 | "166" 4.61389663517391 3.46073081195584 6.34738247771613 3.8698299574211 168 | "167" 4.93249819735449 3.80345888835558 4.16485382979964 4.11386731251105 169 | "168" 4.32076024767947 4.98688528262542 5.01366703719037 6.35657046223252 170 | "169" 6.54764317255448 5.43448443714168 3.86974018839885 3.87906110421452 171 | "170" 4.66944466439298 4.70473442217666 7.070356571406 4.58525846054225 172 | "171" 4.67269752105085 5.31486703792108 3.86571008203207 6.19901550278105 173 | "172" 5.28809479514445 5.88897759564013 4.72511327496666 4.88770625157114 174 | "173" 4.2437031652453 6.63873404004559 3.92437504477434 4.60792385107505 175 | "174" 6.04669616336563 5.99309211622553 5.49563359559351 5.23382582894261 176 | "175" 5.1661450650594 5.51537733465093 2.34044502096915 4.98460273173603 177 | "176" 4.47468001864605 6.44859420525293 5.02946506509698 5.97551246648356 178 | "177" 5.74145053064303 4.69808076607705 4.00269243595437 4.07175567929847 179 | "178" 5.13210221277037 4.97236944036205 5.49237657037806 5.36744464711393 180 | "179" 5.80843268376278 5.21516790599113 4.64249890318317 4.11856896415402 181 | "180" 6.53595581470893 5.64983919997338 4.78498683767837 5.6164399388916 182 | "181" 3.53684056494967 4.72369997399788 5.2858801654837 4.65180844785095 183 | "182" 3.28214579176531 6.21044383857223 5.371304504744 5.72632685438888 184 | "183" 4.97907164106184 5.77179368812178 5.94732639175111 5.84464809765341 185 | "184" 5.48357155253733 4.15270368681914 4.9392015127597 4.80694597641873 186 | "185" 3.71692366580803 4.85923703907927 4.94318696662826 5.89868817610712 187 | "186" 6.08744550238052 5.45275037847621 4.19156724103999 3.91124557373076 188 | "187" 4.54063499459945 4.51980320122158 4.65426328363348 4.50471725407486 189 | "188" 5.17466509671876 7.28817693023592 5.52229560273515 5.83951881930623 190 | "189" 5.92998078198469 3.67966230559191 3.90828948475245 5.58890589033743 191 | "190" 4.55946048271817 5.05689200370596 5.00106099290615 3.81553846890284 192 | "191" 6.67722215035927 5.75275472460785 5.60176759886084 4.20068790372111 193 | "192" 4.91223640973082 5.74876458647451 6.11916411115278 4.57617729136212 194 | "193" 4.46721420815459 5.07407341273489 4.44680402173133 6.82882336880433 195 | "194" 4.58228077717342 4.47815822524711 4.36977525766352 6.05680638102197 196 | "195" 5.0776063792388 6.39630896610763 4.10436609981164 5.04099723232383 197 | "196" 4.7428774608129 3.42028214793706 5.93008446188464 4.79313335956048 198 | "197" 4.43300807631338 4.2018755549494 3.09008951923977 6.90345369878711 199 | "198" 4.43218786450084 6.10175589428707 5.17921714780154 3.87638168376392 200 | "199" 4.30346907065294 6.45100779790468 5.47180701294439 6.52326298222252 201 | "200" 6.03996155067212 3.32892564218147 5.05427836066051 5.46645333090903 202 | --------------------------------------------------------------------------------