├── DESCRIPTION ├── MD5 ├── NAMESPACE ├── NEWS.md ├── R ├── bootstrap.R ├── clustCombi.R ├── densityMclust.R ├── gmmhd.R ├── graphics.R ├── icl.R ├── impute.R ├── mbahc.R ├── mclust.R ├── mclust2.R ├── mclustda.R ├── mclustdr.R ├── mclustssc.R ├── options.R ├── toremove.R ├── util.R ├── weights.R └── zzz.R ├── build ├── partial.rdb └── vignette.rds ├── data ├── Baudry_etal_2010_JCGS_examples.R ├── EuroUnemployment.R ├── GvHD.R ├── acidity.R ├── banknote.txt ├── chevron.R ├── cross.R ├── diabetes.R ├── thyroid.R ├── wdbc.txt └── wreath.R ├── inst ├── CITATION └── doc │ ├── mclust.R │ ├── mclust.Rmd │ └── mclust.html ├── man ├── Baudry_etal_2010_JCGS_examples.Rd ├── BrierScore.Rd ├── EuroUnemployment.Rd ├── GvHD.Rd ├── Mclust.Rd ├── MclustBootstrap.Rd ├── MclustDA.Rd ├── MclustDR.Rd ├── MclustDRsubsel.Rd ├── MclustSSC.Rd ├── acidity.Rd ├── adjustedRandIndex.Rd ├── banknote.Rd ├── bic.Rd ├── cdens.Rd ├── cdensE.Rd ├── cdfMclust.Rd ├── chevron.Rd ├── clPairs.Rd ├── classError.Rd ├── classPriorProbs.Rd ├── clustCombi-internals.Rd ├── clustCombi.Rd ├── clustCombiOptim.Rd ├── combMat.Rd ├── combiPlot.Rd ├── combiTree.Rd ├── coordProj.Rd ├── covw.Rd ├── crimcoords.Rd ├── cross.Rd ├── cvMclustDA.Rd ├── decomp2sigma.Rd ├── defaultPrior.Rd ├── dens.Rd ├── densityMclust.Rd ├── densityMclust.diagnostic.Rd ├── diabetes.Rd ├── dmvnorm.Rd ├── dupPartition.Rd ├── em.Rd ├── emControl.Rd ├── emE.Rd ├── entPlot.Rd ├── errorBars.Rd ├── estep.Rd ├── estepE.Rd ├── figures │ └── logo.png ├── gmmhd.Rd ├── hc.Rd ├── hcE.Rd ├── hcRandomPairs.Rd ├── hclass.Rd ├── hdrlevels.Rd ├── hypvol.Rd ├── icl.Rd ├── imputeData.Rd ├── imputePairs.Rd ├── logLik.Mclust.Rd ├── logLik.MclustDA.Rd ├── logsumexp.Rd ├── majorityVote.Rd ├── map.Rd ├── mapClass.Rd ├── mclust-deprecated.Rd ├── mclust-internal.Rd ├── mclust-package.Rd ├── mclust.options.Rd ├── mclust1Dplot.Rd ├── mclust2Dplot.Rd ├── mclustBIC.Rd ├── mclustBICupdate.Rd ├── mclustBootstrapLRT.Rd ├── mclustICL.Rd ├── mclustLoglik.Rd ├── mclustModel.Rd 
├── mclustModelNames.Rd ├── mclustVariance.Rd ├── me.Rd ├── me.weighted.Rd ├── meE.Rd ├── mstep.Rd ├── mstepE.Rd ├── mvn.Rd ├── mvnX.Rd ├── nMclustParams.Rd ├── nVarParams.Rd ├── partconv.Rd ├── partuniq.Rd ├── plot.Mclust.Rd ├── plot.MclustBoostrap.Rd ├── plot.MclustDA.Rd ├── plot.MclustDR.Rd ├── plot.MclustSSC.Rd ├── plot.clustCombi.Rd ├── plot.densityMclust.Rd ├── plot.hc.Rd ├── plot.mclustBIC.Rd ├── plot.mclustICL.Rd ├── predict.Mclust.Rd ├── predict.MclustDA.Rd ├── predict.MclustDR.Rd ├── predict.MclustSSC.Rd ├── predict.densityMclust.Rd ├── priorControl.Rd ├── randProj.Rd ├── randomOrthogonalMatrix.Rd ├── sigma2decomp.Rd ├── sim.Rd ├── simE.Rd ├── softmax.Rd ├── summary.Mclust.Rd ├── summary.MclustBootstrap.Rd ├── summary.MclustDA.Rd ├── summary.MclustDR.Rd ├── summary.MclustSSC.Rd ├── summary.mclustBIC.Rd ├── surfacePlot.Rd ├── thyroid.Rd ├── uncerPlot.Rd ├── unmap.Rd ├── wdbc.Rd └── wreath.Rd ├── src ├── Makevars ├── dmvnorm.f ├── init.c ├── logsumexp.f ├── mclust.f └── mclustaddson.f └── vignettes ├── mclust.Rmd └── vignette.css /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: mclust 2 | Version: 6.1.1 3 | Date: 2024-04-29 4 | Title: Gaussian Mixture Modelling for Model-Based Clustering, 5 | Classification, and Density Estimation 6 | Description: Gaussian finite mixture models fitted via EM algorithm for 7 | model-based clustering, classification, and density estimation, 8 | including Bayesian regularization, dimension reduction for 9 | visualisation, and resampling-based inference. 
10 | Authors@R: c(person("Chris", "Fraley", role = "aut"), 11 | person("Adrian E.", "Raftery", role = "aut", 12 | comment = c(ORCID = "0000-0002-6589-301X")), 13 | person("Luca", "Scrucca", role = c("aut", "cre"), 14 | email = "luca.scrucca@unipg.it", 15 | comment = c(ORCID = "0000-0003-3826-0484")), 16 | person("Thomas Brendan", "Murphy", role = "ctb", 17 | comment = c(ORCID = "0000-0002-5668-7046")), 18 | person("Michael", "Fop", role = "ctb", 19 | comment = c(ORCID = "0000-0003-3936-2757"))) 20 | Depends: R (>= 3.0) 21 | Imports: stats, utils, graphics, grDevices 22 | Suggests: knitr (>= 1.4), rmarkdown (>= 2.10), mix (>= 1.0), geometry 23 | (>= 0.4), MASS 24 | License: GPL (>= 2) 25 | URL: https://mclust-org.github.io/mclust/ 26 | VignetteBuilder: knitr 27 | Repository: CRAN 28 | ByteCompile: true 29 | NeedsCompilation: yes 30 | LazyData: yes 31 | Encoding: UTF-8 32 | Packaged: 2024-04-29 14:15:14 UTC; luca 33 | Author: Chris Fraley [aut], 34 | Adrian E. Raftery [aut] (), 35 | Luca Scrucca [aut, cre] (), 36 | Thomas Brendan Murphy [ctb] (), 37 | Michael Fop [ctb] () 38 | Maintainer: Luca Scrucca 39 | Date/Publication: 2024-04-29 16:40:02 UTC 40 | -------------------------------------------------------------------------------- /R/icl.R: -------------------------------------------------------------------------------- 1 | ## 2 | ## Integrated Complete-data Likelihood (ICL) Criterion 3 | ## 4 | 5 | icl <- function(object, ...) UseMethod("icl") 6 | 7 | icl.Mclust <- function(object, ...) 8 | { 9 | n <- object$n 10 | # G <- object$G + ifelse(is.na(object$hypvol),0,1) 11 | z <- object$z 12 | if(is.null(z)) z <- matrix(1, nrow = n, ncol = 1) 13 | C <- matrix(0, n, ncol(z)) 14 | for(i in 1:n) 15 | C[i, which.max(z[i,])] <- 1 16 | object$bic + 2*sum(C * ifelse(z > 0, log(z), 0)) 17 | } 18 | 19 | icl.MclustDA <- function(object, ...) 
20 | { 21 | n <- object$n 22 | z <- predict(object)$z 23 | df <- object$df 24 | if(is.null(z)) z <- matrix(1, nrow = n, ncol = 1) 25 | C <- matrix(0, n, ncol(z)) 26 | for(i in 1:n) 27 | C[i, which.max(z[i,])] <- 1 28 | object$bic + 2*sum(C * ifelse(z > 0, log(z), 0)) 29 | } 30 | 31 | mclustICL <- function(data, G = NULL, modelNames = NULL, 32 | initialization = list(hcPairs=NULL, subset=NULL, noise=NULL), 33 | x = NULL, ...) 34 | { 35 | call <- match.call() 36 | data <- data.matrix(data) 37 | n <- nrow(data) 38 | d <- ncol(data) 39 | mc <- match.call(expand.dots = TRUE) 40 | mc[[1]] <- as.name("mclustBIC") 41 | mc[[2]] <- data 42 | BIC <- eval(mc, parent.frame()) 43 | class(BIC) <- "mclustBIC" 44 | G <- attr(BIC, "G") 45 | modelNames <- attr(BIC, "modelNames") 46 | ICL <- matrix(NA, nrow = length(G), ncol = length(modelNames)) 47 | mostattributes(ICL) <- attributes(BIC) 48 | if(!is.null(x)) 49 | { 50 | r <- match(as.character(G), rownames(x), nomatch = 0) 51 | c <- match(modelNames, colnames(x), nomatch = 0) 52 | ICL[r,c] <- BIC[r,c] 53 | } 54 | for(i in 1:nrow(ICL)) 55 | { for(j in 1:ncol(ICL)) 56 | { if(is.na(BIC[i,j])) next() # not fitted 57 | if(!is.na(ICL[i,j])) next() # already available 58 | Sumry <- summary(BIC, data, G = G[i], modelNames = modelNames[j]) 59 | ICL[i,j] <- icl.Mclust(Sumry) 60 | } 61 | } 62 | class(ICL) <- "mclustICL" 63 | attr(ICL, "criterion") <- "ICL" 64 | return(ICL) 65 | } 66 | 67 | print.mclustICL <- function (x, pick = 3, ...) 
68 | { 69 | subset <- !is.null(attr(x, "subset")) 70 | oldClass(x) <- attr(x, "args") <- NULL 71 | attr(x, "criterion") <- NULL 72 | attr(x, "control") <- attr(x, "initialization") <- NULL 73 | attr(x, "oneD") <- attr(x, "warn") <- attr(x, "Vinv") <- NULL 74 | attr(x, "prior") <- attr(x, "G") <- attr(x, "modelNames") <- NULL 75 | ret <- attr(x, "returnCodes") == -3 76 | n <- attr(x, "n") 77 | d <- attr(x, "d") 78 | attr(x, "returnCodes") <- attr(x, "n") <- attr(x, "d") <- NULL 79 | 80 | oldClass(x) <- attr(x, "args") <- attr(x, "criterion") <- NULL 81 | catwrap("Integrated Complete-data Likelihood (ICL) criterion:") 82 | print(x, ...) 83 | cat("\n") 84 | catwrap(paste("Top", pick, "models based on the ICL criterion:")) 85 | print(pickBIC(x, pick), ...) 86 | invisible() 87 | } 88 | 89 | summary.mclustICL <- function(object, G, modelNames, ...) 90 | { 91 | if(!missing(G)) 92 | object <- object[rownames(object) %in% G,,drop=FALSE] 93 | if(!missing(modelNames)) 94 | object <- object[,colnames(object) %in% modelNames,drop=FALSE] 95 | structure(pickBIC(object, ...), 96 | class = "summary.mclustICL") 97 | } 98 | 99 | print.summary.mclustICL <- function(x, digits = getOption("digits"), ...) 100 | { 101 | cat("Best ICL values:\n") 102 | x <- drop(as.matrix(x)) 103 | x <- rbind(ICL = x, "ICL diff" = x - max(x)) 104 | print(x, digits = digits) 105 | invisible() 106 | } 107 | 108 | 109 | plot.mclustICL <- function(x, ylab = "ICL", ...) 110 | { 111 | plot.mclustBIC(x, ylab = ylab, ...) 
112 | } 113 | 114 | -------------------------------------------------------------------------------- /R/options.R: -------------------------------------------------------------------------------- 1 | ############################################################################# 2 | 3 | .mclust <- structure(list( 4 | emModelNames = c("EII", "VII", "EEI", "VEI", "EVI", "VVI", 5 | "EEE", "VEE", "EVE", "VVE", 6 | "EEV", "VEV", "EVV", "VVV"), 7 | # in mclust version <= 4.x 8 | # emModelNames = c("EII", "VII", "EEI", "VEI", "EVI", "VVI", "EEE", "EEV", "VEV", "VVV"), 9 | hcModelName = "VVV", 10 | hcUse = "SVD", 11 | subset = 2000, 12 | fillEllipses = FALSE, 13 | bicPlotSymbols = structure(c(17, 2, 16, 10, 13, 1, 14 | 15, 8, 5, 9, 15 | 12, 7, 14, 0, 16 | 17, 2), 17 | .Names = c("EII", "VII", "EEI", "EVI", "VEI", "VVI", 18 | "EEE", "VEE", "EVE", "VVE", 19 | "EEV", "VEV", "EVV", "VVV", 20 | "E", "V")), 21 | bicPlotColors = structure( 22 | { pal <- grDevices::colorRampPalette(c("forestgreen", 23 | "royalblue1", 24 | "red3"), space = "Lab") 25 | c("gray", "black", pal(12), "gray", "black") 26 | }, 27 | .Names = c("EII", "VII", "EEI", "EVI", "VEI", "VVI", 28 | "EEE", "VEE", "EVE", "VVE", 29 | "EEV", "VEV", "EVV", "VVV", 30 | "E", "V")), 31 | classPlotSymbols = c(16, 0, 17, 3, 15, 4, 1, 8, 2, 7, 32 | 5, 9, 6, 10, 11, 18, 12, 13, 14), 33 | classPlotColors = c("dodgerblue2", "red3", "green3", "slateblue", 34 | "darkorange", "skyblue1", "violetred4", "forestgreen", 35 | "steelblue4", "slategrey", "brown", "black", 36 | "darkseagreen", "darkgoldenrod3", "olivedrab", 37 | "royalblue", "tomato4", "cyan2", "springgreen2"), 38 | warn = FALSE)) 39 | 40 | mclust.options <- function(...) 41 | { 42 | current <- get(".mclust", envir = asNamespace("mclust")) 43 | if(nargs() == 0) return(current) 44 | args <- list(...) 
45 | if(length(args) == 1 && is.null(names(args))) 46 | { arg <- args[[1]] 47 | switch(mode(arg), 48 | list = args <- arg, 49 | character = return(.mclust[[arg]]), 50 | stop("invalid argument: ", dQuote(arg))) 51 | } 52 | if(length(args) == 0) return(current) 53 | n <- names(args) 54 | if (is.null(n)) stop("options must be given by name") 55 | changed <- current[n] 56 | current[n] <- args 57 | assign(".mclust", current, envir = asNamespace("mclust")) 58 | # assignInNamespace(".mclust", current, ns = asNamespace("mclust")) 59 | # invisible(changed) # bettina suggestion... 60 | invisible(current) 61 | } 62 | -------------------------------------------------------------------------------- /R/weights.R: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | ## Weights for MCLUST 3 | ## 4 | ## Written by Thomas Brendan Murphy 5 | ## Bugs fix by Luca Scrucca 6 | ############################################################################# 7 | 8 | me.weighted <- function(data, modelName, z, weights = NULL, prior = NULL, 9 | control = emControl(), Vinv = NULL, warn = NULL, ...) 
10 | { 11 | data <- as.matrix(data) 12 | nobs <- nrow(data) 13 | modelName <- switch(EXPR = modelName, 14 | "X" = "E", 15 | "XII" = "EII", 16 | "XXI" = "EEI", 17 | "XXX" = "EEE", 18 | modelName) 19 | if(is.null(warn)) warn <- mclust.options("warn") 20 | if(is.null(weights)) 21 | { weights <- rep(1,nobs) } 22 | if(any(weights < 0)| any(!is.finite(weights))) 23 | { stop("Weights must be positive and finite") } 24 | if(!is.vector(weights)) 25 | { stop("Weights must be a vector") } 26 | if(max(weights) > 1) 27 | { if(warn) 28 | warning("Weights rescaled to have maximum equal to 1") 29 | weights <- weights/max(weights) 30 | } 31 | zw <- z*weights 32 | llold <- ll <- -Inf 33 | eps <- .Machine$double.eps 34 | criterion <- TRUE 35 | iter <- 0 36 | while(!is.na(criterion) & criterion) 37 | { 38 | iter <- iter+1 39 | fit.m <- do.call("mstep", list(data = data, 40 | z = zw, 41 | modelName = modelName, 42 | prior = prior, 43 | control = control, 44 | Vinv = Vinv, 45 | warn = warn)) 46 | fit.m$parameters$pro <- fit.m$parameters$pro/mean(weights) 47 | fit.e <- do.call("estep", c(list(data = data, 48 | control = control, 49 | Vinv = Vinv, 50 | warn = warn), 51 | fit.m)) 52 | if(is.na(fit.e$loglik)) 53 | { criterion <- FALSE; next() } 54 | zw <- pmax(fit.e$z*weights, eps) 55 | ldens <- do.call("dens", c(list(data = data, 56 | logarithm = TRUE, 57 | warn = warn), 58 | fit.m)) 59 | ll <- sum(weights*ldens) 60 | criterion <- criterion & (iter < control$itmax[1]) 61 | criterion <- criterion & ((ll-llold)/(1+abs(ll)) > control$tol[1]) 62 | llold <- ll 63 | } 64 | fit <- fit.m 65 | fit$z <- fit.e$z 66 | fit$weights <- weights 67 | fit$loglik <- ll/mean(weights) 68 | npar <- nMclustParams(modelName = modelName, 69 | d = ncol(data), 70 | G = ncol(z)) 71 | fit$bic <- 2*ll - npar*log(nobs) 72 | return(fit) 73 | } 74 | -------------------------------------------------------------------------------- /R/zzz.R: -------------------------------------------------------------------------------- 1 | 
# .onLoad <- function(libname, pkgname) 2 | # { 3 | # library.dynam("mclust", pkgname, libname) 4 | # } 5 | 6 | mclustStartupMessage <- function() 7 | { 8 | # Startup message obtained as 9 | # > figlet -f slant MCLUST 10 | # msg <- c(paste0( 11 | # " __ ___________ __ _____________ 12 | # / |/ / ____/ / / / / / ___/_ __/ 13 | # / /|_/ / / / / / / / /\\__ \\ / / 14 | # / / / / /___/ /___/ /_/ /___/ // / 15 | # /_/ /_/\\____/_____/\\____//____//_/ version ", 16 | # 17 | # Startup message obtained as 18 | # > figlet -f slant mclust 19 | msg <- c(paste0( 20 | " __ __ 21 | ____ ___ _____/ /_ _______/ /_ 22 | / __ `__ \\/ ___/ / / / / ___/ __/ 23 | / / / / / / /__/ / /_/ (__ ) /_ 24 | /_/ /_/ /_/\\___/_/\\__,_/____/\\__/ version ", 25 | packageVersion("mclust")), 26 | "\nType 'citation(\"mclust\")' for citing this R package in publications.") 27 | return(msg) 28 | } 29 | 30 | .onAttach <- function(lib, pkg) 31 | { 32 | # unlock .mclust variable allowing its modification 33 | unlockBinding(".mclust", asNamespace("mclust")) 34 | # startup message 35 | msg <- mclustStartupMessage() 36 | if(!interactive()) 37 | msg[1] <- paste("Package 'mclust' version", packageVersion("mclust")) 38 | packageStartupMessage(msg) 39 | invisible() 40 | } 41 | -------------------------------------------------------------------------------- /build/partial.rdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cran/mclust/65e2a1c0538807f5e52ade030f12f5af4c1bc746/build/partial.rdb -------------------------------------------------------------------------------- /build/vignette.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cran/mclust/65e2a1c0538807f5e52ade030f12f5af4c1bc746/build/vignette.rds -------------------------------------------------------------------------------- /data/EuroUnemployment.R: 
-------------------------------------------------------------------------------- 1 | EuroUnemployment <- structure(list( 2 | "TUR" = c(8.5, 11.4, 6.1, 6.6, 5, 7.4, 11.3, 26.5, 24.5, 10.3, 17.3, 12.7, 3 | 16.1, 10.8, 10.7, 5.9, 7.7, 5.9, 7.4, 5.6, 9, 14.1, 6.8, 9.7, 13.2, 8.7, 4 | 7.9, 6.1, 5, 3.5, 9.9), 5 | "YUR" = c(23.2, 23.8, 15.9, 12.6, 7.7, 15, 23.9, 52.4, 53.2, 24.1, 45.5, 6 | 42.7, 35.9, 19.6, 19.3, 21.2, 20.4, 11.8, 12.7, 10.3, 23.9, 34.7, 24, 7 | 20.2, 29.7, 20.5, 22.9, 16.9, 10, 7.9, 18), 8 | LUR = c(4.3, 6.9, 2.7, 1.7, 2.2, 3.3, 6.7, 19.5, 12.9, 4.4, 10.1, 7.8, 9 | 7.7, 4.7, 4.8, 1.6, 3.7, 2.7, 3, 1.5, 3.8, 8.4, 2.8, 5.3, 9.3, 1.9, 1.5, 10 | 2.2, 0.6, 0.8, 2)), 11 | class = "data.frame", 12 | row.names = c("Belgium", "Bulgaria", "Czech Republic", "Denmark", "Germany", 13 | "Estonia", "Ireland", "Greece", "Spain", "France", "Croatia", "Italy", 14 | "Cyprus", "Latvia", "Lithuania", "Luxembourg", "Hungary", "Malta", 15 | "Netherlands", "Austria", "Poland", "Portugal", "Romania", "Slovenia", 16 | "Slovakia", "Finland", "Sweden", "United Kingdom", "Iceland", "Norway", 17 | "Turkey" 18 | )) -------------------------------------------------------------------------------- /data/acidity.R: -------------------------------------------------------------------------------- 1 | acidity = c( 2 | 2.928524, 3.910021, 3.732896, 3.688879, 3.822098, 3.735286, 4.143135, 3 | 4.276666, 3.931826, 4.077537, 4.779123, 4.234107, 4.276666, 4.543295, 4 | 6.467388, 4.127134, 3.977811, 4.264087, 4.007333, 3.921973, 5.384495, 5 | 4.912655, 4.046554, 4.043051, 4.406719, 4.505350, 3.931826, 6.752270, 6 | 6.928538, 5.994460, 4.248495, 4.060443, 4.727388, 6.047372, 4.082609, 7 | 4.244200, 4.890349, 4.416428, 5.743003, 4.127134, 5.489764, 4.778283, 8 | 5.249652, 4.855929, 4.128746, 4.442651, 4.025352, 4.290459, 4.593098, 9 | 4.652054, 4.178992, 4.382027, 5.569489, 5.049856, 4.188138, 6.629363, 10 | 4.647271, 4.784989, 4.348987, 5.361292, 4.574711, 4.442651, 6.120297, 11 | 4.060443, 
4.143135, 4.510860, 6.049733, 4.510860, 4.406719, 6.343880, 12 | 4.430817, 5.929589, 5.973301, 4.481872, 4.301359, 6.452680, 4.204693, 13 | 4.143135, 6.603944, 4.644391, 5.863631, 4.025352, 5.717028, 5.308268, 14 | 6.267201, 4.060443, 5.017280, 4.510860, 5.834811, 4.330733, 4.007333, 15 | 6.806829, 5.257495, 4.624973, 4.781641, 4.099332, 7.044382, 3.914021, 16 | 4.330733, 4.016383, 5.572154, 4.043051, 4.843399, 4.110874, 4.454347, 17 | 4.356709, 6.154858, 6.284321, 6.978214, 4.301359, 5.929855, 4.465908, 18 | 6.035481, 6.726473, 7.105130, 6.014937, 4.882802, 7.032095, 4.518522, 19 | 6.476665, 6.125558, 4.189655, 5.323498, 4.938065, 6.313548, 5.853925, 20 | 6.278146, 7.020191, 5.023881, 4.262680, 6.725634, 6.489205, 5.743003, 21 | 6.739337, 6.466145, 6.855409, 5.120983, 5.913773, 6.516932, 4.058717, 22 | 6.213608, 6.554218, 6.155707, 4.314818, 6.662494, 6.749931, 6.100319, 23 | 4.112512, 6.946014, 4.131961, 6.234411, 6.595781, 6.683861, 6.957973, 24 | 4.497585) 25 | -------------------------------------------------------------------------------- /data/diabetes.R: -------------------------------------------------------------------------------- 1 | diabetes <- 2 | structure(.Data = list( 3 | "class" = structure(.Data = c(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 5 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 1, 1, 2, 1, 1, 2, 2, 2, 2, 1, 7 | 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 8 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 9 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 10 | 3, 3, 3, 3, 3), 11 | class = "factor", levels = c("Chemical", "Normal", "Overt")), 12 | "glucose" = c(80., 97., 105., 90., 90., 86., 100., 85., 97., 97., 91., 13 | 87., 78., 90., 86., 14 | 80., 90., 99., 85., 90., 90., 88., 95., 90., 92., 74., 98., 100., 86., 15 | 98., 70., 99., 75., 90., 85., 99., 100., 78., 
106., 98., 102., 90., 16 | 94., 80., 93., 86., 85., 96., 88., 87., 94., 93., 86., 86., 96., 86., 17 | 89., 83., 98., 100., 110., 88., 100., 80., 89., 91., 96., 95., 82., 18 | 84., 90., 100., 86., 93., 107., 112., 94., 93., 93., 90., 99., 93., 19 | 85., 89., 96., 111., 107., 114., 101., 108., 112., 105., 103., 99., 20 | 102., 110., 102., 96., 95., 112., 110., 92., 104., 75., 92., 92., 92., 21 | 93., 112., 88., 114., 103., 300., 303., 125., 280., 216., 190., 151., 22 | 303., 173., 203., 195., 140., 151., 275., 260., 149., 233., 146., 124., 23 | 213., 330., 123., 130., 120., 138., 188., 339., 265., 353., 180., 213., 24 | 328., 346.), 25 | "insulin" = c(356., 289., 319., 356., 323., 381., 350., 301., 379., 296., 26 | 353., 306., 290., 27 | 371., 312., 393., 364., 359., 296., 345., 378., 304., 347., 327., 386., 28 | 365., 365., 352., 325., 321., 360., 336., 352., 353., 373., 376., 367., 29 | 335., 396., 277., 378., 360., 291., 269., 318., 328., 334., 356., 291., 30 | 360., 313., 306., 319., 349., 332., 323., 323., 351., 478., 398., 426., 31 | 439., 429., 333., 472., 436., 418., 391., 390., 416., 413., 385., 393., 32 | 376., 403., 414., 426., 364., 391., 356., 398., 393., 425., 318., 465., 33 | 558., 503., 540., 469., 486., 568., 527., 537., 466., 599., 477., 472., 34 | 456., 517., 503., 522., 476., 472., 45., 442., 541., 580., 472., 562., 35 | 423., 643., 533., 1468., 1487., 714., 1470., 1113., 972., 854., 1364., 36 | 832., 967., 920., 613., 857., 1373., 1133., 849., 1183., 847., 538., 37 | 1001., 1520., 557., 670., 636., 741., 958., 1354., 1263., 1428., 923., 38 | 1025., 1246., 1568.), 39 | "sspg" = c(124., 117., 143., 199., 240., 157., 221., 186., 142., 131., 40 | 221., 178., 136., 41 | 200., 208., 202., 152., 185., 116., 123., 136., 134., 184., 192., 279., 42 | 228., 145., 172., 179., 222., 134., 143., 169., 263., 174., 134., 182., 43 | 241., 128., 222., 165., 282., 94., 121., 73., 106., 118., 112., 157., 44 | 292., 200., 220., 144., 109., 151., 158., 73., 81., 151., 
122., 117., 45 | 208., 201., 131., 162., 148., 130., 137., 375., 146., 344., 192., 115., 46 | 195., 267., 281., 213., 156., 221., 199., 76., 490., 143., 73., 237., 47 | 748., 320., 188., 607., 297., 232., 480., 622., 287., 266., 124., 297., 48 | 326., 564., 408., 325., 433., 180., 392., 109., 313., 132., 285., 139., 49 | 212., 155., 120., 28., 23., 232., 54., 81., 87., 76., 42., 102., 138., 50 | 160., 131., 145., 45., 118., 159., 73., 103., 460., 42., 13., 130., 51 | 44., 314., 219., 100., 10., 83., 41., 77., 29., 124., 15.) 52 | ), 53 | class = "data.frame", 54 | names = c("class", "glucose", "insulin", "sspg"), 55 | row.names = c( 56 | "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", 57 | "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26", 58 | "27", "28", "29", "30", "31", "32", "33", "34", "35", "36", "37", "38", 59 | "39", "40", "41", "42", "43", "44", "45", "46", "47", "48", "49", "50", 60 | "51", "52", "53", "54", "55", "56", "57", "58", "59", "60", "61", "62", 61 | "63", "64", "65", "66", "67", "68", "69", "70", "71", "72", "73", "74", 62 | "75", "76", "77", "78", "79", "80", "81", "82", "83", "84", "85", "86", 63 | "87", "88", "89", "90", "91", "92", "93", "94", "95", "96", "97", "98", 64 | "99", "100", "101", "102", "103", "104", "105", "106", "107", "108", 65 | "109", "110", "111", "112", "113", "114", "115", "116", "117", "118", 66 | "119", "120", "121", "122", "123", "124", "125", "126", "127", "128", 67 | "129", "130", "131", "132", "133", "134", "135", "136", "137", "138", 68 | "139", "140", "141", "142", "143", "144", "145") 69 | ) 70 | -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | citHeader("To cite 'mclust' R package in publications, please use:") 2 | 3 | bibentry(bibtype = "Book", 4 | title = "Model-Based Clustering, Classification, and Density Estimation Using 
{mclust} in {R}", 5 | author = c(person(given="Luca", family="Scrucca"), 6 | person(given="Chris", family="Fraley"), 7 | person(given=c("T.", "Brendan"), family="Murphy"), 8 | person(given=c("Adrian", "E."), family="Raftery")), 9 | publisher = "Chapman and Hall/CRC", 10 | isbn = "978-1032234953", 11 | doi = "10.1201/9781003277965", 12 | year = "2023", 13 | url = "https://mclust-org.github.io/book/") 14 | 15 | # OLD 16 | # citEntry(entry = "Article", 17 | # title = "{mclust} 5: clustering, classification and density estimation using {G}aussian finite mixture models", 18 | # author = personList(person(given="Luca", family="Scrucca"), 19 | # person(given="Michael", family="Fop"), 20 | # person(given=c("T.", "Brendan"), family="Murphy"), 21 | # person(given=c("Adrian", "E."), family="Raftery")), 22 | # journal = "The {R} Journal", 23 | # year = "2016", 24 | # volume = "8", 25 | # number = "1", 26 | # pages = "289--317", 27 | # url="https://doi.org/10.32614/RJ-2016-021", 28 | # # 29 | # textVersion = 30 | # paste("Scrucca L., Fop M., Murphy T. B. and Raftery A. E. (2016)", 31 | # "mclust 5: clustering, classification and density estimation using Gaussian finite mixture models", 32 | # "The R Journal", 33 | # "8/1, pp. 289-317")) 34 | -------------------------------------------------------------------------------- /man/Baudry_etal_2010_JCGS_examples.Rd: -------------------------------------------------------------------------------- 1 | \name{Baudry_etal_2010_JCGS_examples} 2 | \alias{Baudry_etal_2010_JCGS_examples} 3 | \alias{ex4.1} 4 | \alias{ex4.2} 5 | \alias{ex4.3} 6 | \alias{ex4.4.1} 7 | \alias{ex4.4.2} 8 | \alias{Test1D} 9 | 10 | \docType{data} 11 | 12 | \title{Simulated Example Datasets From Baudry et al. (2010)} 13 | 14 | \description{ 15 | Simulated datasets used in Baudry et al. (2010) to illustrate the proposed mixture components combining method for clustering. 16 | 17 | Please see the cited article for a detailed presentation of these datasets. 
The data frame with name exN.M is presented in Section N.M in the paper. 18 | 19 | Test1D (not in the article) has been simulated from a Gaussian mixture distribution in R. 20 | 21 | ex4.1 and ex4.2 have been simulated from a Gaussian mixture distribution in R^2. 22 | 23 | ex4.3 has been simulated from a mixture of a uniform distribution on a square and a spherical Gaussian distribution in R^2. 24 | 25 | ex4.4.1 has been simulated from a Gaussian mixture model in R^2 26 | 27 | ex4.4.2 has been simulated from a mixture of two uniform distributions in R^3. 28 | } 29 | \usage{data(Baudry_etal_2010_JCGS_examples)} 30 | \format{ 31 | 32 | \code{ex4.1} is a data frame with 600 observations on 2 real variables. 33 | 34 | \code{ex4.2} is a data frame with 600 observations on 2 real variables. 35 | 36 | \code{ex4.3} is a data frame with 200 observations on 2 real variables. 37 | 38 | \code{ex4.4.1} is a data frame with 800 observations on 2 real variables. 39 | 40 | \code{ex4.4.2} is a data frame with 300 observations on 3 real variables. 41 | 42 | \code{Test1D} is a data frame with 200 observations on 1 real variable. 43 | 44 | } 45 | \references{ 46 | J.-P. Baudry, A. E. Raftery, G. Celeux, K. Lo and R. Gottardo (2010). Combining mixture components for clustering. \emph{Journal of Computational and Graphical Statistics}, 19(2):332-353. 
47 | } 48 | \examples{ 49 | \donttest{ 50 | data(Baudry_etal_2010_JCGS_examples) 51 | 52 | output <- clustCombi(data = ex4.4.1) 53 | output # is of class clustCombi 54 | 55 | # plots the hierarchy of combined solutions, then some "entropy plots" which 56 | # may help one to select the number of classes 57 | plot(output) 58 | } 59 | 60 | } 61 | \keyword{datasets} 62 | -------------------------------------------------------------------------------- /man/EuroUnemployment.Rd: -------------------------------------------------------------------------------- 1 | \name{EuroUnemployment} 2 | \alias{EuroUnemployment} 3 | \docType{data} 4 | 5 | \title{Unemployment data for European countries in 2014} 6 | 7 | \description{ 8 | The data set contains unemployment rates for 31 European countries for the year 2014.} 9 | 10 | \usage{data(EuroUnemployment)} 11 | 12 | \format{A data frame with the following variables: 13 | 14 | \describe{ 15 | 16 | \item{TUR}{Total unemployment rate, i.e. percentage of unemployed persons aged 15-74 in the economically active population.} 17 | \item{YUR}{Youth unemployment rate, i.e. percentage of unemployed persons aged 15-24 in the economically active population.} 18 | \item{LUR}{Long-term unemployment rate, i.e. percentage of unemployed persons who have been unemployed for 12 months or more.} 19 | } 20 | 21 | } 22 | 23 | \source{Dataset downloaded from EUROSTAT \url{https://ec.europa.eu/eurostat}.} 24 | 25 | \keyword{datasets} 26 | -------------------------------------------------------------------------------- /man/GvHD.Rd: -------------------------------------------------------------------------------- 1 | \name{GvHD} 2 | \alias{GvHD} 3 | \alias{GvHD.pos} 4 | \alias{GvHD.control} 5 | 6 | \docType{data} 7 | \title{GvHD Dataset} 8 | \description{ 9 | GvHD (Graft-versus-Host Disease) data of Brinkman et al. (2007). Two samples of this flow cytometry data, one from a patient with the GvHD, and the other from a control patient. 
The GvHD positive and control samples consist of 9083 and 6809 observations, respectively. Both samples include four biomarker variables, namely, CD4, CD8b, CD3, and CD8. The objective of the analysis is to identify CD3+ CD4+ CD8b+ cell sub-populations present in the GvHD positive sample. 10 | 11 | A treatment of this data by combining mixtures is proposed in Baudry et al. (2010). 12 | } 13 | \usage{data(GvHD)} 14 | \format{ 15 | GvHD.pos (positive patient) is a data frame with 9083 observations on the following 4 variables, which are biomarker measurements. 16 | \describe{ 17 | \item{CD4}{} 18 | \item{CD8b}{} 19 | \item{CD3}{} 20 | \item{CD8}{} 21 | } 22 | GvHD.control (control patient) is a data frame with 6809 observations on the following 4 variables, which are biomarker measurements. 23 | \describe{ 24 | \item{CD4}{} 25 | \item{CD8b}{} 26 | \item{CD3}{} 27 | \item{CD8}{} 28 | } 29 | } 30 | \references{ 31 | R. R. Brinkman, M. Gasparetto, S.-J. J. Lee, A. J. Ribickas, J. Perkins, W. Janssen, R. Smiley and C. Smith (2007). High-content flow cytometry and temporal data analysis for defining a cellular signature of Graft-versus-Host Disease. \emph{Biology of Blood and Marrow Transplantation, 13: 691-700.} 32 | 33 | K. Lo, R. R. Brinkman, R. Gottardo (2008). Automated gating of flow cytometry data via robust model-based clustering. \emph{Cytometry A, 73: 321-332.} 34 | 35 | J.-P. Baudry, A. E. Raftery, G. Celeux, K. Lo and R. Gottardo (2010). Combining mixture components for clustering. 
\emph{Journal of Computational and Graphical Statistics, 19(2):332-353.} 36 | } 37 | \examples{ 38 | \donttest{ 39 | data(GvHD) 40 | dat <- GvHD.pos[1:500,] # only a few lines for a quick example 41 | output <- clustCombi(data = dat) 42 | output # is of class clustCombi 43 | # plot the hierarchy of combined solutions 44 | plot(output, what = "classification") 45 | # plot some "entropy plots" which may help one to select the number of classes 46 | plot(output, what = "entropy") 47 | # plot the tree structure obtained from combining mixture components 48 | plot(output, what = "tree") 49 | } 50 | 51 | } 52 | \keyword{datasets} 53 | -------------------------------------------------------------------------------- /man/acidity.Rd: -------------------------------------------------------------------------------- 1 | \name{acidity} 2 | \alias{acidity} 3 | \docType{data} 4 | 5 | \title{Acidity data} 6 | 7 | \description{ 8 | Acidity index measured in a sample of 155 lakes in the Northeastern United States. 9 | Following Crawford et al. (1992, 1994), the data are expressed as log(ANC+50), where ANC is the acidity neutralising capacity value. 10 | The data were also used to fit mixtures of Gaussian distributions by Richardson and Green (1997), and by McLachlan and Peel (2000, Sec. 6.6.2). 11 | } 12 | 13 | \usage{data(acidity)} 14 | 15 | \source{\code{https://www.stats.bris.ac.uk/~peter/mixdata}} 16 | 17 | \references{ 18 | Crawford, S. L. (1994) An application of the Laplace method to finite mixture distributions. \emph{Journal of the American Statistical Association}, 89, 259--267. 19 | 20 | Crawford, S. L., DeGroot, M. H., Kadane, J. B., and Small, M. J. (1992) Modeling lake chemistry distributions: Approximate Bayesian methods for estimating a finite mixture model. \emph{Technometrics}, 34, 441--453. 21 | 22 | McLachlan, G. and Peel, D. (2000) \emph{Finite Mixture Models}. Wiley, New York. 23 | 24 | Richardson, S. and Green, P. J.
(1997) On Bayesian analysis of mixtures with unknown number of components (with discussion). \emph{Journal of the Royal Statistical Society, Series B}, 59, 731--792. 25 | } 26 | 27 | \keyword{datasets} 28 | -------------------------------------------------------------------------------- /man/adjustedRandIndex.Rd: -------------------------------------------------------------------------------- 1 | \name{adjustedRandIndex} 2 | \alias{adjustedRandIndex} 3 | \title{ 4 | Adjusted Rand Index 5 | } 6 | \description{ 7 | Computes the adjusted Rand index comparing two classifications. 8 | } 9 | \usage{ 10 | adjustedRandIndex(x, y) 11 | } 12 | \arguments{ 13 | \item{x}{ 14 | A numeric or character vector of class labels. 15 | } 16 | \item{y}{ 17 | A numeric or character vector of class labels. 18 | The length of \code{y} should be the same as that of \code{x}. 19 | } 20 | } 21 | \value{ 22 | The adjusted Rand index comparing the two partitions (a scalar). 23 | This index has zero expected value in the case of random partition, and it is bounded above by 1 in the case of perfect agreement between two partitions. 24 | } 25 | 26 | \references{ 27 | L. Hubert and P. Arabie (1985) Comparing Partitions, \emph{Journal of Classification}, 2, pp. 193-218.
28 | } 29 | \seealso{ 30 | \code{\link{classError}}, 31 | \code{\link{mapClass}}, 32 | \code{\link{table}} 33 | } 34 | \examples{ 35 | a <- rep(1:3, 3) 36 | a 37 | b <- rep(c("A", "B", "C"), 3) 38 | b 39 | adjustedRandIndex(a, b) 40 | 41 | a <- sample(1:3, 9, replace = TRUE) 42 | a 43 | b <- sample(c("A", "B", "C"), 9, replace = TRUE) 44 | b 45 | adjustedRandIndex(a, b) 46 | 47 | a <- rep(1:3, 4) 48 | a 49 | b <- rep(c("A", "B", "C", "D"), 3) 50 | b 51 | adjustedRandIndex(a, b) 52 | 53 | irisHCvvv <- hc(modelName = "VVV", data = iris[,-5]) 54 | cl3 <- hclass(irisHCvvv, 3) 55 | adjustedRandIndex(cl3,iris[,5]) 56 | 57 | irisBIC <- mclustBIC(iris[,-5]) 58 | adjustedRandIndex(summary(irisBIC,iris[,-5])$classification,iris[,5]) 59 | adjustedRandIndex(summary(irisBIC,iris[,-5],G=3)$classification,iris[,5]) 60 | } 61 | \keyword{cluster} 62 | -------------------------------------------------------------------------------- /man/banknote.Rd: -------------------------------------------------------------------------------- 1 | \name{banknote} 2 | \alias{banknote} 3 | \docType{data} 4 | 5 | \title{Swiss banknotes data} 6 | 7 | \description{ 8 | The data set contains six measurements made on 100 genuine and 100 counterfeit old-Swiss 1000-franc bank notes.} 9 | 10 | \usage{data(banknote)} 11 | 12 | \format{A data frame with the following variables: 13 | 14 | \describe{ 15 | \item{Status}{the status of the banknote: \code{genuine} or \code{counterfeit}} 16 | \item{Length}{Length of bill (mm)} 17 | \item{Left}{Width of left edge (mm)} 18 | \item{Right}{Width of right edge (mm)} 19 | \item{Bottom}{Bottom margin width (mm)} 20 | \item{Top}{Top margin width (mm)} 21 | \item{Diagonal}{Length of diagonal (mm)} 22 | } 23 | 24 | } 25 | 26 | \source{Flury, B. and Riedwyl, H. (1988). \emph{Multivariate Statistics: A practical approach.} London: Chapman & Hall, Tables 1.1 and 1.2, pp. 
5-8.} 27 | 28 | \keyword{datasets} 29 | -------------------------------------------------------------------------------- /man/bic.Rd: -------------------------------------------------------------------------------- 1 | \name{bic} 2 | \alias{bic} 3 | \title{ 4 | BIC for Parameterized Gaussian Mixture Models 5 | } 6 | \description{ 7 | Computes the BIC (Bayesian Information Criterion) for parameterized 8 | mixture models given the loglikelihood, the dimension of the data, 9 | and number of mixture components in the model. 10 | } 11 | \usage{ 12 | bic(modelName, loglik, n, d, G, noise=FALSE, equalPro=FALSE, \dots) 13 | } 14 | \arguments{ 15 | \item{modelName}{ 16 | A character string indicating the model. The help file for 17 | \code{\link{mclustModelNames}} describes the available models. 18 | } 19 | \item{loglik}{ 20 | The log-likelihood for a data set with respect to the Gaussian mixture model 21 | specified in the \code{modelName} argument. 22 | } 23 | \item{n}{ 24 | The number of observations in the data used to compute \code{loglik}. 25 | } 26 | \item{d}{ 27 | The dimension of the data used to compute \code{loglik}. 28 | } 29 | \item{G}{ 30 | The number of components in the Gaussian mixture model used to compute 31 | \code{loglik}. 32 | } 33 | \item{noise}{ 34 | A logical variable indicating whether or not the model includes an 35 | optional Poisson noise component. The default is to assume no noise 36 | component. 37 | } 38 | \item{equalPro}{ 39 | A logical variable indicating whether or not the components in the 40 | model are assumed to be present in equal proportion. The default is 41 | to assume unequal mixing proportions. 42 | } 43 | \item{\dots}{ 44 | Catches unused arguments in an indirect or list call via \code{do.call}. 45 | } 46 | } 47 | \value{ 48 | The BIC or Bayesian Information Criterion for the given input arguments. 49 | } 50 | 51 | \seealso{ 52 | \code{\link{mclustBIC}}, 53 | \code{\link{nVarParams}}, 54 | \code{\link{mclustModelNames}}. 
55 | } 56 | \examples{ 57 | \donttest{ 58 | n <- nrow(iris) 59 | d <- ncol(iris)-1 60 | G <- 3 61 | 62 | emEst <- me(modelName="VVI", data=iris[,-5], unmap(iris[,5])) 63 | names(emEst) 64 | 65 | args(bic) 66 | bic(modelName="VVI", loglik=emEst$loglik, n=n, d=d, G=G) 67 | # do.call("bic", emEst) ## alternative call 68 | } 69 | } 70 | \keyword{cluster} 71 | -------------------------------------------------------------------------------- /man/cdens.Rd: -------------------------------------------------------------------------------- 1 | \name{cdens} 2 | \alias{cdens} 3 | \title{ 4 | Component Density for Parameterized MVN Mixture Models 5 | } 6 | \description{ 7 | Computes component densities for observations in MVN mixture models 8 | parameterized by eigenvalue decomposition. 9 | } 10 | \usage{ 11 | cdens(data, modelName, parameters, logarithm = FALSE, warn = NULL, \dots) 12 | } 13 | \arguments{ 14 | \item{data}{ 15 | A numeric vector, matrix, or data frame of observations. Categorical 16 | variables are not allowed. If a matrix or data frame, rows 17 | correspond to observations and columns correspond to variables. 18 | } 19 | \item{modelName}{ 20 | A character string indicating the model. The help file for 21 | \code{\link{mclustModelNames}} describes the available models. 22 | } 23 | \item{parameters}{ 24 | The parameters of the model: 25 | \describe{ 26 | \item{\code{mean}}{ 27 | The mean for each component. If there is more than one component, 28 | this is a matrix whose kth column is the mean of the \emph{k}th 29 | component of the mixture model. 30 | } 31 | \item{\code{variance}}{ 32 | A list of variance parameters for the model. 33 | The components of this list depend on the model 34 | specification. See the help file for \code{\link{mclustVariance}} 35 | for details. 36 | } 37 | } 38 | } 39 | \item{logarithm}{ 40 | A logical value indicating whether or not the logarithm of the component 41 | densities should be returned. 
The default is to return the component 42 | densities, obtained from the log component densities by exponentiation. 43 | } 44 | \item{warn}{ 45 | A logical value indicating whether or not a warning should be issued 46 | when computations fail. The default is \code{warn=FALSE}. 47 | } 48 | \item{\dots}{ 49 | Catches unused arguments in indirect or list calls via \code{do.call}. 50 | } 51 | } 52 | \value{ 53 | A numeric matrix whose \code{[i,k]}th entry is the 54 | density or log density of observation \emph{i} in component \emph{k}. 55 | The densities are not scaled by mixing proportions. 56 | } 57 | \note{ 58 | When one or more component densities are very large in magnitude, 59 | it may be possible to compute the logarithm of the component 60 | densities but not the component densities themselves due to overflow. 61 | } 62 | 63 | \seealso{ 64 | \code{\link{cdensE}}, \dots, 65 | \code{\link{cdensVVV}}, 66 | \code{\link{dens}}, 67 | \code{\link{estep}}, 68 | \code{\link{mclustModelNames}}, 69 | \code{\link{mclustVariance}}, 70 | \code{\link{mclust.options}}, 71 | \code{\link{do.call}} 72 | } 73 | \examples{ 74 | z2 <- unmap(hclass(hcVVV(faithful),2)) # initial value for 2 class case 75 | 76 | model <- me(modelName = "EEE", data = faithful, z = z2) 77 | cdens(modelName = "EEE", data = faithful, logarithm = TRUE, 78 | parameters = model$parameters)[1:5,] 79 | 80 | data(cross) 81 | odd <- seq(1, nrow(cross), by = 2) 82 | oddBIC <- mclustBIC(cross[odd,-1]) 83 | oddModel <- mclustModel(cross[odd,-1], oddBIC) ## best parameter estimates 84 | names(oddModel) 85 | 86 | even <- odd + 1 87 | densities <- cdens(modelName = oddModel$modelName, data = cross[even,-1], 88 | parameters = oddModel$parameters) 89 | cbind(class = cross[even,1], densities)[1:5,] 90 | } 91 | \keyword{cluster} 92 | -------------------------------------------------------------------------------- /man/cdfMclust.Rd: -------------------------------------------------------------------------------- 1 | 
\name{cdfMclust} 2 | \alias{cdfMclust} 3 | \alias{quantileMclust} 4 | 5 | \title{ 6 | Cumulative Distribution and Quantiles for a univariate Gaussian mixture 7 | distribution 8 | } 9 | 10 | \description{ 11 | Compute the cumulative distribution function (cdf) or quantiles from an estimated one-dimensional Gaussian mixture fitted using \code{\link{densityMclust}}.} 12 | 13 | \usage{ 14 | cdfMclust(object, data, ngrid = 100, \dots) 15 | quantileMclust(object, p, \dots) 16 | } 17 | 18 | \arguments{ 19 | \item{object}{a \code{densityMclust} model object.} 20 | \item{data}{a numeric vector of evaluation points.} 21 | \item{ngrid}{the number of points in a regular grid to be used as evaluation points if no \code{data} are provided.} 22 | \item{p}{a numeric vector of probabilities.} 23 | \item{\dots}{further arguments passed to or from other methods.} 24 | } 25 | 26 | \details{The cdf is evaluated at points given by the optional argument \code{data}. If not provided, a regular grid of length \code{ngrid} for the evaluation points is used. 27 | 28 | The quantiles are computed using a bisection linear search algorithm. 29 | } 30 | 31 | \value{ 32 | \code{cdfMclust} returns a list of \code{x} and \code{y} values providing, respectively, the evaluation points and the estimated cdf. 33 | 34 | \code{quantileMclust} returns a vector of quantiles. 35 | } 36 | 37 | \author{Luca Scrucca} 38 | 39 | \seealso{ 40 | \code{\link{densityMclust}}, 41 | \code{\link{plot.densityMclust}}.
42 | } 43 | 44 | \examples{ 45 | \donttest{ 46 | x <- c(rnorm(100), rnorm(100, 3, 2)) 47 | dens <- densityMclust(x, plot = FALSE) 48 | summary(dens, parameters = TRUE) 49 | cdf <- cdfMclust(dens) 50 | str(cdf) 51 | q <- quantileMclust(dens, p = c(0.01, 0.1, 0.5, 0.9, 0.99)) 52 | cbind(quantile = q, cdf = cdfMclust(dens, q)$y) 53 | plot(cdf, type = "l", xlab = "x", ylab = "CDF") 54 | points(q, cdfMclust(dens, q)$y, pch = 20, col = "red3") 55 | 56 | par(mfrow = c(2,2)) 57 | dens.waiting <- densityMclust(faithful$waiting) 58 | plot(cdfMclust(dens.waiting), type = "l", 59 | xlab = dens.waiting$varname, ylab = "CDF") 60 | dens.eruptions <- densityMclust(faithful$eruptions) 61 | plot(cdfMclust(dens.eruptions), type = "l", 62 | xlab = dens.eruptions$varname, ylab = "CDF") 63 | par(mfrow = c(1,1)) 64 | } 65 | } 66 | 67 | \keyword{cluster} 68 | \keyword{dplot} 69 | -------------------------------------------------------------------------------- /man/chevron.Rd: -------------------------------------------------------------------------------- 1 | \name{chevron} 2 | \alias{chevron} 3 | \title{Simulated minefield data} 4 | \usage{data(chevron)} 5 | \description{A set of simulated bivariate minefield data 6 | (1104 observations).} 7 | \references{ 8 | A. Dasgupta and A. E. Raftery (1998). 9 | Detecting features in spatial point processes with clutter via model-based 10 | clustering. 11 | \emph{Journal of the American Statistical Association 93:294-302}. 12 | 13 | C. Fraley and A.E. Raftery (1998). 14 | \emph{Computer Journal 41:578-588}. 15 | 16 | G. J. McLachlan and D. Peel (2000). 17 | \emph{Finite Mixture Models}, Wiley, pages 110-112. 
18 | } 19 | 20 | \keyword{datasets} 21 | 22 | -------------------------------------------------------------------------------- /man/clPairs.Rd: -------------------------------------------------------------------------------- 1 | \name{clPairs} 2 | \alias{clPairs} 3 | \alias{clPairsLegend} 4 | 5 | \title{Pairwise Scatter Plots showing Classification} 6 | 7 | \description{ 8 | Creates a scatter plot for each pair of variables in given data. 9 | Observations in different classes are represented by different colors and symbols. 10 | } 11 | 12 | \usage{ 13 | clPairs(data, classification, 14 | symbols = NULL, colors = NULL, cex = NULL, 15 | labels = dimnames(data)[[2]], cex.labels = 1.5, 16 | gap = 0.2, grid = FALSE, \dots) 17 | 18 | clPairsLegend(x, y, class, col, pch, cex, box = TRUE, \dots) 19 | } 20 | 21 | \arguments{ 22 | \item{data}{ 23 | A numeric vector, matrix, or data frame of observations. Categorical 24 | variables are not allowed. If a matrix or data frame, rows 25 | correspond to observations and columns correspond to variables. 26 | } 27 | \item{classification}{ 28 | A numeric or character vector representing a classification of observations 29 | (rows) of \code{data}. 30 | } 31 | \item{symbols}{ 32 | Either an integer or character vector assigning a plotting symbol to each 33 | unique class in \code{classification}. Elements in \code{symbols} 34 | correspond to classes in order of appearance in the sequence of 35 | observations (the order used by the function \code{unique}). 36 | The default is given by \code{mclust.options("classPlotSymbols")}. 37 | } 38 | \item{colors}{ 39 | Either an integer or character vector assigning a color to each 40 | unique class in \code{classification}. Elements in \code{colors} 41 | correspond to classes in order of appearance in the sequence of 42 | observations (the order used by the function \code{unique}). 43 | The default is given by \code{mclust.options("classPlotColors")}. 
44 | } 45 | \item{cex}{ 46 | A vector of numerical values specifying the size of the plotting 47 | symbol for each unique class in \code{classification}. Values in 48 | \code{cex} correspond to classes in order of appearance in the 49 | sequence of observations (the order used by the function \code{unique}). 50 | By default \code{cex = 1} for all classes is used. 51 | } 52 | \item{labels}{ 53 | A vector of character strings for labelling the variables. The default 54 | is to use the column dimension names of \code{data}. 55 | } 56 | \item{cex.labels}{ 57 | A numerical value specifying the size of the text labels. 58 | } 59 | \item{gap}{ 60 | An argument specifying the distance between subplots (see \code{\link{pairs}}). 61 | } 62 | \item{grid}{ 63 | A logical specifying if grid lines should be added to panels (see \code{\link{grid}}). 64 | } 65 | \item{x,y}{ 66 | The x and y co-ordinates with respect to a graphic device having 67 | plotting region coordinates \code{par("usr" = c(0,1,0,1))}. 68 | } 69 | \item{class}{ 70 | The class labels. 71 | } 72 | \item{box}{ 73 | A logical, if \code{TRUE} then a box is drawn around the current plot figure. 74 | } 75 | \item{col, pch}{ 76 | The colors and plotting symbols appearing in the legend. 77 | } 78 | \item{\dots}{ 79 | For a \code{clPairs} call may be additional arguments to be passed to \code{\link{pairs}}. 80 | For a \code{clPairsLegend} call may be additional arguments to be passed to \code{\link{legend}}. 81 | } 82 | } 83 | 84 | \details{ 85 | The function \code{clPairs()} draws scatter plots on the current graphics device for each combination of variables in \code{data}. Observations of different classifications are labeled with different symbols. 86 | 87 | The function \code{clPairsLegend()} can be used to add a legend. See examples below. 
88 | } 89 | 90 | \value{ 91 | The function \code{clPairs()} invisibly returns a list with the following components: 92 | \item{class}{A character vector of class labels.} 93 | \item{col}{A vector of colors used for each class.} 94 | \item{pch}{A vector of plotting symbols used for each class.} 95 | } 96 | 97 | \seealso{ 98 | \code{\link{pairs}}, 99 | \code{\link{coordProj}}, 100 | \code{\link{mclust.options}} 101 | } 102 | \examples{ 103 | clPairs(iris[,1:4], cl = iris$Species) 104 | 105 | clp <- clPairs(iris[,1:4], cl = iris$Species, lower.panel = NULL) 106 | clPairsLegend(0.1, 0.4, class = clp$class, 107 | col = clp$col, pch = clp$pch, 108 | title = "Iris data") 109 | 110 | } 111 | \keyword{cluster} 112 | -------------------------------------------------------------------------------- /man/classError.Rd: -------------------------------------------------------------------------------- 1 | \name{classError} 2 | \alias{classError} 3 | 4 | \title{Classification error} 5 | 6 | \description{ 7 | Computes the error rate of a given classification relative to the known classes, and the location of misclassified data points.} 8 | 9 | \usage{ 10 | classError(classification, class) 11 | } 12 | \arguments{ 13 | \item{classification}{ 14 | A numeric, character vector or factor specifying the predicted class 15 | labels. Must have the same length as \code{class}. 16 | } 17 | \item{class}{ 18 | A numeric, character vector or factor of known true class labels. 19 | Must have the same length as \code{classification}. 20 | } 21 | 22 | } 23 | \value{ 24 | A list with the following two components: 25 | \item{misclassified}{ 26 | The indexes of the misclassified data points in a minimum error 27 | mapping between the predicted classification and the known true classes. 28 | } 29 | \item{errorRate}{ 30 | The error rate corresponding to a minimum error mapping 31 | between the predicted classification and the known true classes.
32 | } 33 | } 34 | 35 | \details{ 36 | If more than one mapping between predicted classification and the known 37 | truth corresponds to the minimum number of classification errors, 38 | only one possible set of misclassified observations is returned. 39 | } 40 | 41 | \seealso{ 42 | \code{\link{map}} 43 | \code{\link{mapClass}}, 44 | \code{\link{table}} 45 | } 46 | \examples{ 47 | (a <- rep(1:3, 3)) 48 | (b <- rep(c("A", "B", "C"), 3)) 49 | classError(a, b) 50 | 51 | (a <- sample(1:3, 9, replace = TRUE)) 52 | (b <- sample(c("A", "B", "C"), 9, replace = TRUE)) 53 | classError(a, b) 54 | 55 | class <- factor(c(5,5,5,2,5,3,1,2,1,1), levels = 1:5) 56 | probs <- matrix(c(0.15, 0.01, 0.08, 0.23, 0.01, 0.23, 0.59, 0.02, 0.38, 0.45, 57 | 0.36, 0.05, 0.30, 0.46, 0.15, 0.13, 0.06, 0.19, 0.27, 0.17, 58 | 0.40, 0.34, 0.18, 0.04, 0.47, 0.34, 0.32, 0.01, 0.03, 0.11, 59 | 0.04, 0.04, 0.09, 0.05, 0.28, 0.27, 0.02, 0.03, 0.12, 0.25, 60 | 0.05, 0.56, 0.35, 0.22, 0.09, 0.03, 0.01, 0.75, 0.20, 0.02), 61 | nrow = 10, ncol = 5) 62 | cbind(class, probs, map = map(probs)) 63 | classError(map(probs), class) 64 | } 65 | \keyword{cluster} 66 | -------------------------------------------------------------------------------- /man/clustCombi-internals.Rd: -------------------------------------------------------------------------------- 1 | \name{clustCombi-internal} 2 | \title{Internal clustCombi functions} 3 | \alias{combi} 4 | \alias{pcws2_reg} 5 | \alias{pcws3_reg} 6 | \alias{xlog} 7 | \description{ 8 | Internal functions not intended to be called directly by users. 
9 | } 10 | 11 | \keyword{internal} 12 | -------------------------------------------------------------------------------- /man/clustCombiOptim.Rd: -------------------------------------------------------------------------------- 1 | \name{clustCombiOptim} 2 | \alias{clustCombiOptim} 3 | 4 | \title{Optimal number of clusters obtained by combining mixture components} 5 | 6 | \description{ 7 | Return the optimal number of clusters by combining mixture components based on the entropy method discussed in the reference given below. 8 | } 9 | 10 | \usage{ 11 | clustCombiOptim(object, reg = 2, plot = FALSE, \dots) 12 | } 13 | \arguments{ 14 | \item{object}{ 15 | An object of class \code{'clustCombi'} resulting from a call to \code{\link{clustCombi}}. 16 | } 17 | \item{reg}{ 18 | The number of parts of the piecewise linear regression for the entropy plots. 19 | Choose 2 for a two-segment piecewise linear regression model (i.e. 1 change-point), and 3 for a three-segment piecewise linear regression model (i.e. 3 change-points). 20 | } 21 | \item{plot}{ 22 | Logical, if \code{TRUE} an entropy plot is also produced. 23 | } 24 | \item{\dots}{Further arguments passed to or from other methods.} 25 | } 26 | 27 | \value{ 28 | The function returns a list with the following components: 29 | \item{numClusters.combi}{The estimated number of clusters.} 30 | \item{z.combi}{A matrix whose \emph{[i,k]}th entry is the probability that observation \emph{i} in the data belongs to the \emph{k}th cluster.} 31 | \item{cluster.combi}{The clustering labels.} 32 | } 33 | \references{ 34 | J.-P. Baudry, A. E. Raftery, G. Celeux, K. Lo and R. Gottardo (2010). Combining mixture components for clustering. \emph{Journal of Computational and Graphical Statistics, 19(2):332-353.} 35 | } 36 | \author{ 37 | J.-P. Baudry, A. E. Raftery, L. 
Scrucca 38 | } 39 | \seealso{ 40 | \code{\link{combiPlot}}, \code{\link{entPlot}}, \code{\link{clustCombi}} 41 | } 42 | \examples{ 43 | data(Baudry_etal_2010_JCGS_examples) 44 | output <- clustCombi(data = ex4.1) 45 | combiOptim <- clustCombiOptim(output) 46 | str(combiOptim) 47 | 48 | # plot optimal clustering with alpha color transparency proportional to uncertainty 49 | zmax <- apply(combiOptim$z.combi, 1, max) 50 | col <- mclust.options("classPlotColors")[combiOptim$cluster.combi] 51 | vadjustcolor <- Vectorize(adjustcolor) 52 | alphacol = (zmax - 1/combiOptim$numClusters.combi)/(1-1/combiOptim$numClusters.combi) 53 | col <- vadjustcolor(col, alpha.f = alphacol) 54 | plot(ex4.1, col = col, pch = mclust.options("classPlotSymbols")[combiOptim$cluster.combi]) 55 | } 56 | 57 | \keyword{ cluster } 58 | -------------------------------------------------------------------------------- /man/combMat.Rd: -------------------------------------------------------------------------------- 1 | \name{combMat} 2 | \alias{combMat} 3 | \title{ 4 | Combining Matrix 5 | } 6 | \description{ 7 | Create a combining matrix 8 | } 9 | \usage{ 10 | combMat(K, l1, l2) 11 | } 12 | \arguments{ 13 | \item{K}{ 14 | The original number of classes: the matrix will define a combining from K to (K-1) classes. 15 | } 16 | \item{l1}{ 17 | Label of one of the two classes to be combined. 18 | } 19 | \item{l2}{ 20 | Label of the other class to be combined. 21 | } 22 | } 23 | \value{ 24 | If \code{z} is a vector (length \emph{K}) whose \emph{k}th entry is the probability that an observation belongs to the \emph{k}th class in a \emph{K}-classes classification, then \code{combiM \%*\% z} is the vector (length \emph{K-1}) whose \emph{k}th entry is the probability that the observation belongs to the \emph{k}th class in the \emph{K-1}-classes classification obtained by merging classes \code{l1} and \code{l2} in the initial classification. 25 | } 26 | \author{ 27 | J.-P. Baudry, A. E. Raftery, L. 
Scrucca 28 | } 29 | \seealso{ 30 | \code{\link{clustCombi}}, \code{\link{combiPlot}} 31 | } 32 | %\examples{} 33 | \keyword{ cluster } 34 | 35 | -------------------------------------------------------------------------------- /man/combiPlot.Rd: -------------------------------------------------------------------------------- 1 | \name{combiPlot} 2 | \alias{combiPlot} 3 | \title{ 4 | Plot Classifications Corresponding to Successive Combined Solutions 5 | } 6 | \description{ 7 | Plot classifications corresponding to successive combined solutions. 8 | } 9 | \usage{ 10 | combiPlot(data, z, combiM, \dots) 11 | } 12 | \arguments{ 13 | \item{data}{ 14 | The data. 15 | } 16 | \item{z}{ 17 | A matrix whose [i,k]th entry is the probability that observation i in the data belongs to the kth class, for the initial solution (ie before any combining). Typically, the one returned by \code{Mclust}/BIC. 18 | } 19 | \item{combiM}{ 20 | A "combining matrix" (as provided by \code{\link{clustCombi}}), ie a matrix whose kth row contains only zeros, but in columns corresponding to the labels of the classes in the initial solution to be merged together to get the combined solution. 21 | } 22 | \item{\dots}{ 23 | Other arguments to be passed to the \code{\link{Mclust}} plot functions. 24 | } 25 | } 26 | \value{ 27 | Plot the classifications obtained by MAP from the matrix \code{t(combiM \%*\% t(z))}, which is the matrix whose [i,k]th entry is the probability that observation i in the data belongs to the kth class, according to the combined solution obtained by merging (according to \code{combiM}) the initial solution described by \code{z}. 28 | } 29 | \references{ 30 | J.-P. Baudry, A. E. Raftery, G. Celeux, K. Lo and R. Gottardo (2010). Combining mixture components for clustering. \emph{Journal of Computational and Graphical Statistics, 19(2):332-353.} 31 | } 32 | \author{ 33 | J.-P. Baudry, A. E. Raftery, L. 
Scrucca 34 | } 35 | \seealso{ 36 | \code{\link{clustCombi}}, \code{\link{combMat}}, \code{\link{clustCombi}} 37 | } 38 | \examples{ 39 | \donttest{ 40 | data(Baudry_etal_2010_JCGS_examples) 41 | MclustOutput <- Mclust(ex4.1) 42 | 43 | MclustOutput$G # Mclust/BIC selected 6 classes 44 | 45 | par(mfrow=c(2,2)) 46 | 47 | combiM0 <- diag(6) # is the identity matrix 48 | # no merging: plot the initial solution, given by z 49 | combiPlot(ex4.1, MclustOutput$z, combiM0, cex = 3) 50 | title("No combining") 51 | 52 | combiM1 <- combMat(6, 1, 2) # let's merge classes labeled 1 and 2 53 | combiM1 54 | combiPlot(ex4.1, MclustOutput$z, combiM1) 55 | title("Combine 1 and 2") 56 | 57 | # let's merge classes labeled 1 and 2, and then components labeled (in this 58 | # new 5-classes combined solution) 1 and 2 59 | combiM2 <- combMat(5, 1, 2) \%*\% combMat(6, 1, 2) 60 | combiM2 61 | combiPlot(ex4.1, MclustOutput$z, combiM2) 62 | title("Combine 1, 2 and then 1 and 2 again") 63 | 64 | plot(0,0,type="n", xlab = "", ylab = "", axes = FALSE) 65 | legend("center", legend = 1:6, 66 | col = mclust.options("classPlotColors"), 67 | pch = mclust.options("classPlotSymbols"), 68 | title = "Class labels:")} 69 | } 70 | 71 | \keyword{cluster} 72 | -------------------------------------------------------------------------------- /man/combiTree.Rd: -------------------------------------------------------------------------------- 1 | \name{combiTree} 2 | \alias{combiTree} 3 | 4 | \title{Tree structure obtained from combining mixture components} 5 | 6 | \description{The method implemented in \code{\link{clustCombi}} can be used for combining Gaussian mixture components for clustering. 
This provides a hierarchical structure which can be graphically represented as a tree.} 7 | 8 | \usage{ 9 | combiTree(object, type = c("triangle", "rectangle"), 10 | yaxis = c("entropy", "step"), 11 | edgePar = list(col = "darkgray", lwd = 2), 12 | \dots) 13 | } 14 | 15 | \arguments{ 16 | \item{object}{ 17 | An object of class \code{'clustCombi'} resulting from a call to \code{\link{clustCombi}}. 18 | } 19 | \item{type}{ 20 | A string specifying the dendrogram's type. Possible values are \code{"triangle"} (default), and \code{"rectangle"}. 21 | } 22 | \item{yaxis}{ 23 | A string specifying the quantity used to draw the vertical axis. Possible values are \code{"entropy"} (default), and \code{"step"}. 24 | } 25 | \item{edgePar}{ 26 | A list of plotting parameters. See \code{\link[stats]{dendrogram}}. 27 | } 28 | \item{\dots}{Further arguments passed to or from other methods.} 29 | } 30 | %\details{} 31 | \value{ 32 | The function always draw a tree and invisibly returns an object of class \code{'dendrogram'} for fine tuning. 33 | } 34 | %\references{} 35 | \author{L. 
Scrucca} 36 | %\note{} 37 | 38 | \seealso{\code{\link{clustCombi}}} 39 | 40 | \examples{ 41 | \donttest{ 42 | data(Baudry_etal_2010_JCGS_examples) 43 | output <- clustCombi(data = ex4.1) 44 | combiTree(output) 45 | combiTree(output, type = "rectangle") 46 | combiTree(output, yaxis = "step") 47 | combiTree(output, type = "rectangle", yaxis = "step") 48 | } 49 | } 50 | 51 | \keyword{cluster} 52 | \keyword{hplot} 53 | -------------------------------------------------------------------------------- /man/covw.Rd: -------------------------------------------------------------------------------- 1 | \name{covw} 2 | \alias{covw} 3 | 4 | \title{Weighted means, covariance and scattering matrices conditioning on a weighted matrix} 5 | 6 | \description{ 7 | Compute efficiently (via Fortran code) the means, covariance and scattering matrices conditioning on a weighted or indicator matrix 8 | } 9 | \usage{ 10 | covw(X, Z, normalize = TRUE) 11 | } 12 | 13 | \arguments{ 14 | \item{X}{A \eqn{(n x p)} data matrix, with \eqn{n} observations on \eqn{p} variables.} 15 | \item{Z}{A \eqn{(n x G)} matrix of weights, with \eqn{G} number of groups.} 16 | \item{normalize}{A logical indicating if rows of \code{Z} should be normalized to sum to one.} 17 | } 18 | 19 | \value{A list with the following components: 20 | \item{mean}{A \eqn{(p x G)} matrix of weighted means.} 21 | \item{S}{A \eqn{(p x p x G)} array of weighted covariance matrices.} 22 | \item{W}{A \eqn{(p x p x G)} array of weighted scattering matrices.} 23 | 24 | } 25 | 26 | %\seealso{} 27 | \author{M. Fop and L. 
Scrucca} 28 | 29 | \examples{ 30 | # Z as an indicator matrix 31 | X <- iris[,1:4] 32 | Z <- unmap(iris$Species) 33 | str(covw(X, Z)) 34 | # Z as a matrix of weights 35 | mod <- Mclust(X, G = 3, modelNames = "VVV") 36 | str(covw(X, mod$z)) 37 | } 38 | 39 | \keyword{multivariate} 40 | -------------------------------------------------------------------------------- /man/crimcoords.Rd: -------------------------------------------------------------------------------- 1 | \name{crimcoords} 2 | \alias{crimcoords} 3 | \alias{print.crimcoords} 4 | \alias{summary.crimcoords} 5 | \alias{print.summary.crimcoords} 6 | \alias{plot.crimcoords} 7 | 8 | \title{Discriminant coordinates data projection} 9 | 10 | \description{ 11 | Compute the discriminant coordinates or crimcoords obtained by projecting the observed data from multiple groups onto the discriminant subspace. 12 | The optimal projection subspace is given by the linear transformation of the original variables that maximizes the ratio of the between-groups covariance (which represents groups separation) to the pooled within-group covariance (which represents within-group dispersion).} 13 | 14 | \usage{ 15 | crimcoords(data, classification, 16 | numdir = NULL, 17 | unbiased = FALSE, 18 | \dots) 19 | 20 | \method{summary}{crimcoords}(object, numdir, \dots) 21 | 22 | \method{plot}{crimcoords}(x, \dots) 23 | } 24 | 25 | \arguments{ 26 | 27 | \item{data}{ 28 | A numeric vector, matrix, or data frame of observations. Categorical 29 | variables are not allowed. If a matrix or data frame, rows 30 | correspond to observations and columns correspond to variables. 31 | } 32 | 33 | \item{classification}{ 34 | A vector (numerical, character string, or factor) giving the 35 | groups classification (either the known class labels or the estimated 36 | clusters) for the observed data.} 37 | 38 | \item{numdir}{ 39 | An integer value specifying the number of directions of the 40 | discriminant subspace to return. 
If not provided, the maximal number of 41 | directions are returned (which is given by the number of non-null 42 | eigenvalues, the minimum among the number of variables and the number 43 | of groups minus one). 44 | However, since the effectiveness of the discriminant coordinates in 45 | highlighting the separation of groups is decreasing, it might be useful 46 | to provide a smaller value, say 2 or 3.} 47 | 48 | \item{unbiased}{ 49 | A logical specifying if unbiased estimates should be used for the 50 | between-groups and within-groups covariances. By default 51 | \code{unbiased = FALSE} so MLE estimates are used. 52 | Note that the use of unbiased or MLE estimates only changes the 53 | eigenvalues and eigenvectors of the generalized eigendecomposition by 54 | a constant of proportionality, so the discriminant coordinates or 55 | crimcoords are essentially the same.} 56 | 57 | \item{object, x}{ 58 | An object of class \code{crimcoords} as returned by \code{crimcoords()} function.} 59 | 60 | \item{\dots}{further arguments passed to or from other methods.} 61 | } 62 | 63 | \value{ 64 | A list of class \code{crimcoords} with the following components: 65 | 66 | \item{means}{A matrix of within-groups means.} 67 | \item{B}{The between-groups covariance matrix.} 68 | \item{W}{The pooled within-groups covariance matrix.} 69 | \item{evalues}{A vector of eigenvalues.} 70 | \item{basis}{A matrix of eigenvectors specifying the basis of the 71 | discriminant subspace.} 72 | \item{projection}{A matrix of projected data points onto the discriminant 73 | subspace.} 74 | \item{classification}{A vector giving the groups classification.} 75 | } 76 | 77 | \references{ 78 | Gnanadesikan, R. (1977) \emph{Methods for Statistical Data Analysis of Multivariate Observations}. John Wiley \& Sons, Sec. 4.2. 79 | 80 | Flury, B. (1997) \emph{A First Course in Multivariate Statistics}. Springer, Sec. 7.3.
81 | } 82 | 83 | \author{ 84 | Luca Scrucca \email{luca.scrucca@unipg.it} 85 | } 86 | 87 | %\note{} 88 | 89 | \seealso{\code{\link{MclustDR}}, \code{\link{clPairs}}.} 90 | 91 | \examples{ 92 | # discriminant coordinates for the iris data using known classes 93 | data("iris") 94 | CRIMCOORDS = crimcoords(iris[,-5], iris$Species) 95 | summary(CRIMCOORDS) 96 | plot(CRIMCOORDS) 97 | 98 | # banknote data 99 | data("banknote") 100 | 101 | # discriminant coordinate on known classes 102 | CRIMCOORDS = crimcoords(banknote[,-1], banknote$Status) 103 | summary(CRIMCOORDS) 104 | plot(CRIMCOORDS) 105 | 106 | # discriminant coordinates on estimated clusters 107 | mod = Mclust(banknote[,-1]) 108 | CRIMCOORDS = crimcoords(banknote[,-1], mod$classification) 109 | summary(CRIMCOORDS) 110 | plot(CRIMCOORDS) 111 | plot(CRIMCOORDS$projection, type = "n") 112 | text(CRIMCOORDS$projection, cex = 0.8, 113 | labels = strtrim(banknote$Status, 2), 114 | col = mclust.options("classPlotColors")[1:mod$G][mod$classification]) 115 | } 116 | 117 | \keyword{multivariate} 118 | -------------------------------------------------------------------------------- /man/cross.Rd: -------------------------------------------------------------------------------- 1 | \name{cross} 2 | \alias{cross} 3 | 4 | \title{Simulated Cross Data} 5 | 6 | \usage{data(cross)} 7 | 8 | \description{ 9 | A 500 by 3 matrix in which the first column is the classification and 10 | the remaining columns are the data from a simulation of two crossed 11 | elliptical Gaussians.
12 | } 13 | 14 | \examples{ 15 | # This dataset was created as follows 16 | \donttest{ 17 | n <- 250 18 | set.seed(0) 19 | cross <- rbind(matrix(rnorm(n*2), n, 2) \%*\% diag(c(1,9)), 20 | matrix(rnorm(n*2), n, 2) \%*\% diag(c(1,9))[,2:1]) 21 | cross <- cbind(c(rep(1,n),rep(2,n)), cross) 22 | } 23 | } 24 | 25 | \keyword{datasets} 26 | -------------------------------------------------------------------------------- /man/cvMclustDA.Rd: -------------------------------------------------------------------------------- 1 | \name{cvMclustDA} 2 | \alias{cvMclustDA} 3 | 4 | \title{MclustDA cross-validation} 5 | 6 | \description{ 7 | V-fold cross-validation for classification models based on Gaussian 8 | finite mixture modelling. 9 | } 10 | 11 | \usage{ 12 | cvMclustDA(object, nfold = 10, 13 | prop = object$prop, 14 | verbose = interactive(), 15 | \dots) 16 | } 17 | 18 | \arguments{ 19 | \item{object}{ 20 | An object of class \code{'MclustDA'} resulting from a call to 21 | \code{\link{MclustDA}}. 22 | } 23 | \item{nfold}{ 24 | An integer specifying the number of folds (by default 10-fold CV is 25 | used). 26 | } 27 | 28 | \item{prop}{ 29 | A vector of class prior probabilities, which if not provided default 30 | to the class proportions in the training data. 31 | } 32 | 33 | \item{verbose}{ 34 | A logical controlling if a text progress bar is displayed during 35 | the cross-validation procedure. By default is \code{TRUE} if the 36 | session is interactive, and \code{FALSE} otherwise. 37 | } 38 | 39 | \item{\dots }{Further arguments passed to or from other methods.} 40 | } 41 | 42 | \details{ 43 | The function implements V-fold cross-validation for classification 44 | models fitted by \code{\link{MclustDA}}. 45 | Classification error and Brier score are the metrics returned, but other 46 | metrics can be computed using the output returned by this function 47 | (see Examples section below).
48 | } 49 | 50 | \value{ 51 | The function returns a list with the following components: 52 | 53 | \item{classification}{a factor of cross-validated class labels.} 54 | 55 | \item{z}{a matrix containing the cross-validated probabilities for class assignment.} 56 | 57 | \item{ce}{the cross-validation classification error.} 58 | 59 | \item{se.ce}{the standard error of the cross-validated classification error.} 60 | 61 | \item{brier}{the cross-validation Brier score.} 62 | 63 | \item{se.brier}{the standard error of the cross-validated Brier score.} 64 | } 65 | 66 | \author{Luca Scrucca} 67 | 68 | \seealso{ 69 | \code{\link{MclustDA}}, 70 | \code{\link{predict.MclustDA}}, 71 | \code{\link{classError}}, 72 | \code{\link{BrierScore}} 73 | } 74 | 75 | \examples{ 76 | \donttest{ 77 | # Iris data 78 | Class <- iris$Species 79 | X <- iris[,1:4] 80 | 81 | ## EDDA model with common covariance (essentially equivalent to linear discriminant analysis) 82 | irisEDDA <- MclustDA(X, Class, modelType = "EDDA", modelNames = "EEE") 83 | cv <- cvMclustDA(irisEDDA) # 10-fold CV (default) 84 | str(cv) 85 | cv <- cvMclustDA(irisEDDA, nfold = length(Class)) # LOO-CV 86 | str(cv) 87 | 88 | ## MclustDA model selected by BIC 89 | irisMclustDA <- MclustDA(X, Class) 90 | cv <- cvMclustDA(irisMclustDA) # 10-fold CV (default) 91 | str(cv) 92 | 93 | # Banknote data 94 | data("banknote") 95 | Class <- banknote$Status 96 | X <- banknote[,2:7] 97 | 98 | ## EDDA model selected by BIC 99 | banknoteEDDA <- MclustDA(X, Class, modelType = "EDDA") 100 | cv <- cvMclustDA(banknoteEDDA) # 10-fold CV (default) 101 | str(cv) 102 | 103 | (ConfusionMatrix <- table(Pred = cv$classification, Class)) 104 | TP <- ConfusionMatrix[1,1] 105 | FP <- ConfusionMatrix[1,2] 106 | FN <- ConfusionMatrix[2,1] 107 | TN <- ConfusionMatrix[2,2] 108 | (Sensitivity <- TP/(TP+FN)) 109 | (Specificity <- TN/(FP+TN)) 110 | } 111 | } 112 | 113 | \keyword{multivariate} 114 |
-------------------------------------------------------------------------------- /man/decomp2sigma.Rd: -------------------------------------------------------------------------------- 1 | \name{decomp2sigma} 2 | \alias{decomp2sigma} 3 | \title{ 4 | Convert mixture component covariances to matrix form 5 | } 6 | \description{ 7 | Converts covariances from a parameterization by eigenvalue decomposition 8 | or Cholesky factorization to representation as a 3-D array. 9 | } 10 | \usage{ 11 | decomp2sigma(d, G, scale, shape, orientation, \dots) 12 | } 13 | \arguments{ 14 | \item{d}{ 15 | The dimension of the data. 16 | } 17 | \item{G}{ 18 | The number of components in the mixture model. 19 | } 20 | \item{scale}{ 21 | Either a \emph{G}-vector giving the scale of the covariance (the 22 | \emph{d}th root of its determinant) for each component in the 23 | mixture model, or a single numeric value if the scale is the same 24 | for each component. 25 | } 26 | \item{shape}{ 27 | Either a \emph{G} by \emph{d} matrix in which the \emph{k}th column 28 | is the shape of the covariance matrix (normalized to have 29 | determinant 1) for the \emph{k}th component, or a \emph{d}-vector 30 | giving a common shape for all components. 31 | } 32 | \item{orientation}{ 33 | Either a \emph{d} by \emph{d} by \emph{G} array whose \code{[,,k]}th 34 | entry is the orthonormal matrix whose columns are the eigenvectors 35 | of the covariance matrix of the \emph{k}th component, or a 36 | \emph{d} by \emph{d} orthonormal matrix if the mixture components have a common 37 | orientation. The \code{orientation} component of \code{decomp} can 38 | be omitted in spherical and diagonal models, for which the principal 39 | components are parallel to the coordinate axes so that the 40 | orientation matrix is the identity. 41 | } 42 | \item{\dots}{ 43 | Catches unused arguments from an indirect or list call via \code{do.call}.
44 | } 45 | } 46 | \value{ 47 | A 3-D array whose \code{[,,k]}th component is the 48 | covariance matrix of the \emph{k}th component in an MVN mixture model. 49 | } 50 | 51 | \seealso{ 52 | \code{\link{sigma2decomp}} 53 | } 54 | \examples{ 55 | meEst <- meVEV(iris[,-5], unmap(iris[,5])) 56 | names(meEst) 57 | meEst$parameters$variance 58 | 59 | dec <- meEst$parameters$variance 60 | decomp2sigma(d=dec$d, G=dec$G, shape=dec$shape, scale=dec$scale, 61 | orientation = dec$orientation) 62 | \donttest{ 63 | do.call("decomp2sigma", dec) ## alternative call 64 | } 65 | } 66 | \keyword{cluster} 67 | -------------------------------------------------------------------------------- /man/dens.Rd: -------------------------------------------------------------------------------- 1 | \name{dens} 2 | \alias{dens} 3 | \title{ 4 | Density for Parameterized MVN Mixtures 5 | } 6 | \description{ 7 | Computes densities of observations in parameterized MVN mixtures. 8 | } 9 | \usage{ 10 | dens(data, modelName, parameters, logarithm = FALSE, warn=NULL, \dots) 11 | } 12 | \arguments{ 13 | \item{data}{ 14 | A numeric vector, matrix, or data frame of observations. Categorical 15 | variables are not allowed. If a matrix or data frame, rows 16 | correspond to observations and columns correspond to variables. 17 | } 18 | \item{modelName}{ 19 | A character string indicating the model. The help file for 20 | \code{\link{mclustModelNames}} describes the available models. 21 | } 22 | \item{parameters}{ 23 | The parameters of the model: 24 | \describe{ 25 | \item{\code{pro}}{ 26 | The vector of mixing proportions for the components of the mixture. 27 | } 28 | \item{\code{mean}}{ 29 | The mean for each component. If there is more than one component, 30 | this is a matrix whose kth column is the mean of the \emph{k}th 31 | component of the mixture model. 32 | } 33 | \item{\code{variance}}{ 34 | A list of variance parameters for the model. 
35 | The components of this list depend on the model 36 | specification. See the help file for \code{\link{mclustVariance}} 37 | for details. 38 | } 39 | 40 | } 41 | } 42 | \item{logarithm}{ 43 | A logical value indicating whether or not the logarithm of the component 44 | densities should be returned. The default is to return the component 45 | densities, obtained from the log component densities by exponentiation. 46 | } 47 | \item{warn}{ 48 | A logical value indicating whether or not a warning should be issued 49 | when computations fail. The default is \code{warn=FALSE}. 50 | } 51 | \item{\dots}{ 52 | Catches unused arguments in indirect or list calls via \code{do.call}. 53 | } 54 | } 55 | \value{ 56 | A numeric vector whose \emph{i}th component is the density of the 57 | \emph{ith} observation in \code{data} in the MVN mixture specified 58 | by \code{parameters}. 59 | } 60 | 61 | \seealso{ 62 | \code{\link{cdens}}, 63 | \code{\link{mclust.options}}, 64 | \code{\link{do.call}} 65 | } 66 | \examples{ 67 | \donttest{ 68 | faithfulModel <- Mclust(faithful) 69 | Dens <- dens(modelName = faithfulModel$modelName, data = faithful, 70 | parameters = faithfulModel$parameters) 71 | Dens 72 | 73 | ## alternative call 74 | do.call("dens", faithfulModel)} 75 | } 76 | \keyword{cluster} 77 | 78 | -------------------------------------------------------------------------------- /man/densityMclust.Rd: -------------------------------------------------------------------------------- 1 | \name{densityMclust} 2 | \alias{densityMclust} 3 | 4 | \title{Density Estimation via Model-Based Clustering} 5 | 6 | \description{ 7 | Produces a density estimate for each data point using a Gaussian finite 8 | mixture model from \code{Mclust}. 9 | } 10 | 11 | \usage{ 12 | densityMclust(data, \dots, plot = TRUE) 13 | } 14 | 15 | \arguments{ 16 | \item{data}{ 17 | A numeric vector, matrix, or data frame of observations. Categorical 18 | variables are not allowed. 
If a matrix or data frame, rows 19 | correspond to observations and columns correspond to variables. 20 | } 21 | \item{\dots }{ 22 | Additional arguments for the \code{\link{Mclust}} function. 23 | In particular, setting the arguments \code{G} and \code{modelNames} 24 | allow one to specify the number of mixture components and the type of 25 | model to be fitted. By default an "optimal" model is selected based 26 | on the BIC criterion. 27 | } 28 | \item{plot}{ 29 | A logical value specifying if the estimated density should be 30 | plotted. For more controls on the resulting graph see the associated 31 | \code{\link{plot.densityMclust}} method. 32 | } 33 | } 34 | 35 | \value{ 36 | An object of class \code{densityMclust}, which inherits from 37 | \code{Mclust}. This contains all the components described in 38 | \code{\link{Mclust}} and the additional element: 39 | \item{density}{The density evaluated at the input \code{data} 40 | computed from the estimated model.} 41 | } 42 | 43 | %\details{} 44 | 45 | \references{ 46 | Scrucca L., Fraley C., Murphy T. B. and Raftery A. E. (2023) \emph{Model-Based Clustering, Classification, and Density Estimation Using mclust in R}. Chapman & Hall/CRC, ISBN: 978-1032234953, https://mclust-org.github.io/book/ 47 | 48 | Scrucca L., Fop M., Murphy T. B. and Raftery A. E. (2016) mclust 5: clustering, classification and density estimation using Gaussian finite mixture models, \emph{The R Journal}, 8/1, pp. 289-317. 49 | 50 | Fraley C. and Raftery A. E. (2002) Model-based clustering, discriminant analysis and density estimation, \emph{Journal of the American Statistical Association}, 97/458, pp. 611-631. 51 | } 52 | 53 | \author{Revised version by Luca Scrucca based on 54 | the original code by C. Fraley and A.E. Raftery.} 55 | 56 | \seealso{ 57 | \code{\link{plot.densityMclust}}, 58 | \code{\link{Mclust}}, 59 | \code{\link{summary.Mclust}}, 60 | \code{\link{predict.densityMclust}}.
61 | } 62 | 63 | \examples{ 64 | dens <- densityMclust(faithful$waiting) 65 | summary(dens) 66 | summary(dens, parameters = TRUE) 67 | plot(dens, what = "BIC", legendArgs = list(x = "topright")) 68 | plot(dens, what = "density", data = faithful$waiting) 69 | 70 | dens <- densityMclust(faithful, modelNames = "EEE", G = 3, plot = FALSE) 71 | summary(dens) 72 | summary(dens, parameters = TRUE) 73 | plot(dens, what = "density", data = faithful, 74 | drawlabels = FALSE, points.pch = 20) 75 | plot(dens, what = "density", type = "hdr") 76 | plot(dens, what = "density", type = "hdr", prob = c(0.1, 0.9)) 77 | plot(dens, what = "density", type = "hdr", data = faithful) 78 | plot(dens, what = "density", type = "persp") 79 | 80 | \donttest{ 81 | dens <- densityMclust(iris[,1:4], G = 2) 82 | summary(dens, parameters = TRUE) 83 | plot(dens, what = "density", data = iris[,1:4], 84 | col = "slategrey", drawlabels = FALSE, nlevels = 7) 85 | plot(dens, what = "density", type = "hdr", data = iris[,1:4]) 86 | plot(dens, what = "density", type = "persp", col = grey(0.9)) 87 | } 88 | } 89 | 90 | \keyword{cluster} 91 | -------------------------------------------------------------------------------- /man/densityMclust.diagnostic.Rd: -------------------------------------------------------------------------------- 1 | \name{densityMclust.diagnostic} 2 | \alias{densityMclust.diagnostic} 3 | 4 | \title{Diagnostic plots for \code{mclustDensity} estimation} 5 | 6 | \description{ 7 | Diagnostic plots for density estimation. Only available for the one-dimensional case. 
8 | } 9 | 10 | \usage{ 11 | densityMclust.diagnostic(object, type = c("cdf", "qq"), 12 | col = c("black", "black"), 13 | lwd = c(2,1), lty = c(1,1), 14 | legend = TRUE, grid = TRUE, 15 | \dots) 16 | } 17 | 18 | \arguments{ 19 | \item{object}{An object of class \code{'mclustDensity'} obtained from a call to \code{\link{densityMclust}} function.} 20 | 21 | \item{type}{The type of graph requested: 22 | \describe{ 23 | \item{\code{"cdf"} =}{a plot of the estimated CDF versus the empirical distribution function.} 24 | \item{\code{"qq"} =}{a Q-Q plot of sample quantiles versus the quantiles obtained from the inverse of the estimated cdf.} 25 | } 26 | } 27 | 28 | \item{col}{A pair of values for the color to be used for plotting, respectively, the estimated CDF and the empirical cdf.} 29 | 30 | \item{lwd}{A pair of values for the line width to be used for plotting, respectively, the estimated CDF and the empirical cdf.} 31 | 32 | \item{lty}{A pair of values for the line type to be used for plotting, respectively, the estimated CDF and the empirical cdf.} 33 | 34 | \item{legend}{A logical indicating if a legend must be added to the plot of fitted CDF vs the empirical CDF.} 35 | 36 | \item{grid}{A logical indicating if a \code{\link{grid}} should be added to the plot.} 37 | 38 | \item{\dots}{Additional arguments.} 39 | } 40 | 41 | \details{ 42 | The two diagnostic plots for density estimation in the one-dimensional case are discussed in Loader (1999, pp. 87-90). 43 | 44 | } 45 | 46 | % \value{} 47 | 48 | \references{ 49 | Loader C. (1999), Local Regression and Likelihood. New York, Springer. 50 | 51 | Scrucca L., Fraley C., Murphy T. B. and Raftery A. E. (2023) \emph{Model-Based Clustering, Classification, and Density Estimation Using mclust in R}. Chapman & Hall/CRC, ISBN: 978-1032234953, https://mclust-org.github.io/book/ 52 | } 53 | 54 | \author{Luca Scrucca} 55 | 56 | \seealso{ 57 | \code{\link{densityMclust}}, 58 | \code{\link{plot.densityMclust}}.
59 | } 60 | 61 | \examples{ 62 | \donttest{ 63 | x <- faithful$waiting 64 | dens <- densityMclust(x, plot = FALSE) 65 | plot(dens, x, what = "diagnostic") 66 | # or 67 | densityMclust.diagnostic(dens, type = "cdf") 68 | densityMclust.diagnostic(dens, type = "qq") 69 | } 70 | } 71 | \keyword{cluster} 72 | \keyword{dplot} 73 | -------------------------------------------------------------------------------- /man/diabetes.Rd: -------------------------------------------------------------------------------- 1 | \name{diabetes} 2 | \alias{diabetes} 3 | \docType{data} 4 | 5 | \title{Diabetes Data (flawed)} 6 | 7 | \description{The data set contains three measurements made on 145 non-obese adult patients classified into three groups.} 8 | 9 | \usage{data(diabetes)} 10 | 11 | \format{A data frame with the following variables: 12 | \describe{ 13 | \item{class}{The type of diabetes: \code{Normal}, \code{Overt}, and \code{Chemical}.} 14 | \item{glucose}{Area under plasma glucose curve after a three hour oral glucose tolerance test (OGTT).} 15 | \item{insulin}{Area under plasma insulin curve after a three hour oral glucose tolerance test (OGTT).} 16 | \item{sspg}{Steady state plasma glucose.} 17 | } 18 | } 19 | 20 | \details{This dataset is \emph{flawed} (compare with the reference) and it is provided here only for backward compatibility. A 5-variable version of the Reaven and Miller data is available in package \pkg{rrcov}. The \emph{glucose} and \emph{sspg} columns in this dataset are identical to the \emph{fpg} and \emph{insulin} columns, respectively in the \pkg{rrcov} version. The \emph{insulin} column in this dataset differs from the \emph{glucose} column in the \pkg{rrcov} version in one entry: observation 104 has the value 45 in the \emph{insulin} column in this data, and 455 in the corresponding \emph{glucose} column of the \pkg{rrcov} version.} 21 | 22 | \source{Reaven, G. M. and Miller, R. G. (1979).
An attempt to define the nature of chemical diabetes using a multidimensional analysis. \emph{Diabetologia} 16:17-24.} 23 | 24 | \keyword{datasets} 25 | -------------------------------------------------------------------------------- /man/dmvnorm.Rd: -------------------------------------------------------------------------------- 1 | \name{dmvnorm} 2 | \alias{dmvnorm} 3 | 4 | \title{Density of multivariate Gaussian distribution} 5 | 6 | \description{ 7 | Efficiently computes the density of observations for a generic multivariate Gaussian distribution. 8 | } 9 | 10 | \usage{ 11 | dmvnorm(data, mean, sigma, log = FALSE) 12 | } 13 | 14 | \arguments{ 15 | \item{data}{ 16 | A numeric vector, matrix, or data frame of observations. Categorical 17 | variables are not allowed. If a matrix or data frame, rows 18 | correspond to observations and columns correspond to variables. 19 | } 20 | \item{mean}{ 21 | A vector of means for each variable. 22 | } 23 | \item{sigma}{ 24 | A positive definite covariance matrix. 25 | } 26 | \item{log}{ 27 | A logical value indicating whether or not the logarithm of the densities 28 | should be returned. 29 | } 30 | } 31 | \value{ 32 | A numeric vector whose \emph{i}th element gives the density of the 33 | \emph{ith} observation in \code{data} for the multivariate Gaussian 34 | distribution with parameters \code{mean} and \code{sigma}. 
35 | } 36 | 37 | \seealso{ 38 | \code{\link{dnorm}}, 39 | \code{\link{dens}} 40 | } 41 | 42 | \examples{ 43 | # univariate 44 | ngrid <- 101 45 | x <- seq(-5, 5, length = ngrid) 46 | dens <- dmvnorm(x, mean = 1, sigma = 5) 47 | plot(x, dens, type = "l") 48 | 49 | # bivariate 50 | ngrid <- 101 51 | x1 <- x2 <- seq(-5, 5, length = ngrid) 52 | mu <- c(1,0) 53 | sigma <- matrix(c(1,0.5,0.5,2), 2, 2) 54 | dens <- dmvnorm(as.matrix(expand.grid(x1, x2)), mu, sigma) 55 | dens <- matrix(dens, ngrid, ngrid) 56 | image(x1, x2, dens) 57 | contour(x1, x2, dens, add = TRUE) 58 | } 59 | 60 | -------------------------------------------------------------------------------- /man/dupPartition.Rd: -------------------------------------------------------------------------------- 1 | \name{dupPartition} 2 | \alias{dupPartition} 3 | 4 | \title{Partition the data by grouping together duplicated data} 5 | 6 | \description{ 7 | Duplicated data are grouped together to form a basic partition that can be used to start hierarchical agglomeration. 8 | } 9 | \usage{ 10 | dupPartition(data) 11 | } 12 | \arguments{ 13 | \item{data}{ 14 | A numeric vector, matrix, or data frame of observations. 15 | If a matrix or data frame, rows correspond to observations (\eqn{n}) and 16 | columns correspond to variables (\eqn{d}). 17 | } 18 | } 19 | \value{ 20 | A vector of indices indicating the partition. 
21 | } 22 | \seealso{ 23 | \code{\link{hc}} 24 | } 25 | \examples{ 26 | \donttest{ 27 | dupPartition(iris[,1:4]) 28 | dupPartition(iris) 29 | dupPartition(iris$Species) 30 | } 31 | } 32 | \keyword{cluster} 33 | -------------------------------------------------------------------------------- /man/emControl.Rd: -------------------------------------------------------------------------------- 1 | \name{emControl} 2 | \alias{emControl} 3 | 4 | \title{Set control values for use with the EM algorithm} 5 | 6 | \description{ 7 | Supplies a list of values including tolerances for singularity and 8 | convergence assessment, for use with functions involving EM within \emph{MCLUST}. 9 | } 10 | \usage{ 11 | emControl(eps, tol, itmax, equalPro) 12 | } 13 | \arguments{ 14 | \item{eps}{ 15 | A scalar tolerance associated with deciding when to terminate 16 | computations due to computational singularity in 17 | covariances. Smaller values of \code{eps} allow computations to 18 | proceed nearer to singularity. The default is the relative machine 19 | precision \code{.Machine$double.eps}, which is approximately 20 | \eqn{2e-16} on IEEE-compliant machines. 21 | } 22 | \item{tol}{ 23 | A vector of length two giving relative convergence tolerances for the 24 | log-likelihood and for parameter convergence in the inner loop for models 25 | with iterative M-step ("VEI", "VEE", "EVE", "VVE", "VEV"), respectively. 26 | The default is \code{c(1.e-5, sqrt(.Machine$double.eps))}. 27 | If only one number is supplied, it is used as the tolerance 28 | for the outer iterations and the tolerance for the inner 29 | iterations is as in the default. 30 | } 31 | \item{itmax}{ 32 | A vector of length two giving integer limits on the number of EM 33 | iterations and on the number of iterations in the inner loop for 34 | models with iterative M-step ("VEI", "VEE", "EVE", "VVE", "VEV"), 35 | respectively.
The default is 36 | \code{c(.Machine$integer.max, .Machine$integer.max)} 37 | allowing termination to be completely governed by \code{tol}. 38 | If only one number is supplied, it is used as the iteration 39 | limit for the outer iteration only. 40 | } 41 | \item{equalPro}{ 42 | Logical variable indicating whether or not the mixing proportions are 43 | equal in the model. Default: \code{equalPro = FALSE}. 44 | } 45 | } 46 | \value{ 47 | A named list in which the names are the names of the arguments 48 | and the values are the values supplied to the arguments. 49 | } 50 | \details{ 51 | \code{emControl} is provided for assigning values and defaults 52 | for EM within \emph{MCLUST}. 53 | } 54 | 55 | \seealso{ 56 | \code{\link{em}}, 57 | \code{\link{estep}}, 58 | \code{\link{me}}, 59 | \code{\link{mstep}}, 60 | \code{\link{mclustBIC}} 61 | } 62 | \examples{ 63 | irisBIC <- mclustBIC(iris[,-5], control = emControl(tol = 1.e-6)) 64 | summary(irisBIC, iris[,-5]) 65 | } 66 | \keyword{cluster} 67 | -------------------------------------------------------------------------------- /man/entPlot.Rd: -------------------------------------------------------------------------------- 1 | \name{entPlot} 2 | \alias{entPlot} 3 | \title{ 4 | Plot Entropy Plots 5 | } 6 | \description{ 7 | Plot "entropy plots" to help select the number of classes from a hierarchy of combined clusterings. 8 | } 9 | \usage{ 10 | entPlot(z, combiM, abc = c("standard", "normalized"), reg = 2, \dots) 11 | } 12 | \arguments{ 13 | \item{z}{ 14 | A matrix whose \code{[i,k]}th entry is the probability that observation \emph{i} in the data belongs to the \emph{k}th class, for the initial solution (ie before any combining). Typically, the one returned by \code{Mclust}/BIC. 
15 | } 16 | \item{combiM}{ 17 | A list of "combining matrices" (as provided by \code{clustCombi}), ie \code{combiM[[K]]} is the matrix whose \emph{k}th row contains only zeros, but in columns corresponding to the labels of the classes in the \emph{(K+1)}-classes solution to be merged to get the \emph{K}-classes combined solution. \code{combiM} must contain matrices from \code{K} = number of classes in \code{z} to one. 18 | } 19 | \item{abc}{ 20 | Choose one or more of: "standard", "normalized", to specify whether the number of observations involved in each combining step should be taken into account to scale the plots or not. 21 | } 22 | \item{reg}{ 23 | The number of parts of the piecewise linear regression for the entropy plots. Choose one or more of: 2 (for 1 change-point), 3 (for 2 change-points). 24 | } 25 | \item{\dots}{ 26 | Other graphical arguments to be passed to the plot functions. 27 | } 28 | } 29 | \details{ 30 | Please see the article cited in the references for more details. A clear elbow in the "entropy plot" should suggest the user to consider the corresponding number(s) of class(es). 31 | } 32 | \value{ 33 | if \code{abc = "standard"}, plots the entropy against the number of clusters and the difference between the entropy of successive combined solutions against the number of clusters. 34 | if \code{abc = "normalized"}, plots the entropy against the cumulated number of observations involved in the successive combining steps and the difference between the entropy of successive combined solutions divided by the number of observations involved in the corresponding combining step against the number of clusters. 35 | } 36 | \references{ 37 | J.-P. Baudry, A. E. Raftery, G. Celeux, K. Lo and R. Gottardo (2010). Combining mixture components for clustering. \emph{Journal of Computational and Graphical Statistics, 19(2):332-353.} 38 | } 39 | \author{ 40 | J.-P. Baudry, A. E. Raftery, L. 
Scrucca 41 | } 42 | \seealso{ 43 | \code{\link{plot.clustCombi}}, \code{\link{combiPlot}}, \code{\link{clustCombi}} 44 | } 45 | \examples{ 46 | \donttest{ 47 | data(Baudry_etal_2010_JCGS_examples) 48 | # run Mclust to get the MclustOutput 49 | output <- clustCombi(data = ex4.2, modelNames = "VII") 50 | 51 | entPlot(output$MclustOutput$z, output$combiM, reg = c(2,3)) 52 | # legend: in red, the single-change-point piecewise linear regression; 53 | # in blue, the two-change-point piecewise linear regression. 54 | } 55 | } 56 | \keyword{ cluster } 57 | -------------------------------------------------------------------------------- /man/errorBars.Rd: -------------------------------------------------------------------------------- 1 | \name{errorBars} 2 | \alias{errorBars} 3 | 4 | \title{Draw error bars on a plot} 5 | 6 | \description{ 7 | Draw error bars at x from upper to lower. If \code{horizontal = FALSE} (default) 8 | bars are drawn vertically, otherwise horizontally. 9 | } 10 | 11 | \usage{ 12 | errorBars(x, upper, lower, width = 0.1, code = 3, angle = 90, horizontal = FALSE, \dots) 13 | } 14 | 15 | \arguments{ 16 | \item{x}{A vector of values where the bars must be drawn.} 17 | \item{upper}{A vector of upper values where the bars must end.} 18 | \item{lower}{A vector of lower values where the bars must start.} 19 | \item{width}{A value specifying the width of the end-point segment.} 20 | \item{code}{An integer code specifying the kind of arrows to be drawn. For details see \code{\link[graphics]{arrows}}.} 21 | \item{angle}{A value specifying the angle at the arrow edge. 
For details see \code{\link[graphics]{arrows}}.} 22 | \item{horizontal}{A logical specifying if bars should be drawn vertically (default) or horizontally.} 23 | \item{\dots}{Further arguments are passed to \code{\link[graphics]{arrows}}.} 24 | } 25 | 26 | %\value{} 27 | 28 | \examples{ 29 | par(mfrow=c(2,2)) 30 | # Create a simple example dataset 31 | x <- 1:5 32 | n <- c(10, 15, 12, 6, 3) 33 | se <- c(1, 1.2, 2, 1, .5) 34 | # upper and lower bars 35 | b <- barplot(n, ylim = c(0, max(n)*1.5)) 36 | errorBars(b, lower = n-se, upper = n+se, lwd = 2, col = "red3") 37 | # one side bars 38 | b <- barplot(n, ylim = c(0, max(n)*1.5)) 39 | errorBars(b, lower = n, upper = n+se, lwd = 2, col = "red3", code = 1) 40 | # 41 | plot(x, n, ylim = c(0, max(n)*1.5), pch = 0) 42 | errorBars(x, lower = n-se, upper = n+se, lwd = 2, col = "red3") 43 | # 44 | dotchart(n, labels = x, pch = 19, xlim = c(0, max(n)*1.5)) 45 | errorBars(x, lower = n-se, upper = n+se, col = "red3", horizontal = TRUE) 46 | } 47 | -------------------------------------------------------------------------------- /man/estep.Rd: -------------------------------------------------------------------------------- 1 | \name{estep} 2 | \alias{estep} 3 | \title{ 4 | E-step for parameterized Gaussian mixture models. 5 | } 6 | \description{ 7 | Implements the expectation step of the EM algorithm for parameterized Gaussian 8 | mixture models. 9 | } 10 | \usage{ 11 | estep(data, modelName, parameters, warn = NULL, \dots) 12 | } 13 | \arguments{ 14 | \item{data}{ 15 | A numeric vector, matrix, or data frame of observations. 16 | Categorical variables are not allowed. 17 | If a matrix or data frame, rows correspond to observations and 18 | columns correspond to variables. 19 | } 20 | \item{modelName}{ 21 | A character string indicating the model. The help file for 22 | \code{\link{mclustModelNames}} describes the available models. 23 | } 24 | \item{parameters}{ 25 | A named list giving the parameters of the model.
26 | The components are as follows: 27 | \describe{ 28 | \item{\code{pro}}{ 29 | Mixing proportions for the components of the mixture. 30 | If the model includes a Poisson term for noise, there 31 | should be one more mixing proportion than the number 32 | of Gaussian components. 33 | } 34 | \item{\code{mean}}{ 35 | The mean for each component. If there is more than one component, 36 | this is a matrix whose kth column is the mean of the \emph{k}th 37 | component of the mixture model. 38 | } 39 | \item{\code{variance}}{ 40 | A list of variance parameters for the model. 41 | The components of this list depend on the model 42 | specification. See the help file for \code{\link{mclustVariance}} 43 | for details. 44 | } 45 | \item{\code{Vinv}}{ 46 | An estimate of the reciprocal hypervolume of the data region. 47 | If set to NULL or a negative value, the default is determined 48 | by applying function \code{hypvol} to the data. 49 | Used only when \code{pro} includes an additional 50 | mixing proportion for a noise component. 51 | } 52 | } 53 | } 54 | \item{warn}{ 55 | A logical value indicating whether or not a warning should be issued 56 | when computations fail. The default is \code{warn=FALSE}. 57 | } 58 | \item{\dots}{ 59 | Catches unused arguments in indirect or list calls via \code{do.call}. 60 | } 61 | } 62 | \value{ 63 | A list including the following components: 64 | \item{modelName}{ 65 | A character string identifying the model (same as the input argument). 66 | } 67 | \item{z}{ 68 | A matrix whose \code{[i,k]}th entry is the conditional probability 69 | of the \emph{i}th observation belonging to the \emph{k}th component 70 | of the mixture. 71 | } 72 | \item{parameters}{ 73 | The input parameters. 74 | } 75 | \item{loglik}{ 76 | The log-likelihood for the data in the mixture model. 77 | } 78 | \item{Attributes}{ 79 | \code{"WARNING"}: an appropriate warning if problems are 80 | encountered in the computations. 
81 | } 82 | } 83 | \seealso{ 84 | \code{\link{estepE}}, \dots, 85 | \code{\link{estepVVV}}, 86 | \code{\link{em}}, 87 | \code{\link{mstep}}, 88 | \code{\link{mclust.options}} 89 | \code{\link{mclustVariance}} 90 | } 91 | \examples{ 92 | \donttest{ 93 | msEst <- mstep(modelName = "VVV", data = iris[,-5], z = unmap(iris[,5])) 94 | names(msEst) 95 | 96 | estep(modelName = msEst$modelName, data = iris[,-5], 97 | parameters = msEst$parameters)} 98 | } 99 | \keyword{cluster} 100 | -------------------------------------------------------------------------------- /man/estepE.Rd: -------------------------------------------------------------------------------- 1 | \name{estepE} 2 | \alias{estepE} 3 | \alias{estepV} 4 | \alias{estepEII} 5 | \alias{estepVII} 6 | \alias{estepEEI} 7 | \alias{estepVEI} 8 | \alias{estepEVI} 9 | \alias{estepVVI} 10 | \alias{estepEEE} 11 | \alias{estepEEV} 12 | \alias{estepVEV} 13 | \alias{estepVVV} 14 | \alias{estepEVE} 15 | \alias{estepEVV} 16 | \alias{estepVEE} 17 | \alias{estepVVE} 18 | 19 | \title{ 20 | E-step in the EM algorithm for a parameterized Gaussian mixture model. 21 | } 22 | \description{ 23 | Implements the expectation step in the EM algorithm for a 24 | parameterized Gaussian mixture model. 
25 | } 26 | \usage{ 27 | estepE(data, parameters, warn = NULL, \dots) 28 | estepV(data, parameters, warn = NULL, \dots) 29 | estepEII(data, parameters, warn = NULL, \dots) 30 | estepVII(data, parameters, warn = NULL, \dots) 31 | estepEEI(data, parameters, warn = NULL, \dots) 32 | estepVEI(data, parameters, warn = NULL, \dots) 33 | estepEVI(data, parameters, warn = NULL, \dots) 34 | estepVVI(data, parameters, warn = NULL, \dots) 35 | estepEEE(data, parameters, warn = NULL, \dots) 36 | estepEEV(data, parameters, warn = NULL, \dots) 37 | estepVEV(data, parameters, warn = NULL, \dots) 38 | estepVVV(data, parameters, warn = NULL, \dots) 39 | estepEVE(data, parameters, warn = NULL, \dots) 40 | estepEVV(data, parameters, warn = NULL, \dots) 41 | estepVEE(data, parameters, warn = NULL, \dots) 42 | estepVVE(data, parameters, warn = NULL, \dots) 43 | } 44 | \arguments{ 45 | \item{data}{ 46 | A numeric vector, matrix, or data frame of observations. 47 | Categorical variables are not allowed. 48 | If a matrix or data frame, rows correspond to observations and 49 | columns correspond to variables. 50 | } 51 | \item{parameters}{ 52 | The parameters of the model: 53 | %\itemize{ 54 | %\item An argument describing the variance (depends on the model): 55 | \describe{ 56 | \item{\code{pro}}{ 57 | Mixing proportions for the components of the mixture. 58 | If the model includes a Poisson term for noise, there 59 | should be one more mixing proportion than the number 60 | of Gaussian components. 61 | } 62 | \item{mu}{ 63 | The mean for each component. If there is more than one component, 64 | this is a matrix whose columns are the means of the components. 65 | } 66 | \item{\code{variance}}{ 67 | A list of variance parameters for the model. 68 | The components of this list depend on the model 69 | specification. See the help file for \code{\link{mclustVariance}} 70 | for details. 71 | } 72 | \item{\code{Vinv}}{ 73 | An estimate of the reciprocal hypervolume of the data region. 
74 | If not supplied or set to a negative value, the default is 75 | determined by applying function \code{hypvol} to the data. 76 | Used only when \code{pro} includes an additional 77 | mixing proportion for a noise component. 78 | } 79 | } 80 | %} 81 | } 82 | \item{warn}{ 83 | A logical value indicating whether or not certain warnings should be issued. 84 | The default is given by \code{mclust.options("warn")}. 85 | } 86 | \item{\dots}{ 87 | Catches unused arguments in indirect or list calls via \code{do.call}. 88 | } 89 | } 90 | \value{ 91 | A list including the following components: 92 | \item{modelName}{ 93 | Character string identifying the model. 94 | } 95 | \item{z}{ 96 | A matrix whose \code{[i,k]}th entry is the 97 | conditional probability of the \emph{i}th observation belonging to 98 | the \emph{k}th component of the mixture. 99 | } 100 | \item{parameters}{ 101 | The input parameters. 102 | } 103 | \item{loglik}{ 104 | The log-likelihood for the data in the mixture model. 105 | } 106 | \item{Attribute}{ 107 | \code{"WARNING"}: An appropriate warning if problems are 108 | encountered in the computations. 109 | } 110 | } 111 | \seealso{ 112 | \code{\link{estep}}, 113 | \code{\link{em}}, 114 | \code{\link{mstep}}, 115 | \code{\link{do.call}}, 116 | \code{\link{mclustVariance}}, 117 | \code{\link{mclust.options}}.
118 | } 119 | \examples{ 120 | \donttest{ 121 | msEst <- mstepEII(data = iris[,-5], z = unmap(iris[,5])) 122 | names(msEst) 123 | 124 | estepEII(data = iris[,-5], parameters = msEst$parameters)} 125 | } 126 | \keyword{cluster} 127 | -------------------------------------------------------------------------------- /man/figures/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cran/mclust/65e2a1c0538807f5e52ade030f12f5af4c1bc746/man/figures/logo.png -------------------------------------------------------------------------------- /man/hcE.Rd: -------------------------------------------------------------------------------- 1 | \name{hcE} 2 | \alias{hcE} 3 | \alias{hcV} 4 | \alias{hcEII} 5 | \alias{hcVII} 6 | \alias{hcEEE} 7 | \alias{hcVVV} 8 | 9 | \title{Model-based Hierarchical Clustering} 10 | 11 | \description{ 12 | Agglomerative hierarchical clustering based on maximum likelihood 13 | for a Gaussian mixture model parameterized by eigenvalue decomposition. 14 | } 15 | 16 | \usage{ 17 | hcE(data, partition = NULL, minclus=1, \dots) 18 | hcV(data, partition = NULL, minclus = 1, alpha = 1, \dots) 19 | hcEII(data, partition = NULL, minclus = 1, \dots) 20 | hcVII(data, partition = NULL, minclus = 1, alpha = 1, \dots) 21 | hcEEE(data, partition = NULL, minclus = 1, \dots) 22 | hcVVV(data, partition = NULL, minclus = 1, alpha = 1, beta = 1, \dots) 23 | } 24 | \arguments{ 25 | \item{data}{ 26 | A numeric vector, matrix, or data frame of observations. Categorical 27 | variables are not allowed. If a matrix or data frame, rows 28 | correspond to observations and columns correspond to variables. 29 | } 30 | \item{partition}{ 31 | A numeric or character vector representing a partition of 32 | observations (rows) of \code{data}. If provided, group merges will 33 | start with this partition. Otherwise, each observation is assumed to 34 | be in a cluster by itself at the start of agglomeration. 
35 | } 36 | \item{minclus}{ 37 | A number indicating the number of clusters at which to stop the 38 | agglomeration. The default is to stop when all observations have been 39 | merged into a single cluster. 40 | } 41 | \item{alpha, beta}{ 42 | Additional tuning parameters needed for initialization in some models. 43 | For details, see Fraley 1998. The defaults provided are usually adequate. 44 | } 45 | \item{\dots}{ 46 | Catch unused arguments from a \code{do.call} call. 47 | } 48 | } 49 | \value{ 50 | A numeric two-column matrix in which the \emph{i}th row gives the minimum 51 | index for observations in each of the two clusters merged at the 52 | \emph{i}th stage of agglomerative hierarchical clustering. 53 | } 54 | \details{ 55 | Most models have memory usage of the order of the square of the 56 | number of groups in the initial partition for fast execution. 57 | Some models, such as equal variance or \code{"EEE"}, 58 | do not admit a fast algorithm under the usual agglomerative 59 | hierarchical clustering paradigm. 60 | These use less memory but are much slower to execute. 61 | } 62 | \references{ 63 | J. D. Banfield and A. E. Raftery (1993). 64 | Model-based Gaussian and non-Gaussian Clustering. 65 | \emph{Biometrics 49:803-821}. 66 | 67 | C. Fraley (1998). 68 | Algorithms for model-based Gaussian hierarchical clustering. 69 | \emph{SIAM Journal on Scientific Computing 20:270-281}. 70 | 71 | C. Fraley and A. E. Raftery (2002). 72 | Model-based clustering, discriminant analysis, and density estimation. 73 | \emph{Journal of the American Statistical Association 97:611-631}.
74 | } 75 | \seealso{ 76 | \code{\link{hc}}, 77 | \code{\link{hclass}} 78 | \code{\link{hcRandomPairs}} 79 | } 80 | \examples{ 81 | hcTree <- hcEII(data = iris[,-5]) 82 | cl <- hclass(hcTree,c(2,3)) 83 | 84 | \donttest{ 85 | par(pty = "s", mfrow = c(1,1)) 86 | clPairs(iris[,-5],cl=cl[,"2"]) 87 | clPairs(iris[,-5],cl=cl[,"3"]) 88 | 89 | par(mfrow = c(1,2)) 90 | dimens <- c(1,2) 91 | coordProj(iris[,-5], classification=cl[,"2"], dimens=dimens) 92 | coordProj(iris[,-5], classification=cl[,"3"], dimens=dimens) 93 | } 94 | } 95 | \keyword{cluster} 96 | -------------------------------------------------------------------------------- /man/hcRandomPairs.Rd: -------------------------------------------------------------------------------- 1 | \name{hcRandomPairs} 2 | \alias{hcRandomPairs} 3 | \alias{randomPairs} 4 | 5 | \title{Random hierarchical structure} 6 | 7 | \description{Create a hierarchical structure using a random hierarchical partition of the data.} 8 | 9 | \usage{ 10 | hcRandomPairs(data, seed = NULL, \dots) 11 | } 12 | \arguments{ 13 | \item{data}{ 14 | A numeric matrix or data frame of observations. 15 | If a matrix or data frame, rows correspond to observations and 16 | columns correspond to variables. 17 | } 18 | \item{seed}{ 19 | Optional single value, interpreted as an integer, specifying the seed for random partition. 20 | } 21 | \item{\dots}{ 22 | Catches unused arguments in indirect or list calls via \code{do.call}. 23 | } 24 | } 25 | 26 | \value{ 27 | A numeric two-column matrix in which the \emph{i}th row gives the minimum 28 | index for observations in each of the two clusters merged at the 29 | \emph{i}th stage of a random agglomerative hierarchical clustering. 
30 | } 31 | 32 | \seealso{ 33 | \code{\link{hc}}, 34 | \code{\link{hclass}} 35 | \code{\link{hcVVV}} 36 | } 37 | 38 | \examples{ 39 | data <- iris[,1:4] 40 | randPairs <- hcRandomPairs(data) 41 | str(randPairs) 42 | # start model-based clustering from a random partition 43 | mod <- Mclust(data, initialization = list(hcPairs = randPairs)) 44 | summary(mod) 45 | } 46 | 47 | \keyword{cluster} 48 | -------------------------------------------------------------------------------- /man/hclass.Rd: -------------------------------------------------------------------------------- 1 | \name{hclass} 2 | \alias{hclass} 3 | \title{ 4 | Classifications from Hierarchical Agglomeration 5 | } 6 | \description{ 7 | Determines the classifications corresponding to different numbers of groups 8 | given merge pairs from hierarchical agglomeration. 9 | } 10 | \usage{ 11 | hclass(hcPairs, G) 12 | } 13 | \arguments{ 14 | \item{hcPairs}{ 15 | A numeric two-column matrix in which the \emph{i}th row gives the minimum 16 | index for observations in each of the two clusters merged at the 17 | \emph{i}th stage of agglomerative hierarchical clustering. 18 | } 19 | \item{G}{ 20 | An integer or vector of integers giving the number of clusters for which 21 | the corresponding classifications are wanted. 22 | } 23 | } 24 | \value{ 25 | A matrix with \code{length(G)} columns, each column 26 | corresponding to a classification. Columns are indexed by the character 27 | representation of the integers in \code{G}.
28 | } 29 | \seealso{ 30 | \code{\link{hc}}, 31 | \code{\link{hcE}} 32 | } 33 | \examples{ 34 | hcTree <- hc(modelName="VVV", data = iris[,-5]) 35 | cl <- hclass(hcTree,c(2,3)) 36 | 37 | \donttest{ 38 | par(pty = "s", mfrow = c(1,1)) 39 | clPairs(iris[,-5],cl=cl[,"2"]) 40 | clPairs(iris[,-5],cl=cl[,"3"]) 41 | } 42 | } 43 | \keyword{cluster} 44 | -------------------------------------------------------------------------------- /man/hdrlevels.Rd: -------------------------------------------------------------------------------- 1 | \name{hdrlevels} 2 | \alias{hdrlevels} 3 | 4 | \title{Highest Density Region (HDR) Levels} 5 | 6 | \description{ 7 | Compute the levels of Highest Density Regions (HDRs) for any density and probability levels. 8 | } 9 | 10 | \usage{ 11 | hdrlevels(density, prob) 12 | } 13 | 14 | \arguments{ 15 | \item{density}{A vector of density values computed on a set of (observed) evaluation points.} 16 | \item{prob}{A vector of probability levels in the range \eqn{[0,1]}.} 17 | } 18 | 19 | \value{ 20 | The function returns a vector of density values corresponding to HDRs at given probability levels. 21 | } 22 | 23 | \details{ 24 | From Hyndman (1996), let \eqn{f(x)} be the density function of a random 25 | variable \eqn{X}. Then the \eqn{100(1-\alpha)\%} HDR is the subset 26 | \eqn{R(f_\alpha)} of the sample space of \eqn{X} such that 27 | \deqn{ 28 | R(f_\alpha) = {x : f(x) \ge f_\alpha } 29 | } 30 | where \eqn{f_\alpha} is the largest constant such that 31 | \eqn{ 32 | Pr( X \in R(f_\alpha)) \ge 1-\alpha 33 | } 34 | } 35 | 36 | \seealso{ 37 | \code{\link{plot.densityMclust}} 38 | } 39 | 40 | \references{ 41 | Rob J. Hyndman (1996) Computing and Graphing Highest Density Regions. \emph{The American Statistician}, 50(2):120-126. 42 | } 43 | 44 | \author{L. 
Scrucca} 45 | 46 | \examples{ 47 | # Example: univariate Gaussian 48 | x <- rnorm(1000) 49 | f <- dnorm(x) 50 | a <- c(0.5, 0.25, 0.1) 51 | (f_a <- hdrlevels(f, prob = 1-a)) 52 | 53 | plot(x, f) 54 | abline(h = f_a, lty = 2) 55 | text(max(x), f_a, labels = paste0("f_", a), pos = 3) 56 | 57 | mean(f > f_a[1]) 58 | range(x[which(f > f_a[1])]) 59 | qnorm(1-a[1]/2) 60 | 61 | mean(f > f_a[2]) 62 | range(x[which(f > f_a[2])]) 63 | qnorm(1-a[2]/2) 64 | 65 | mean(f > f_a[3]) 66 | range(x[which(f > f_a[3])]) 67 | qnorm(1-a[3]/2) 68 | 69 | # Example 2: univariate Gaussian mixture 70 | set.seed(1) 71 | cl <- sample(1:2, size = 1000, prob = c(0.7, 0.3), replace = TRUE) 72 | x <- ifelse(cl == 1, 73 | rnorm(1000, mean = 0, sd = 1), 74 | rnorm(1000, mean = 4, sd = 1)) 75 | f <- 0.7*dnorm(x, mean = 0, sd = 1) + 0.3*dnorm(x, mean = 4, sd = 1) 76 | 77 | a <- 0.25 78 | (f_a <- hdrlevels(f, prob = 1-a)) 79 | 80 | plot(x, f) 81 | abline(h = f_a, lty = 2) 82 | text(max(x), f_a, labels = paste0("f_", a), pos = 3) 83 | 84 | mean(f > f_a) 85 | 86 | # find the regions of HDR 87 | ord <- order(x) 88 | f <- f[ord] 89 | x <- x[ord] 90 | x_a <- x[f > f_a] 91 | j <- which.max(diff(x_a)) 92 | region1 <- x_a[c(1,j)] 93 | region2 <- x_a[c(j+1,length(x_a))] 94 | plot(x, f, type = "l") 95 | abline(h = f_a, lty = 2) 96 | abline(v = region1, lty = 3, col = 2) 97 | abline(v = region2, lty = 3, col = 3) 98 | } 99 | \keyword{density} 100 | -------------------------------------------------------------------------------- /man/hypvol.Rd: -------------------------------------------------------------------------------- 1 | \name{hypvol} 2 | \alias{hypvol} 3 | \title{ 4 | Aproximate Hypervolume for Multivariate Data 5 | } 6 | \description{ 7 | Computes a simple approximation to the hypervolume of a multivariate 8 | data set. 9 | } 10 | \usage{ 11 | hypvol(data, reciprocal=FALSE) 12 | } 13 | \arguments{ 14 | \item{data}{ 15 | A numeric vector, matrix, or data frame of observations. 
Categorical 16 | variables are not allowed. If a matrix or data frame, rows 17 | correspond to observations and columns correspond to variables. 18 | } 19 | \item{reciprocal}{ 20 | A logical variable indicating whether or not the reciprocal 21 | hypervolume is desired rather than the hypervolume itself. The 22 | default is to return the hypervolume. 23 | } 24 | } 25 | \value{ 26 | Returns the minimum of the hypervolume computed from simple variable bounds 27 | and that computed from variable bounds of the principal component scores. 28 | Used for the default hypervolume parameter for the noise 29 | component when observations are designated as noise in \code{Mclust} 30 | and \code{mclustBIC}. 31 | } 32 | \references{ 33 | A. Dasgupta and A. E. Raftery (1998). 34 | Detecting features in spatial point processes with clutter via model-based 35 | clustering. 36 | \emph{Journal of the American Statistical Association 93:294-302}. 37 | 38 | C. Fraley and A.E. Raftery (1998). How many clusters? Which clustering method? Answers via model-based cluster analysis. 39 | \emph{Computer Journal 41:578-588}. 40 | 41 | C. Fraley and A. E. Raftery (2002). 42 | Model-based clustering, discriminant analysis, and density estimation. 43 | \emph{Journal of the American Statistical Association 97:611-631}. 44 | } 45 | \seealso{ 46 | \code{\link{mclustBIC}} 47 | } 48 | \examples{ 49 | hypvol(iris[,-5]) 50 | } 51 | \keyword{cluster} 52 | -------------------------------------------------------------------------------- /man/icl.Rd: -------------------------------------------------------------------------------- 1 | \name{icl} 2 | \alias{icl} 3 | \title{ 4 | ICL for an estimated Gaussian Mixture Model 5 | } 6 | \description{ 7 | Computes the ICL (Integrated Complete-data Likelihood) criterion for a Gaussian Mixture Model fitted by \code{\link{Mclust}}. 8 | } 9 | 10 | \usage{ 11 | icl(object, \dots) 12 | } 13 | 14 | \arguments{ 15 | \item{object}{ 16 | An object of class \code{'Mclust'} resulting from a call to \code{\link{Mclust}}.
17 | } 18 | \item{\dots}{Further arguments passed to or from other methods.} 19 | } 20 | 21 | \value{ 22 | The ICL for the given input MCLUST model. 23 | } 24 | 25 | \references{ 26 | Biernacki, C., Celeux, G., Govaert, G. (2000). 27 | Assessing a mixture model for clustering with the integrated completed likelihood. 28 | \emph{IEEE Trans. Pattern Analysis and Machine Intelligence}, 22 (7), 719-725. 29 | } 30 | \seealso{ 31 | \code{\link{Mclust}}, 32 | \code{\link{mclustBIC}}, 33 | \code{\link{mclustICL}}, 34 | \code{\link{bic}}. 35 | } 36 | 37 | \examples{ 38 | mod <- Mclust(iris[,1:4]) 39 | icl(mod) 40 | } 41 | 42 | \keyword{cluster} 43 | -------------------------------------------------------------------------------- /man/imputeData.Rd: -------------------------------------------------------------------------------- 1 | \name{imputeData} 2 | \alias{imputeData} 3 | \alias{matchCluster} 4 | 5 | \title{Missing data imputation via the \pkg{mix} package} 6 | 7 | \description{ 8 | Imputes missing data using the \pkg{mix} package. 9 | } 10 | 11 | \usage{ 12 | imputeData(data, categorical = NULL, seed = NULL, verbose = interactive()) 13 | } 14 | 15 | \arguments{ 16 | \item{data}{ 17 | A numeric vector, matrix, or data frame of observations containing 18 | missing values. Categorical variables are allowed. If a matrix 19 | or data frame, rows correspond to observations and columns 20 | correspond to variables. 21 | } 22 | \item{categorical}{ 23 | A logical vector whose \emph{i}th entry is \code{TRUE} if the 24 | \emph{i}th variable or column of \code{data} is to be interpreted as 25 | categorical and \code{FALSE} otherwise. The default is to assume that a 26 | variable is to be interpreted as categorical only if it is a factor. 27 | } 28 | \item{seed}{ 29 | A seed for the function \code{rngseed} that is used to initialize 30 | the random number generator in \pkg{mix}.
By default, a seed is 31 | chosen uniformly in the interval \code{(.Machine$integer.max/1024, 32 | .Machine$integer.max)}. 33 | } 34 | \item{verbose}{ 35 | A logical, if \code{TRUE} reports info about iterations of the algorithm. 36 | } 37 | } 38 | \value{ 39 | A dataset of the same dimensions as \code{data} with missing values 40 | filled in. 41 | } 42 | \references{ 43 | Schafer J. L. (1997). Analysis of Imcomplete Multivariate Data, Chapman and Hall. 44 | } 45 | \seealso{ 46 | \code{\link{imputePairs}} 47 | } 48 | \examples{ 49 | \donttest{ 50 | # Note that package 'mix' must be installed 51 | data(stlouis, package = "mix") 52 | 53 | # impute the continuos variables in the stlouis data 54 | stlimp <- imputeData(stlouis[,-(1:3)]) 55 | 56 | # plot imputed values 57 | imputePairs(stlouis[,-(1:3)], stlimp) 58 | } 59 | } 60 | \keyword{cluster} 61 | -------------------------------------------------------------------------------- /man/imputePairs.Rd: -------------------------------------------------------------------------------- 1 | \name{imputePairs} 2 | \alias{imputePairs} 3 | \title{ 4 | Pairwise Scatter Plots showing Missing Data Imputations 5 | } 6 | \description{ 7 | Creates a scatter plot for each pair of variables in given data, 8 | allowing display of imputations for missing values in different 9 | colors and symbols than non missing values. 10 | } 11 | \usage{ 12 | imputePairs(data, dataImp, 13 | symbols = c(1,16), colors = c("black", "red"), labels, 14 | panel = points, \dots, lower.panel = panel, upper.panel = panel, 15 | diag.panel = NULL, text.panel = textPanel, label.pos = 0.5 + 16 | has.diag/3, cex.labels = NULL, font.labels = 1, row1attop = TRUE, 17 | gap = 0.2) 18 | } 19 | \arguments{ 20 | \item{data}{ 21 | A numeric vector, matrix, or data frame of observations containing 22 | missing values. Categorical variables are not allowed. If a matrix 23 | or data frame, rows correspond to observations and columns 24 | correspond to variables. 
25 | } 26 | \item{dataImp}{ 27 | The dataset \code{data} with missing values imputed. 28 | } 29 | \item{symbols}{ 30 | Either an integer or character vector assigning plotting symbols to 31 | the nonmissing data and imputed values, respectively. The default is a 32 | closed circle for the nonmissing data 33 | and an open circle for the imputed values. 34 | } 35 | \item{colors}{ 36 | Either an integer or character vector assigning colors to 37 | the nonmissing data and imputed values, respectively. The default is 38 | black for the nonmissing data and red for the imputed values. 39 | } 40 | \item{labels}{ 41 | As in function \code{pairs}. 42 | } 43 | \item{panel}{ 44 | As in function \code{pairs}. 45 | } 46 | \item{\dots}{ 47 | As in function \code{pairs}. 48 | } 49 | \item{lower.panel}{ 50 | As in function \code{pairs}. 51 | } 52 | \item{upper.panel}{ 53 | As in function \code{pairs}. 54 | } 55 | \item{diag.panel}{ 56 | As in function \code{pairs}. 57 | } 58 | \item{text.panel}{ 59 | As in function \code{pairs}. 60 | } 61 | \item{label.pos}{ 62 | As in function \code{pairs}. 63 | } 64 | \item{cex.labels}{ 65 | As in function \code{pairs}. 66 | } 67 | \item{font.labels}{ 68 | As in function \code{pairs}. 69 | } 70 | \item{row1attop}{ 71 | As in function \code{pairs}. 72 | } 73 | \item{gap}{ 74 | As in function \code{pairs}. 75 | } 76 | } 77 | \value{ 78 | A pairs plot displaying the location of missing and nonmissing values. 79 | } 80 | 81 | \references{ 82 | Schafer J. L. (1997). Analysis of Incomplete Multivariate Data, Chapman and Hall.
83 | } 84 | \seealso{ 85 | \code{\link{pairs}}, 86 | \code{\link{imputeData}} 87 | } 88 | \examples{ 89 | \donttest{ 90 | # Note that package 'mix' must be installed 91 | data(stlouis, package = "mix") 92 | 93 | # impute the continuos variables in the stlouis data 94 | stlimp <- imputeData(stlouis[,-(1:3)]) 95 | 96 | # plot imputed values 97 | imputePairs(stlouis[,-(1:3)], stlimp) 98 | } 99 | } 100 | \keyword{cluster} 101 | -------------------------------------------------------------------------------- /man/logLik.Mclust.Rd: -------------------------------------------------------------------------------- 1 | \name{logLik.Mclust} 2 | \alias{logLik.Mclust} 3 | 4 | \title{Log-Likelihood of a \code{Mclust} object} 5 | 6 | \description{ 7 | Returns the log-likelihood for a \code{'Mclust'} object.} 8 | 9 | \usage{ 10 | \method{logLik}{Mclust}(object, \dots) 11 | } 12 | 13 | \arguments{ 14 | 15 | \item{object}{an object of class \code{'Mclust'} resulting from a call to \code{\link{Mclust}}.} 16 | 17 | \item{\dots}{further arguments passed to or from other methods.} 18 | } 19 | 20 | % \details{} 21 | 22 | \value{Returns an object of class \code{'logLik'} with an element providing the maximized log-likelihood, and further arguments giving the number of (estimated) parameters in the model (\code{"df"}) and the sample size (\code{"nobs"}).} 23 | 24 | \author{Luca Scrucca} 25 | 26 | % \note{} 27 | 28 | \seealso{\code{\link{Mclust}}.} 29 | 30 | \examples{ 31 | \donttest{ 32 | irisMclust <- Mclust(iris[,1:4]) 33 | summary(irisMclust) 34 | logLik(irisMclust) 35 | } 36 | } 37 | 38 | \keyword{multivariate} 39 | -------------------------------------------------------------------------------- /man/logLik.MclustDA.Rd: -------------------------------------------------------------------------------- 1 | \name{logLik.MclustDA} 2 | \alias{logLik.MclustDA} 3 | 4 | \title{Log-Likelihood of a \code{MclustDA} object} 5 | 6 | \description{ 7 | Returns the log-likelihood for a \code{MclustDA} 
object.} 8 | 9 | \usage{ 10 | \method{logLik}{MclustDA}(object, data, \dots) 11 | } 12 | 13 | \arguments{ 14 | 15 | \item{object}{an object of class \code{'MclustDA'} resulting from a call to \code{\link{MclustDA}}.} 16 | 17 | \item{data}{the data for which the log-likelihood must be computed. If missing, the observed data from the \code{'MclustDA'} object is used.} 18 | 19 | \item{\dots}{further arguments passed to or from other methods.} 20 | } 21 | 22 | % \details{} 23 | 24 | \value{Returns an object of class \code{'logLik'} with an element providing the maximized log-likelihood, and further arguments giving the number of (estimated) parameters in the model (\code{"df"}) and the sample size (\code{"nobs"}).} 25 | 26 | \author{Luca Scrucca} 27 | 28 | % \note{} 29 | 30 | \seealso{\code{\link{MclustDA}}.} 31 | 32 | \examples{ 33 | \donttest{ 34 | irisMclustDA <- MclustDA(iris[,1:4], iris$Species) 35 | summary(irisMclustDA) 36 | logLik(irisMclustDA) 37 | } 38 | } 39 | 40 | \keyword{multivariate} 41 | -------------------------------------------------------------------------------- /man/logsumexp.Rd: -------------------------------------------------------------------------------- 1 | \name{logsumexp} 2 | \alias{logsumexp} 3 | 4 | \title{Log sum of exponentials} 5 | 6 | \description{ 7 | Efficient implementation (via Fortran) of the log-sum-exp function. 8 | } 9 | 10 | \usage{ 11 | logsumexp(x, v = NULL) 12 | } 13 | 14 | \arguments{ 15 | \item{x}{a matrix of dimension \eqn{n \times k} of numerical values. If a vector is provided, it is converted to a single-row matrix.} 16 | \item{v}{an optional vector of length \eqn{k} of numerical values to be added to each row of \code{x} matrix. 
If not provided, a vector of zeros is used.} 17 | } 18 | 19 | \details{ 20 | Given the matrix \code{x}, for each row \eqn{x_{[i]} = [x_1, \dots, x_k]} (with \eqn{i=1,\dots,n}), the log-sum-exp (LSE) function calculates 21 | \deqn{ 22 | \text{LSE}(x_{[i]}) = \log \sum_{j=1}^k \exp(x_j + v_j) = m + \log \sum_{j=1}^k \exp(x_j + v_j - m) 23 | } 24 | where \eqn{m = \max(x_1+v_1, \dots, x_k+v_k)}. 25 | } 26 | 27 | \value{Returns a vector of values of length equal to the number of rows of \code{x}.} 28 | 29 | \author{Luca Scrucca} 30 | 31 | \seealso{\code{\link{softmax}}} 32 | 33 | \references{ 34 | Blanchard P., Higham D. J., Higham N. J. (2021). 35 | Accurately computing the log-sum-exp and softmax functions. 36 | \emph{IMA Journal of Numerical Analysis}, 41/4:2311–2330. 37 | \doi{10.1093/imanum/draa038} 38 | } 39 | 40 | \examples{ 41 | x = matrix(rnorm(15), 5, 3) 42 | v = log(c(0.5, 0.3, 0.2)) 43 | logsumexp(x, v) 44 | } 45 | -------------------------------------------------------------------------------- /man/majorityVote.Rd: -------------------------------------------------------------------------------- 1 | \name{majorityVote} 2 | \alias{majorityVote} 3 | 4 | \title{Majority vote} 5 | 6 | \description{ 7 | A function to compute the majority vote (some would say plurality) label in a vector of labels, breaking ties at random.} 8 | 9 | \usage{ 10 | majorityVote(x) 11 | } 12 | 13 | \arguments{ 14 | \item{x}{A vector of values, either numerical or not.} 15 | } 16 | 17 | \value{A list with the following components: 18 | \item{table}{A table of votes for each unique value of \code{x}.} 19 | \item{ind}{An integer specifying which unique value of \code{x} corresponds to the majority vote.} 20 | \item{majority}{A string specifying the majority vote label.} 21 | } 22 | 23 | %\seealso{} 24 | \author{L. 
Scrucca} 25 | 26 | \examples{ 27 | x <- c("A", "C", "A", "B", "C", "B", "A") 28 | majorityVote(x) 29 | } 30 | -------------------------------------------------------------------------------- /man/map.Rd: -------------------------------------------------------------------------------- 1 | \name{map} 2 | \alias{map} 3 | 4 | \title{Classification given Probabilities} 5 | 6 | \description{ 7 | Converts a matrix in which each row sums to 1 to an integer vector 8 | specifying for each row the column index of the maximum. 9 | } 10 | \usage{ 11 | map(z, warn = mclust.options("warn"), \dots) 12 | } 13 | \arguments{ 14 | \item{z}{ 15 | A matrix (for example a matrix of conditional 16 | probabilities in which each row sums to 1 17 | as produced by the E-step of the EM algorithm). 18 | } 19 | \item{warn}{ 20 | A logical variable indicating whether or not a warning should be 21 | issued when there are some columns of \code{z} for which no row 22 | attains a maximum. 23 | } 24 | \item{\dots }{ 25 | Provided to allow lists with elements other than the arguments can 26 | be passed in indirect or list calls with \code{do.call}. 27 | } 28 | } 29 | \value{ 30 | A integer vector with one entry for each row of z, 31 | in which the \emph{i}-th value is the column index at which the 32 | \emph{i}-th row of \code{z} attains a maximum. 33 | 34 | } 35 | \seealso{ 36 | \code{\link{unmap}}, 37 | \code{\link{estep}}, 38 | \code{\link{em}}, 39 | \code{\link{me}}. 
40 | } 41 | \examples{ 42 | emEst <- me(modelName = "VVV", data = iris[,-5], z = unmap(iris[,5])) 43 | 44 | map(emEst$z) 45 | } 46 | \keyword{cluster} 47 | % docclass is function 48 | -------------------------------------------------------------------------------- /man/mapClass.Rd: -------------------------------------------------------------------------------- 1 | \name{mapClass} 2 | \alias{mapClass} 3 | 4 | \title{Correspondence between classifications} 5 | 6 | \description{ 7 | Best correspondence between classes given two vectors viewed 8 | as alternative classifications of the same object. 9 | } 10 | \usage{ 11 | mapClass(a, b) 12 | } 13 | \arguments{ 14 | \item{a}{ 15 | A numeric or character vector of class labels. 16 | } 17 | \item{b}{ 18 | A numeric or character vector of class labels. 19 | Must have the same length as 20 | \code{a}. 21 | } 22 | 23 | } 24 | \value{ 25 | A list with two named elements, 26 | \code{aTOb} and 27 | \code{bTOa} which are themselves lists. 28 | The \code{aTOb} list has a component corresponding 29 | to each unique element of \code{a}, which gives 30 | the element or elements of \code{b} 31 | that result in the closest class correspondence. 32 | 33 | The \code{bTOa} list has a component corresponding 34 | to each unique element of \code{b}, which gives 35 | the element or elements of \code{a} 36 | that result in the closest class correspondence. 
37 | } 38 | \seealso{ 39 | \code{\link{classError}}, 40 | \code{\link{table}} 41 | } 42 | \examples{ 43 | a <- rep(1:3, 3) 44 | a 45 | b <- rep(c("A", "B", "C"), 3) 46 | b 47 | mapClass(a, b) 48 | a <- sample(1:3, 9, replace = TRUE) 49 | a 50 | b <- sample(c("A", "B", "C"), 9, replace = TRUE) 51 | b 52 | mapClass(a, b) 53 | } 54 | \keyword{cluster} 55 | -------------------------------------------------------------------------------- /man/mclust-deprecated.Rd: -------------------------------------------------------------------------------- 1 | \name{mclust-deprecated} 2 | \alias{cv.MclustDA} 3 | \alias{cv1EMtrain} 4 | \alias{bicEMtrain} 5 | 6 | \title{Deprecated Functions in mclust package} 7 | 8 | \description{ 9 | These functions are provided for compatibility with older versions of the \pkg{mclust} 10 | package only, and may be removed eventually. 11 | } 12 | 13 | \usage{ 14 | cv.MclustDA(\dots) 15 | cv1EMtrain(data, labels, modelNames=NULL) 16 | bicEMtrain(data, labels, modelNames=NULL) 17 | } 18 | 19 | \arguments{ 20 | \item{\dots}{pass arguments down.} 21 | \item{data}{A numeric vector or matrix of observations.} 22 | \item{labels}{Labels for each element or row in the dataset.} 23 | \item{modelNames}{Vector of model names that should be tested. 
24 | The default is to select all available model names.} 25 | 26 | } 27 | 28 | \seealso{\code{\link{deprecated}}} 29 | -------------------------------------------------------------------------------- /man/mclust-internal.Rd: -------------------------------------------------------------------------------- 1 | \name{mclust-internal} 2 | 3 | \title{Internal MCLUST functions} 4 | 5 | \alias{pickBIC} 6 | \alias{bicFill} 7 | \alias{grid1} 8 | \alias{grid2} 9 | \alias{mvn2plot} 10 | \alias{vecnorm} 11 | \alias{traceW} 12 | \alias{qclass} 13 | \alias{unchol} 14 | \alias{shapeO} 15 | \alias{orth2} 16 | \alias{charconv} 17 | \alias{[.mclustBIC} 18 | \alias{checkModelName} 19 | \alias{balancedFolds} 20 | \alias{permuteRows} 21 | \alias{projpar.MclustDR} 22 | \alias{projdir.MclustDR} 23 | %\alias{mvdnorm} 24 | \alias{ellipse} 25 | \alias{eigen.decomp} 26 | \alias{getParameters.MclustDA} 27 | \alias{as.Mclust} 28 | \alias{as.Mclust.default} 29 | \alias{as.Mclust.densityMclust} 30 | \alias{as.densityMclust} 31 | \alias{as.densityMclust.default} 32 | \alias{as.densityMclust.Mclust} 33 | 34 | \description{ 35 | Internal functions not intended to be called directly by users. 36 | } 37 | 38 | \keyword{internal} 39 | -------------------------------------------------------------------------------- /man/mclust-package.Rd: -------------------------------------------------------------------------------- 1 | \name{mclust-package} 2 | \alias{mclust-package} 3 | \alias{mclust} 4 | \docType{package} 5 | 6 | \title{Gaussian Mixture Modelling for Model-Based Clustering, Classification, and Density Estimation} 7 | 8 | \description{ 9 | Gaussian finite mixture models estimated via EM algorithm for model-based clustering, classification, and density estimation, including Bayesian regularization and dimension reduction. 10 | } 11 | 12 | \details{ 13 | For a quick introduction to \pkg{mclust} see the vignette \href{../doc/mclust.html}{A quick tour of mclust}. 
14 | 15 | See also: 16 | \itemize{ 17 | \item \code{\link{Mclust}} for clustering; 18 | \item \code{\link{MclustDA}} for supervised classification; 19 | \item \code{\link{MclustSSC}} for semi-supervised classification; 20 | \item \code{\link{densityMclust}} for density estimation. 21 | } 22 | } 23 | 24 | \author{ 25 | Chris Fraley, Adrian Raftery and Luca Scrucca. 26 | 27 | Maintainer: Luca Scrucca \email{luca.scrucca@unipg.it} 28 | } 29 | 30 | \references{ 31 | Scrucca L., Fraley C., Murphy T. B. and Raftery A. E. (2023) \emph{Model-Based Clustering, Classification, and Density Estimation Using mclust in R}. Chapman & Hall/CRC, ISBN: 978-1032234953, https://mclust-org.github.io/book/ 32 | 33 | Scrucca L., Fop M., Murphy T. B. and Raftery A. E. (2016) mclust 5: clustering, classification and density estimation using Gaussian finite mixture models, \emph{The R Journal}, 8/1, pp. 289-317. 34 | 35 | Fraley C. and Raftery A. E. (2002) Model-based clustering, discriminant analysis and density estimation, \emph{Journal of the American Statistical Association}, 97/458, pp. 611-631. 
36 | } 37 | 38 | \examples{ 39 | \donttest{ 40 | # Clustering 41 | mod1 <- Mclust(iris[,1:4]) 42 | summary(mod1) 43 | plot(mod1, what = c("BIC", "classification")) 44 | 45 | # Classification 46 | data(banknote) 47 | mod2 <- MclustDA(banknote[,2:7], banknote$Status) 48 | summary(mod2) 49 | plot(mod2) 50 | 51 | # Density estimation 52 | mod3 <- densityMclust(faithful$waiting) 53 | summary(mod3) 54 | } 55 | } 56 | 57 | \keyword{package} 58 | -------------------------------------------------------------------------------- /man/mclustBICupdate.Rd: -------------------------------------------------------------------------------- 1 | \name{mclustBICupdate} 2 | \alias{mclustBICupdate} 3 | 4 | \title{Update BIC values for parameterized Gaussian mixture models} 5 | 6 | \description{ 7 | Update the BIC (Bayesian Information Criterion) for parameterized Gaussian 8 | mixture models by taking the best from BIC results as returned by \code{\link{mclustBIC}}. 9 | } 10 | 11 | \usage{ 12 | mclustBICupdate(BIC, \dots) 13 | } 14 | 15 | \arguments{ 16 | \item{BIC}{Object of class \code{'mclustBIC'} containing the 17 | BIC values as returned by a call to \code{\link{mclustBIC}}. 18 | } 19 | \item{\dots}{Further objects of class \code{'mclustBIC'} to be merged.} 20 | } 21 | 22 | \value{ 23 | An object of class \code{'mclustBIC'} containing the best values obtained from 24 | merging the input arguments. Attributes are also updated according to the best 25 | BIC found, so calling \code{\link{Mclust}} on the resulting output will return 26 | the corresponding best model (see example). 27 | } 28 | 29 | \seealso{ 30 | \code{\link{mclustBIC}}, 31 | \code{\link{Mclust}}. 
32 | } 33 | \examples{ 34 | \donttest{ 35 | data(galaxies, package = "MASS") 36 | galaxies <- galaxies / 1000 37 | 38 | # use several random starting points 39 | BIC <- NULL 40 | for(j in 1:100) 41 | { 42 | rBIC <- mclustBIC(galaxies, verbose = FALSE, 43 | initialization = list(hcPairs = hcRandomPairs(galaxies))) 44 | BIC <- mclustBICupdate(BIC, rBIC) 45 | } 46 | pickBIC(BIC) 47 | plot(BIC) 48 | 49 | mod <- Mclust(galaxies, x = BIC) 50 | summary(mod) 51 | } 52 | } 53 | \keyword{cluster} 54 | -------------------------------------------------------------------------------- /man/mclustLoglik.Rd: -------------------------------------------------------------------------------- 1 | \name{mclustLoglik} 2 | \alias{mclustLoglik} 3 | \alias{print.mclustLoglik} 4 | 5 | \title{Log-likelihood from a table of BIC values for parameterized Gaussian mixture models} 6 | 7 | \description{ 8 | Compute the maximal log-likelihood from a table of BIC values contained in a \code{'mclustBIC'} object as returned by function \code{\link{mclustBIC}}. 9 | } 10 | 11 | \usage{ 12 | mclustLoglik(object, \dots) 13 | } 14 | 15 | \arguments{ 16 | \item{object}{An object of class \code{'mclustBIC'} containing the 17 | BIC values as returned by a call to \code{\link{mclustBIC}}. 18 | } 19 | \item{\dots}{ 20 | Catches unused arguments in an indirect or list call via \code{do.call}. 21 | } 22 | } 23 | 24 | \value{ 25 | An object of class \code{'mclustLoglik'} containing the maximal log-likelihood values for the Gaussian mixture models provided as input. 26 | } 27 | 28 | \seealso{ 29 | \code{\link{mclustBIC}}. 
30 | } 31 | \examples{ 32 | \donttest{ 33 | BIC <- mclustBIC(iris[,1:4]) 34 | mclustLoglik(BIC) 35 | } 36 | } 37 | \keyword{cluster} 38 | -------------------------------------------------------------------------------- /man/mclustModel.Rd: -------------------------------------------------------------------------------- 1 | \name{mclustModel} 2 | \alias{mclustModel} 3 | \title{ 4 | Best model based on BIC 5 | } 6 | \description{ 7 | Determines the best model from clustering via \code{mclustBIC} 8 | for a given set of model parameterizations and numbers of components. 9 | } 10 | \usage{ 11 | mclustModel(data, BICvalues, G, modelNames, \dots) 12 | } 13 | \arguments{ 14 | \item{data}{ 15 | The matrix or vector of observations used to generate `object'. 16 | } 17 | \item{BICvalues}{ 18 | An \code{'mclustBIC'} object, 19 | which is the result of applying \code{mclustBIC} 20 | to \code{data}. 21 | } 22 | \item{G}{ 23 | A vector of integers giving the numbers of mixture components (clusters) 24 | from which the best model according to BIC will be selected 25 | (\code{as.character(G)} must be a subset of the row names of 26 | \code{BICvalues}). 27 | The default is to select the best model for all numbers 28 | of mixture components used to obtain \code{BICvalues}. 29 | } 30 | \item{modelNames}{ 31 | A vector of character strings giving the model parameterizations 32 | from which the best model according to BIC will be selected 33 | (\code{as.character(model)} must be a subset of the column names of 34 | \code{BICvalues}). 35 | The default is to select the best model for parameterizations 36 | used to obtain \code{BICvalues}. 37 | } 38 | \item{\dots}{ 39 | Not used. For generic/method consistency. 40 | } 41 | } 42 | \value{ 43 | A list giving the optimal (according to BIC) parameters, 44 | conditional probabilities \code{z}, and log-likelihood, 45 | together with the associated classification and its uncertainty. 
46 | 47 | The details of the output components are as follows: 48 | \item{modelName}{ 49 | A character string indicating the model. The help file for 50 | \code{\link{mclustModelNames}} describes the available models. 51 | } 52 | \item{n}{ 53 | The number of observations in the data. 54 | } 55 | \item{d}{ 56 | The dimension of the data. 57 | } 58 | \item{G}{ 59 | The number of components in the Gaussian mixture model corresponding 60 | to the optimal BIC. 61 | } 62 | \item{bic}{ 63 | The optimal BIC value. 64 | } 65 | \item{loglik}{ 66 | The log-likelihood corresponding to the optimal BIC. 67 | } 68 | \item{parameters}{ 69 | A list with the following components: 70 | \describe{ 71 | \item{\code{pro}}{ 72 | A vector whose \emph{k}th component is the mixing proportion for 73 | the \emph{k}th component of the mixture model. 74 | If missing, equal proportions are assumed. 75 | } 76 | \item{\code{mean}}{ 77 | The mean for each component. If there is more than one component, 78 | this is a matrix whose kth column is the mean of the \emph{k}th 79 | component of the mixture model. 80 | } 81 | \item{\code{variance}}{ 82 | A list of variance parameters for the model. 83 | The components of this list depend on the model 84 | specification. See the help file for \code{\link{mclustVariance}} 85 | for details. 86 | } 87 | \item{\code{Vinv}}{ 88 | The estimate of the reciprocal hypervolume of the data region 89 | used in the computation when the input indicates the 90 | addition of a noise component to the model. 91 | } 92 | } 93 | } 94 | \item{z}{ 95 | A matrix whose \emph{[i,k]}th entry is the probability that observation 96 | \emph{i} in the test data belongs to the \emph{k}th class. 
97 | } 98 | } 99 | \seealso{ 100 | \code{\link{mclustBIC}} 101 | } 102 | \examples{ 103 | irisBIC <- mclustBIC(iris[,-5]) 104 | mclustModel(iris[,-5], irisBIC) 105 | mclustModel(iris[,-5], irisBIC, G = 1:6, modelNames = c("VII", "VVI", "VVV")) 106 | } 107 | \keyword{cluster} 108 | % docclass is function 109 | -------------------------------------------------------------------------------- /man/mclustModelNames.Rd: -------------------------------------------------------------------------------- 1 | \name{mclustModelNames} 2 | \alias{mclustModelNames} 3 | \title{ 4 | MCLUST Model Names 5 | } 6 | \description{ 7 | Description of model names used in the \emph{MCLUST} package. 8 | } 9 | \usage{ 10 | mclustModelNames(model) 11 | } 12 | \arguments{ 13 | \item{model}{A string specifying the model.} 14 | } 15 | 16 | \details{ 17 | The following models are available in package \pkg{mclust}:\cr 18 | 19 | \bold{univariate mixture} \cr 20 | \describe{ 21 | \item{\code{"E"}}{equal variance (one-dimensional)} 22 | \item{\code{"V"}}{variable/unequal variance (one-dimensional)} 23 | } 24 | \bold{multivariate mixture}\cr 25 | \describe{ 26 | \item{\code{"EII"}}{spherical, equal volume} 27 | \item{\code{"VII"}}{spherical, unequal volume} 28 | \item{\code{"EEI"}}{diagonal, equal volume and shape} 29 | \item{\code{"VEI"}}{diagonal, varying volume, equal shape} 30 | \item{\code{"EVI"}}{diagonal, equal volume, varying shape} 31 | \item{\code{"VVI"}}{diagonal, varying volume and shape} 32 | \item{\code{"EEE"}}{ellipsoidal, equal volume, shape, and orientation} 33 | \item{\code{"VEE"}}{ellipsoidal, equal shape and orientation (*)} 34 | \item{\code{"EVE"}}{ellipsoidal, equal volume and orientation (*)} 35 | \item{\code{"VVE"}}{ellipsoidal, equal orientation (*)} 36 | \item{\code{"EEV"}}{ellipsoidal, equal volume and equal shape} 37 | \item{\code{"VEV"}}{ellipsoidal, equal shape} 38 | \item{\code{"EVV"}}{ellipsoidal, equal volume (*)} 39 | \item{\code{"VVV"}}{ellipsoidal, varying volume, 
shape, and orientation} 40 | } 41 | \bold{single component}\cr 42 | \describe{ 43 | \item{\code{"X"}}{univariate normal} 44 | \item{\code{"XII"}}{spherical multivariate normal} 45 | \item{\code{"XXI"}}{diagonal multivariate normal} 46 | \item{\code{"XXX"}}{ellipsoidal multivariate normal} 47 | } 48 | (*) new models in \pkg{mclust} version >= 5.0.0. 49 | } 50 | 51 | \value{Returns a list with the following components: 52 | \item{model}{a character string indicating the model (as in input).} 53 | \item{type}{the description of the indicated model (see Details section).} 54 | } 55 | 56 | \seealso{ 57 | \code{\link{Mclust}}, 58 | \code{\link{mclustBIC}} 59 | } 60 | \examples{ 61 | mclustModelNames("E") 62 | mclustModelNames("EEE") 63 | mclustModelNames("VVV") 64 | mclustModelNames("XXI") 65 | } 66 | \keyword{cluster} 67 | 68 | -------------------------------------------------------------------------------- /man/mclustVariance.Rd: -------------------------------------------------------------------------------- 1 | \name{mclustVariance} 2 | \alias{mclustVariance} 3 | 4 | \title{ 5 | Template for variance specification for parameterized Gaussian mixture models 6 | } 7 | \description{ 8 | Specification of variance parameters for the various types 9 | of Gaussian mixture models. 10 | } 11 | \usage{ 12 | mclustVariance(modelName, d = NULL, G = 2) 13 | } 14 | 15 | \arguments{ 16 | \item{modelName}{A character string specifying the model.} 17 | \item{d}{An integer specifying the dimension of the data.} 18 | \item{G}{An integer specifying the number of components in the mixture model.} 19 | } 20 | 21 | \details{The \code{variance} component in the \code{parameters} list from the 22 | output to e.g. \code{me} or \code{mstep} or input to e.g. \code{estep} may contain one or more of the following arguments, depending on the model: 23 | \describe{ 24 | \item{\code{modelName}}{ 25 | A character string indicating the model. 26 | } 27 | \item{\code{d}}{ 28 | The dimension of the data. 
29 | } 30 | \item{\code{G}}{ 31 | The number of components in the mixture model. 32 | } 33 | \item{\code{sigmasq}}{ 34 | for the one-dimensional models (\code{"E"}, \code{"V"}) and spherical 35 | models (\code{"EII"}, \code{"VII"}). This is either a vector whose 36 | \emph{k}th component is the variance for the \emph{k}th component in 37 | the mixture model (\code{"V"} and \code{"VII"}), or a scalar giving 38 | the common variance for all components in the mixture model (\code{"E"} 39 | and \code{"EII"}). 40 | } 41 | \item{\code{Sigma}}{ 42 | For the equal variance models \code{"EII"}, \code{"EEI"}, and 43 | \code{"EEE"}. 44 | A \emph{d} by \emph{d} matrix giving the common covariance for all 45 | components of the mixture model. 46 | } 47 | \item{\code{cholSigma}}{ 48 | For the equal variance model \code{"EEE"}. 49 | A \emph{d} by \emph{d} upper triangular matrix giving the 50 | Cholesky factor of the common covariance for all 51 | components of the mixture model. 52 | } 53 | \item{\code{sigma}}{ 54 | For all multidimensional mixture models. A 55 | \emph{d} by \emph{d} by \emph{G} matrix array whose 56 | \code{[,,k]}th entry is the covariance matrix for 57 | the \emph{k}th component of the mixture model. 58 | } 59 | \item{\code{cholsigma}}{ 60 | For the unconstrained covariance mixture model \code{"VVV"}. 61 | A \emph{d} by \emph{d} by \emph{G} matrix array whose 62 | \code{[,,k]}th entry is the upper triangular Cholesky factor 63 | of the covariance matrix for the \emph{k}th component of the 64 | mixture model. 65 | } 66 | \item{\code{scale}}{ 67 | For diagonal models \code{"EEI"}, \code{"EVI"}, \code{"VEI"}, 68 | \code{"VVI"} and constant-shape models \code{"EEV"} and \code{"VEV"}. 69 | Either a \emph{G}-vector giving the scale of the covariance (the 70 | \emph{d}th root of its determinant) for each component in the 71 | mixture model, or a single numeric value if the scale is the 72 | same for each component. 
73 | } 74 | \item{\code{shape}}{ 75 | For diagonal models \code{"EEI"}, \code{"EVI"}, \code{"VEI"}, 76 | \code{"VVI"} and constant-shape models \code{"EEV"} and \code{"VEV"}. 77 | Either a \emph{G} by \emph{d} matrix in which the \emph{k}th 78 | column is the shape of the covariance matrix (normalized to have 79 | determinant 1) for the \emph{k}th component, or a 80 | \emph{d}-vector giving a common shape for all components. 81 | } 82 | \item{\code{orientation}}{ 83 | For the constant-shape models \code{"EEV"} and \code{"VEV"}. 84 | Either a \emph{d} by \emph{d} by \emph{G} array whose 85 | \code{[,,k]}th entry is the orthonormal matrix whose 86 | columns are the eigenvectors of the covariance matrix of 87 | the \emph{k}th component, or a \emph{d} by \emph{d} 88 | orthonormal matrix if the mixture components have a 89 | common orientation. The \code{orientation} component 90 | is not needed in spherical and diagonal models, since 91 | the principal components are parallel to the coordinate axes 92 | so that the orientation matrix is the identity. 93 | } 94 | } 95 | In all cases, the value 96 | \code{-1} is used as a placeholder for unknown nonzero entries. 97 | } 98 | 99 | \keyword{cluster} 100 | -------------------------------------------------------------------------------- /man/mstep.Rd: -------------------------------------------------------------------------------- 1 | \name{mstep} 2 | \alias{mstep} 3 | 4 | \title{M-step for parameterized Gaussian mixture models} 5 | 6 | \description{ 7 | Maximization step in the EM algorithm for parameterized Gaussian 8 | mixture models. 9 | } 10 | \usage{ 11 | mstep(data, modelName, z, prior = NULL, warn = NULL, \dots) 12 | } 13 | \arguments{ 14 | \item{data}{ 15 | A numeric vector, matrix, or data frame of observations. 16 | Categorical variables are not allowed. 17 | If a matrix or data frame, rows correspond to observations and 18 | columns correspond to variables. 
19 | } 20 | \item{modelName}{ 21 | A character string indicating the model. The help file for 22 | \code{\link{mclustModelNames}} describes the available models. 23 | } 24 | \item{z}{ 25 | A matrix whose \code{[i,k]}th entry is the 26 | conditional probability of the ith observation belonging to 27 | the \emph{k}th component of the mixture. 28 | In analyses involving noise, this should not include the 29 | conditional probabilities for the noise component. 30 | } 31 | \item{prior}{ 32 | Specification of a conjugate prior on the means and variances. 33 | The default assumes no prior. 34 | } 35 | \item{warn}{ 36 | A logical value indicating whether or not certain warnings 37 | (usually related to singularity) should be issued when the 38 | estimation fails. The default is given by \code{mclust.options("warn")}. 39 | } 40 | \item{\dots}{ 41 | Catches unused arguments in indirect or list calls via \code{do.call}. 42 | } 43 | } 44 | \value{ 45 | A list including the following components: 46 | \item{modelName}{ 47 | A character string identifying the model (same as the input argument). 48 | } 49 | \item{parameters}{ 50 | \describe{ 51 | \item{\code{pro}}{ 52 | A vector whose \emph{k}th component is the mixing proportion for 53 | the \emph{k}th component of the mixture model. 54 | If the model includes a Poisson term for noise, there 55 | should be one more mixing proportion than the number 56 | of Gaussian components. 57 | } 58 | \item{\code{mean}}{ 59 | The mean for each component. If there is more than one component, 60 | this is a matrix whose kth column is the mean of the \emph{k}th 61 | component of the mixture model. 62 | } 63 | \item{\code{variance}}{ 64 | A list of variance parameters for the model. 65 | The components of this list depend on the model 66 | specification. See the help file for \code{\link{mclustVariance}} 67 | for details. 
68 | } 69 | } 70 | } 71 | \item{Attributes:}{ 72 | \code{"info"} For those models with iterative M-steps 73 | (\code{"VEI"} and \code{"VEV"}), information on the iteration.\cr 74 | \code{"WARNING"} An appropriate warning if problems are 75 | encountered in the computations. 76 | } 77 | } 78 | \note{ 79 | This function computes the M-step only for MVN mixtures, so in 80 | analyses involving noise, the conditional probabilities input should 81 | exclude those for the noise component. \cr 82 | 83 | In contrast to \code{me} for the EM algorithm, computations in \code{mstep} 84 | are carried out unless failure due to overflow would occur. To impose 85 | stricter tolerances on a single \code{mstep}, use \code{me} with the 86 | \emph{itmax} component of the \code{control} argument set to 1. 87 | } 88 | 89 | \seealso{ 90 | \code{\link{mstepE}}, \dots, 91 | \code{\link{mstepVVV}}, 92 | \code{\link{emControl}}, 93 | \code{\link{me}}, 94 | \code{\link{estep}}, 95 | \code{\link{mclust.options}}. 96 | } 97 | \examples{ 98 | \donttest{ 99 | mstep(modelName = "VII", data = iris[,-5], z = unmap(iris[,5]))} 100 | } 101 | \keyword{cluster} 102 | -------------------------------------------------------------------------------- /man/mvn.Rd: -------------------------------------------------------------------------------- 1 | \name{mvn} 2 | \alias{mvn} 3 | \title{ 4 | Univariate or Multivariate Normal Fit 5 | } 6 | \description{ 7 | Computes the mean, covariance, and log-likelihood from fitting a single 8 | Gaussian to given data (univariate or multivariate normal). 9 | } 10 | \usage{ 11 | mvn( modelName, data, prior = NULL, warn = NULL, \dots) 12 | } 13 | \arguments{ 14 | \item{modelName}{ 15 | A character string representing a model name. 
This can be either 16 | \code{"Spherical"}, \code{"Diagonal"}, or \code{"Ellipsoidal"} or 17 | else \cr 18 | \code{"X"} for one-dimensional data,\cr 19 | \code{"XII"} for a spherical Gaussian, \cr 20 | \code{"XXI"} for a diagonal Gaussian \cr 21 | \code{"XXX"} for a general ellipsoidal Gaussian 22 | } 23 | \item{data}{ 24 | A numeric vector, matrix, or data frame of observations. Categorical 25 | variables are not allowed. If a matrix or data frame, rows 26 | correspond to observations and columns correspond to variables. 27 | } 28 | \item{prior}{ 29 | Specification of a conjugate prior on the means and variances. 30 | The default assumes no prior. 31 | } 32 | \item{warn}{ 33 | A logical value indicating whether or not a warning should be issued 34 | whenever a singularity is encountered. 35 | The default is given by \code{mclust.options("warn")}. 36 | } 37 | \item{\dots }{ 38 | Catches unused arguments in indirect or list calls via \code{do.call}. 39 | } 40 | } 41 | \value{ 42 | A list including the following components: 43 | \item{modelName}{ 44 | A character string identifying the model (same as the input argument). 45 | } 46 | \item{parameters}{ 47 | \describe{ 48 | \item{\code{mean}}{ 49 | The mean for each component. If there is more than one component, 50 | this is a matrix whose kth column is the mean of the \emph{k}th 51 | component of the mixture model. 52 | } 53 | \item{\code{variance}}{ 54 | A list of variance parameters for the model. 55 | The components of this list depend on the model 56 | specification. See the help file for \code{\link{mclustVariance}} 57 | for details. 58 | } 59 | } 60 | } 61 | \item{loglik}{ 62 | The log likelihood for the data in the mixture model. 63 | } 64 | \item{Attributes:}{ 65 | \code{"WARNING"} An appropriate warning if problems are 66 | encountered in the computations. 
67 | } 68 | } 69 | \seealso{ 70 | \code{\link{mvnX}}, 71 | \code{\link{mvnXII}}, 72 | \code{\link{mvnXXI}}, 73 | \code{\link{mvnXXX}}, 74 | \code{\link{mclustModelNames}} 75 | } 76 | \examples{ 77 | n <- 1000 78 | 79 | set.seed(0) 80 | x <- rnorm(n, mean = -1, sd = 2) 81 | mvn(modelName = "X", x) 82 | 83 | mu <- c(-1, 0, 1) 84 | 85 | set.seed(0) 86 | x <- sweep(matrix(rnorm(n*3), n, 3) \%*\% (2*diag(3)), 87 | MARGIN = 2, STATS = mu, FUN = "+") 88 | mvn(modelName = "XII", x) 89 | mvn(modelName = "Spherical", x) 90 | 91 | set.seed(0) 92 | x <- sweep(matrix(rnorm(n*3), n, 3) \%*\% diag(1:3), 93 | MARGIN = 2, STATS = mu, FUN = "+") 94 | mvn(modelName = "XXI", x) 95 | mvn(modelName = "Diagonal", x) 96 | 97 | Sigma <- matrix(c(9,-4,1,-4,9,4,1,4,9), 3, 3) 98 | set.seed(0) 99 | x <- sweep(matrix(rnorm(n*3), n, 3) \%*\% chol(Sigma), 100 | MARGIN = 2, STATS = mu, FUN = "+") 101 | mvn(modelName = "XXX", x) 102 | mvn(modelName = "Ellipsoidal", x) 103 | } 104 | \keyword{cluster} 105 | -------------------------------------------------------------------------------- /man/mvnX.Rd: -------------------------------------------------------------------------------- 1 | \name{mvnX} 2 | \alias{mvnX} 3 | \alias{mvnXII} 4 | \alias{mvnXXI} 5 | \alias{mvnXXX} 6 | \title{ 7 | Univariate or Multivariate Normal Fit 8 | } 9 | \description{ 10 | Computes the mean, covariance, and log-likelihood from fitting a single 11 | Gaussian (univariate or multivariate normal). 12 | } 13 | \usage{ 14 | mvnX(data, prior = NULL, warn = NULL, \dots) 15 | mvnXII(data, prior = NULL, warn = NULL, \dots) 16 | mvnXXI(data, prior = NULL, warn = NULL, \dots) 17 | mvnXXX(data, prior = NULL, warn = NULL, \dots) 18 | } 19 | \arguments{ 20 | \item{data}{ 21 | A numeric vector, matrix, or data frame of observations. 22 | Categorical variables are not allowed. 23 | If a matrix or data frame, rows correspond to observations and 24 | columns correspond to variables. 
25 | } 26 | \item{prior}{ 27 | Specification of a conjugate prior on the means and variances. 28 | The default assumes no prior. 29 | } 30 | \item{warn}{ 31 | A logical value indicating whether or not a warning should be issued 32 | whenever a singularity is encountered. 33 | The default is given by \code{mclust.options("warn")}. 34 | } 35 | \item{\dots }{ 36 | Catches unused arguments in indirect or list calls via \code{do.call}. 37 | } 38 | } 39 | 40 | \details{ 41 | \describe{ 42 | \item{\code{mvnXII}}{computes the best fitting Gaussian with the covariance restricted to be a multiple of the identity.} 43 | \item{\code{mvnXXI}}{computes the best fitting Gaussian with the covariance restricted to be diagonal.} 44 | \item{\code{mvnXXX}}{computes the best fitting Gaussian with ellipsoidal (unrestricted) covariance.} 45 | } 46 | } 47 | 48 | \value{ 49 | A list including the following components: 50 | \item{modelName}{ 51 | A character string identifying the model (same as the input argument). 52 | } 53 | \item{parameters}{ 54 | \describe{ 55 | \item{\code{mean}}{ 56 | The mean for each component. If there is more than one component, 57 | this is a matrix whose kth column is the mean of the \emph{k}th 58 | component of the mixture model. 59 | } 60 | \item{\code{variance}}{ 61 | A list of variance parameters for the model. 62 | The components of this list depend on the model 63 | specification. See the help file for \code{\link{mclustVariance}} 64 | for details. 65 | } 66 | } 67 | } 68 | \item{loglik}{ 69 | The log likelihood for the data in the mixture model. 70 | } 71 | \item{Attributes:}{ 72 | \code{"WARNING"} An appropriate warning if problems are 73 | encountered in the computations. 
74 | } 75 | } 76 | 77 | \seealso{ 78 | \code{\link{mvn}}, 79 | \code{\link{mstepE}} 80 | } 81 | \examples{ 82 | \donttest{ 83 | n <- 1000 84 | 85 | set.seed(0) 86 | x <- rnorm(n, mean = -1, sd = 2) 87 | mvnX(x) 88 | 89 | mu <- c(-1, 0, 1) 90 | 91 | set.seed(0) 92 | x <- sweep(matrix(rnorm(n*3), n, 3) \%*\% (2*diag(3)), 93 | MARGIN = 2, STATS = mu, FUN = "+") 94 | mvnXII(x) 95 | 96 | set.seed(0) 97 | x <- sweep(matrix(rnorm(n*3), n, 3) \%*\% diag(1:3), 98 | MARGIN = 2, STATS = mu, FUN = "+") 99 | mvnXXI(x) 100 | 101 | Sigma <- matrix(c(9,-4,1,-4,9,4,1,4,9), 3, 3) 102 | set.seed(0) 103 | x <- sweep(matrix(rnorm(n*3), n, 3) \%*\% chol(Sigma), 104 | MARGIN = 2, STATS = mu, FUN = "+") 105 | mvnXXX(x) 106 | } 107 | } 108 | \keyword{cluster} 109 | -------------------------------------------------------------------------------- /man/nMclustParams.Rd: -------------------------------------------------------------------------------- 1 | \name{nMclustParams} 2 | \alias{nMclustParams} 3 | 4 | \title{Number of Estimated Parameters in Gaussian Mixture Models} 5 | 6 | \description{ 7 | Gives the number of estimated parameters for parameterizations of the 8 | Gaussian mixture model that are used in MCLUST. 9 | } 10 | \usage{ 11 | nMclustParams(modelName, d, G, noise = FALSE, equalPro = FALSE, \dots) 12 | } 13 | \arguments{ 14 | \item{modelName}{ 15 | A character string indicating the model. The help file for 16 | \code{\link{mclustModelNames}} describes the available models. 17 | } 18 | \item{d}{ 19 | The dimension of the data. Not used for models in which neither 20 | the shape nor the orientation varies. 21 | } 22 | \item{G}{ 23 | The number of components in the Gaussian mixture model used to compute 24 | \code{loglik}. 25 | } 26 | \item{noise}{ 27 | A logical variable indicating whether or not the model includes an 28 | optional Poisson noise component. 
29 | } 30 | \item{equalPro}{ 31 | A logical variable indicating whether or not the components in the 32 | model are assumed to be present in equal proportion. 33 | } 34 | \item{\dots }{ 35 | Catches unused arguments in indirect or list calls via \code{do.call}. 36 | } 37 | } 38 | \value{ 39 | The number of variance parameters in the corresponding Gaussian mixture 40 | model. 41 | } 42 | \details{ 43 | To get the total number of parameters in model, add \code{G*d} for the 44 | means and \code{G-1} for the mixing proportions if they are unequal. 45 | } 46 | \seealso{ 47 | \code{\link{bic}}, \code{\link{nVarParams}}. 48 | } 49 | \examples{ 50 | mapply(nMclustParams, mclust.options("emModelNames"), d = 2, G = 3) 51 | } 52 | \keyword{cluster} 53 | -------------------------------------------------------------------------------- /man/nVarParams.Rd: -------------------------------------------------------------------------------- 1 | \name{nVarParams} 2 | \alias{nVarParams} 3 | \title{ 4 | Number of Variance Parameters in Gaussian Mixture Models 5 | } 6 | \description{ 7 | Gives the number of variance parameters for parameterizations of the 8 | Gaussian mixture model that are used in MCLUST. 9 | } 10 | \usage{ 11 | nVarParams(modelName, d, G, \dots) 12 | } 13 | \arguments{ 14 | \item{modelName}{ 15 | A character string indicating the model. The help file for 16 | \code{\link{mclustModelNames}} describes the available models. 17 | } 18 | \item{d}{ 19 | The dimension of the data. Not used for models in which neither 20 | the shape nor the orientation varies. 21 | } 22 | \item{G}{ 23 | The number of components in the Gaussian mixture model used to compute 24 | \code{loglik}. 25 | } 26 | \item{\dots }{ 27 | Catches unused arguments in indirect or list calls via \code{do.call}. 28 | } 29 | } 30 | \value{ 31 | The number of variance parameters in the corresponding Gaussian mixture 32 | model. 
33 | } 34 | \details{ 35 | To get the total number of parameters in model, add \code{G*d} for the 36 | means and \code{G-1} for the mixing proportions if they are unequal. 37 | } 38 | 39 | \seealso{ 40 | \code{\link{bic}}, \code{\link{nMclustParams}}. 41 | } 42 | \examples{ 43 | mapply(nVarParams, mclust.options("emModelNames"), d = 2, G = 3) 44 | } 45 | \keyword{cluster} 46 | -------------------------------------------------------------------------------- /man/partconv.Rd: -------------------------------------------------------------------------------- 1 | \name{partconv} 2 | \alias{partconv} 3 | 4 | \title{Numeric Encoding of a Partitioning} 5 | 6 | \description{ 7 | Converts a vector interpreted as a classification or partitioning 8 | into a numeric vector. 9 | } 10 | \usage{ 11 | partconv(x, consec=TRUE) 12 | } 13 | 14 | \arguments{ 15 | \item{x}{ 16 | A vector interpreted as a classification or partitioning. 17 | } 18 | \item{consec}{ 19 | Logical value indicating whether or not consecutive class 20 | numbers should be used. 21 | } 22 | } 23 | \value{ 24 | Numeric encoding of \code{x}. 25 | When \code{consec = TRUE}, the distinct values in \code{x} are numbered by 26 | the order in which they appear. 27 | When \code{consec = FALSE}, each distinct value in \code{x} is numbered by 28 | the index corresponding to its first appearance in \code{x}. 
29 | } 30 | \seealso{ 31 | \code{\link{partuniq}} 32 | } 33 | \examples{ 34 | partconv(iris[,5]) 35 | 36 | set.seed(0) 37 | cl <- sample(LETTERS[1:9], 25, replace=TRUE) 38 | partconv(cl, consec=FALSE) 39 | partconv(cl, consec=TRUE) 40 | } 41 | 42 | \keyword{cluster} 43 | -------------------------------------------------------------------------------- /man/partuniq.Rd: -------------------------------------------------------------------------------- 1 | \name{partuniq} 2 | \alias{partuniq} 3 | \title{ 4 | Classifies Data According to Unique Observations 5 | } 6 | \description{ 7 | Gives a one-to-one mapping from unique observations to rows of a data matrix. 8 | } 9 | \usage{ 10 | partuniq(x) 11 | } 12 | \arguments{ 13 | \item{x}{Matrix of observations.} 14 | } 15 | \value{ 16 | A vector of length \code{nrow(x)} with integer entries. An observation 17 | \code{k} is assigned an integer \code{i} whenever observation \code{i} 18 | is the first row of \code{x} that is identical to observation \code{k} 19 | (note that \code{i <= k}). 20 | } 21 | \seealso{ 22 | \code{\link{partconv}} 23 | } 24 | \examples{ 25 | set.seed(0) 26 | 27 | mat <- data.frame(lets = sample(LETTERS[1:2],9,TRUE), nums = sample(1:2,9,TRUE)) 28 | mat 29 | 30 | ans <- partuniq(mat) 31 | ans 32 | 33 | partconv(ans,consec=TRUE) 34 | } 35 | \keyword{cluster} 36 | 37 | 38 | -------------------------------------------------------------------------------- /man/plot.Mclust.Rd: -------------------------------------------------------------------------------- 1 | \name{plot.Mclust} 2 | \alias{plot.Mclust} 3 | 4 | \title{Plotting method for Mclust model-based clustering} 5 | 6 | \description{ 7 | Plots for model-based clustering results, such as BIC, classification, uncertainty and density. 
8 | } 9 | \usage{ 10 | \method{plot}{Mclust}(x, what = c("BIC", "classification", "uncertainty", "density"), 11 | dimens = NULL, xlab = NULL, ylab = NULL, 12 | addEllipses = TRUE, main = FALSE, \dots) 13 | } 14 | \arguments{ 15 | \item{x}{ 16 | Output from \code{Mclust}. 17 | } 18 | \item{what}{ 19 | A string specifying the type of graph requested. Available choices are: 20 | \describe{ 21 | \item{\code{"BIC"}}{plot of BIC values used for choosing the number of clusters.} 22 | \item{\code{"classification"} =}{a plot showing the clustering. For data in more than two dimensions a pairs plot is produced, followed by a coordinate projection plot using specified \code{dimens}. Ellipses corresponding to covariances of mixture components are also drawn if \code{addEllipses = TRUE}.} 23 | \item{\code{"uncertainty"}}{a plot of classification uncertainty. For data in more than two dimensions a coordinate projection plot is drawn using specified \code{dimens}.} 24 | \item{\code{"density"}}{a plot of estimated density. For data in more than two dimensions a matrix of contours for coordinate projection plot is drawn using specified \code{dimens}.} 25 | } 26 | If not specified, in interactive sessions a menu of choices is proposed. 27 | } 28 | \item{dimens}{ 29 | A vector of integers specifying the dimensions of the coordinate projections 30 | in case of \code{"classification"}, \code{"uncertainty"}, or \code{"density"} 31 | plots. 32 | } 33 | \item{xlab, ylab}{ 34 | Optional labels for the x-axis and the y-axis. 35 | } 36 | \item{addEllipses}{ 37 | A logical indicating whether or not to add ellipses with axes 38 | corresponding to the within-cluster covariances in case of 39 | \code{"classification"} or \code{"uncertainty"} plots. 40 | } 41 | \item{main}{ 42 | A logical or \code{NULL} indicating whether or not to add a title 43 | to the plot identifying the type of plot drawn. 44 | } 45 | \item{\dots}{ 46 | Other graphics parameters. 
47 | } 48 | } 49 | \details{ 50 | For more flexibility in plotting, use \code{mclust1Dplot}, 51 | \code{mclust2Dplot}, \code{surfacePlot}, \code{coordProj}, or 52 | \code{randProj}. 53 | } 54 | \seealso{ 55 | \code{\link{Mclust}}, 56 | \code{\link{plot.mclustBIC}}, 57 | \code{\link{plot.mclustICL}}, 58 | \code{\link{mclust1Dplot}}, 59 | \code{\link{mclust2Dplot}}, 60 | \code{\link{surfacePlot}}, 61 | \code{\link{coordProj}}, 62 | \code{\link{randProj}}. 63 | } 64 | \examples{ 65 | \donttest{ 66 | precipMclust <- Mclust(precip) 67 | plot(precipMclust) 68 | 69 | faithfulMclust <- Mclust(faithful) 70 | plot(faithfulMclust) 71 | 72 | irisMclust <- Mclust(iris[,-5]) 73 | plot(irisMclust) 74 | } 75 | } 76 | \keyword{cluster} 77 | -------------------------------------------------------------------------------- /man/plot.MclustBoostrap.Rd: -------------------------------------------------------------------------------- 1 | \name{plot.MclustBootstrap} 2 | \alias{plot.MclustBootstrap} 3 | 4 | \title{Plot of bootstrap distributions for mixture model parameters} 5 | 6 | \description{ 7 | Plots the bootstrap distribution of parameters as returned by the \code{\link{MclustBootstrap}} function. 
8 | } 9 | 10 | \usage{ 11 | \method{plot}{MclustBootstrap}(x, what = c("pro", "mean", "var"), 12 | show.parest = TRUE, show.confint = TRUE, 13 | hist.col = "grey", hist.border = "lightgrey", breaks = NA, 14 | col = "forestgreen", lwd = 2, lty = 3, 15 | xlab = NULL, xlim = NULL, ylim = NULL, \dots) 16 | } 17 | 18 | \arguments{ 19 | \item{x}{Object returned by \code{MclustBootstrap}.} 20 | 21 | \item{what}{Character string specifying if mixing proportions (\code{"pro"}), 22 | component means (\code{"mean"}) or component variances (\code{"var"}) 23 | should be drawn.} 24 | 25 | \item{show.parest}{A logical specifying if the parameter estimate should be drawn as vertical line.} 26 | 27 | \item{show.confint}{A logical specifying if the resampling-based confidence interval should be drawn at the bottom of the graph. Confidence level can be provided as further argument \code{conf.level}; see \code{\link{summary.MclustBootstrap}}.} 28 | 29 | \item{hist.col}{The color to be used to fill the bars of the histograms.} 30 | 31 | \item{hist.border}{The color of the border around the bars of the histograms.} 32 | 33 | \item{breaks}{The number of breaks used in histogram to visualize the bootstrap distribution. When \code{NA} (default), it is computed as the maximum between the "Sturges" and "Freedman-Diaconis" methods; see \code{\link{nclass.Sturges}} and \code{\link{nclass.FD}}.} 34 | 35 | \item{col, lwd, lty}{The color, line width and line type to be used to represent the estimated parameters and confidence intervals.} 36 | 37 | \item{xlab}{Optional label for the horizontal axis.} 38 | 39 | \item{xlim, ylim}{A two-values vector of axis range for, respectively, horizontal and 40 | vertical axis.} 41 | 42 | \item{\dots}{Other graphics parameters.} 43 | } 44 | 45 | \value{ 46 | A plot for each variable/component of the selected parameters. 
47 | } 48 | 49 | \seealso{ 50 | \code{\link{MclustBootstrap}} 51 | } 52 | \examples{ 53 | \donttest{ 54 | data(diabetes) 55 | X <- diabetes[,-1] 56 | modClust <- Mclust(X, G = 3, modelNames = "VVV") 57 | bootClust <- MclustBootstrap(modClust, nboot = 99) 58 | par(mfrow = c(1,3), mar = c(4,2,2,0.5)) 59 | plot(bootClust, what = "pro") 60 | par(mfrow = c(3,3), mar = c(4,2,2,0.5)) 61 | plot(bootClust, what = "mean") 62 | } 63 | } 64 | 65 | \keyword{cluster} 66 | -------------------------------------------------------------------------------- /man/plot.MclustSSC.Rd: -------------------------------------------------------------------------------- 1 | \name{plot.MclustSSC} 2 | \alias{plot.MclustSSC} 3 | 4 | \title{Plotting method for MclustSSC semi-supervised classification} 5 | 6 | \description{ 7 | Plots for semi-supervised classification based on Gaussian finite mixture models. 8 | } 9 | 10 | \usage{ 11 | \method{plot}{MclustSSC}(x, what = c("BIC", "classification", "uncertainty"), \dots) 12 | } 13 | 14 | \arguments{ 15 | \item{x}{ 16 | An object of class \code{'MclustSSC'} resulting from a call to \code{\link{MclustSSC}}. 17 | } 18 | 19 | \item{what}{ 20 | A string specifying the type of graph requested. Available choices are: 21 | \describe{ 22 | \item{\code{"BIC"} =}{plot of BIC values used for model selection, i.e. for choosing the model class covariances.} 23 | \item{\code{"classification"} =}{a plot of data with points marked based on the known and the predicted classification.} 24 | \item{\code{"uncertainty"} =}{a plot of classification uncertainty.} 25 | } 26 | If not specified, in interactive sessions a menu of choices is proposed. 27 | } 28 | 29 | \item{\dots}{further arguments passed to or from other methods. 
See \code{\link{plot.Mclust}}.} 30 | } 31 | 32 | %\value{} 33 | 34 | %\details{} 35 | 36 | \author{Luca Scrucca} 37 | 38 | \seealso{ 39 | \code{\link{MclustSSC}} 40 | } 41 | 42 | \examples{ 43 | X <- iris[,1:4] 44 | class <- iris$Species 45 | # randomly remove class labels 46 | set.seed(123) 47 | class[sample(1:length(class), size = 120)] <- NA 48 | table(class, useNA = "ifany") 49 | clPairs(X, ifelse(is.na(class), 0, class), 50 | symbols = c(0, 16, 17, 18), colors = c("grey", 4, 2, 3), 51 | main = "Partially classified data") 52 | 53 | # Fit semi-supervised classification model 54 | mod_SSC <- MclustSSC(X, class) 55 | summary(mod_SSC, parameters = TRUE) 56 | 57 | pred_SSC <- predict(mod_SSC) 58 | table(Predicted = pred_SSC$classification, Actual = class, useNA = "ifany") 59 | 60 | plot(mod_SSC, what = "BIC") 61 | plot(mod_SSC, what = "classification") 62 | plot(mod_SSC, what = "uncertainty") 63 | } 64 | 65 | \keyword{multivariate} 66 | -------------------------------------------------------------------------------- /man/plot.clustCombi.Rd: -------------------------------------------------------------------------------- 1 | \name{plot.clustCombi} 2 | \alias{plot.clustCombi} 3 | \title{ 4 | Plot Combined Clusterings Results 5 | } 6 | \description{ 7 | Plot combined clusterings results: classifications corresponding to \code{Mclust}/BIC and to the hierarchically combined classes, "entropy plots" to help to select a number of classes, and the tree structure obtained from combining mixture components. 8 | } 9 | \usage{ 10 | \method{plot}{clustCombi}(x, what = c("classification", "entropy", "tree"), \dots) 11 | } 12 | \arguments{ 13 | \item{x}{ 14 | Object returned by \code{\link{clustCombi}} function. 15 | } 16 | \item{what}{ 17 | Type of plot. 18 | } 19 | \item{\dots}{ 20 | Other arguments to be passed to other functions: \code{\link{combiPlot}}, \code{\link{entPlot}}, \code{\link{combiTree}}. Please see the corresponding documentations. 
21 | } 22 | } 23 | \value{ 24 | Classifications are plotted with \code{\link{combiPlot}}, which relies on the \code{Mclust} plot functions. 25 | Entropy plots are plotted with \code{\link{entPlot}} and may help to select a number of classes: please see the article cited in the references. 26 | Tree plots are produced by \code{\link{combiTree}} and graph the tree structure implied by the clusters combining process. 27 | } 28 | \references{ 29 | J.-P. Baudry, A. E. Raftery, G. Celeux, K. Lo and R. Gottardo (2010). Combining mixture components for clustering. \emph{Journal of Computational and Graphical Statistics, 19(2):332-353.} 30 | } 31 | \author{ 32 | J.-P. Baudry, A. E. Raftery, L. Scrucca 33 | } 34 | \seealso{ 35 | \code{\link{combiPlot}}, \code{\link{entPlot}}, \code{\link{combiTree}}, \code{\link{clustCombi}}. 36 | } 37 | \examples{ 38 | \donttest{ 39 | data(Baudry_etal_2010_JCGS_examples) 40 | 41 | ## 1D Example 42 | output <- clustCombi(data = Test1D, G=1:15) 43 | 44 | # plots the hierarchy of combined solutions, then some "entropy plots" which 45 | # may help one to select the number of classes (please see the article cited 46 | # in the references) 47 | plot(output) 48 | 49 | ## 2D Example 50 | output <- clustCombi(data = ex4.1) 51 | 52 | # plots the hierarchy of combined solutions, then some "entropy plots" which 53 | # may help one to select the number of classes (please see the article cited 54 | # in the references) 55 | plot(output) 56 | 57 | ## 3D Example 58 | output <- clustCombi(data = ex4.4.2) 59 | 60 | # plots the hierarchy of combined solutions, then some "entropy plots" which 61 | # may help one to select the number of classes (please see the article cited 62 | # in the references) 63 | plot(output) 64 | } 65 | 66 | } 67 | \keyword{ cluster } 68 | -------------------------------------------------------------------------------- /man/plot.hc.Rd: -------------------------------------------------------------------------------- 1 | \name{plot.hc} 2 
| \alias{plot.hc} 3 | 4 | \title{Dendrograms for Model-based Agglomerative Hierarchical Clustering} 5 | 6 | \description{ 7 | Display two types of dendrograms for model-based hierarchical clustering 8 | objects. 9 | } 10 | 11 | \usage{ 12 | \method{plot}{hc}(x, what=c("loglik","merge"), maxG=NULL, labels=FALSE, hang=0, \dots) 13 | } 14 | 15 | \arguments{ 16 | \item{x}{ 17 | An object of class \code{'hc'}. 18 | } 19 | \item{what}{ 20 | A character string indicating the type of dendrogram to be displayed.\cr 21 | Possible options are: 22 | \describe{ 23 | \item{\code{"loglik"}}{Distances between dendrogram levels are based on 24 | the classification likelihood.} 25 | \item{\code{"merge"}}{Distances between dendrogram levels are uniform, 26 | so that levels correspond to the number of clusters.} 27 | } 28 | } 29 | \item{maxG}{ 30 | The maximum number of clusters for the dendrogram. 31 | For \code{what = "merge"}, the default is the 32 | number of clusters in the initial partition. 33 | For \code{what = "loglik"}, the default is the minimum of the 34 | maximum number of clusters for which the classification loglikelihood 35 | can be computed in most cases, and the maximum number of clusters for 36 | which the classification likelihood increases with increasing numbers of 37 | clusters. 38 | } 39 | \item{labels}{ 40 | A logical variable indicating whether or not to display leaf (observation) 41 | labels for the dendrogram (row names of the data). These are likely to be 42 | useful only if the number of observations is fairly small, since otherwise 43 | the labels will be too crowded to read. 44 | The default is not to display the leaf labels. 45 | } 46 | \item{hang}{ 47 | For \code{hclust} objects, this argument is the fraction of the plot 48 | height by which labels should hang below the rest of the plot. A negative 49 | value will cause the labels to hang down from 0. 
50 | Because model-based hierarchical clustering does not share all of the 51 | properties of \code{hclust}, the \code{hang} argument won't work in 52 | many instances. 53 | } 54 | \item{\dots}{ 55 | Additional plotting arguments. 56 | } 57 | } 58 | \value{ 59 | A dendrogram is drawn, with distances based on either the classification 60 | likelihood or the merge level (number of clusters). 61 | } 62 | \details{ 63 | The plotting input does not share all of the properties of \code{hclust} 64 | objects, hence not all plotting arguments associated with \code{hclust} 65 | can be expected to work here. 66 | } 67 | \note{ 68 | If \code{modelName = "E"} (univariate with equal variances) or 69 | \code{modelName = "EII"} (multivariate with equal spherical 70 | covariances), then the underlying model is the same as for 71 | Ward's method for hierarchical clustering. 72 | } 73 | \references{ 74 | J. D. Banfield and A. E. Raftery (1993). 75 | Model-based Gaussian and non-Gaussian Clustering. 76 | \emph{Biometrics 49:803-821}. 77 | 78 | C. Fraley (1998). 79 | Algorithms for model-based Gaussian hierarchical clustering. 80 | \emph{SIAM Journal on Scientific Computing 20:270-281}. 81 | 82 | C. Fraley and A. E. Raftery (2002). 83 | Model-based clustering, discriminant analysis, and density estimation. 84 | \emph{Journal of the American Statistical Association 97:611-631}. 
85 | } 86 | \seealso{ 87 | \code{\link{hc}} 88 | } 89 | \examples{ 90 | data(EuroUnemployment) 91 | hcTree <- hc(modelName = "VVV", data = EuroUnemployment) 92 | plot(hcTree, what = "loglik") 93 | plot(hcTree, what = "loglik", labels = TRUE) 94 | plot(hcTree, what = "loglik", maxG = 5, labels = TRUE) 95 | plot(hcTree, what = "merge") 96 | plot(hcTree, what = "merge", labels = TRUE) 97 | plot(hcTree, what = "merge", labels = TRUE, hang = 0.1) 98 | plot(hcTree, what = "merge", labels = TRUE, hang = -1) 99 | plot(hcTree, what = "merge", labels = TRUE, maxG = 5) 100 | } 101 | \keyword{cluster} 102 | -------------------------------------------------------------------------------- /man/plot.mclustBIC.Rd: -------------------------------------------------------------------------------- 1 | \name{plot.mclustBIC} 2 | \alias{plot.mclustBIC} 3 | 4 | \title{BIC Plot for Model-Based Clustering} 5 | 6 | \description{ 7 | Plots the BIC values returned by the \code{\link{mclustBIC}} function. 8 | } 9 | 10 | \usage{ 11 | \method{plot}{mclustBIC}(x, G = NULL, modelNames = NULL, 12 | symbols = NULL, colors = NULL, 13 | xlab = NULL, ylab = "BIC", 14 | legendArgs = list(x = "bottomright", ncol = 2, cex = 1, inset = 0.01), 15 | \dots) 16 | } 17 | \arguments{ 18 | \item{x}{ 19 | Output from \code{mclustBIC}. 20 | } 21 | \item{G}{ 22 | One or more numbers of components corresponding to models fit in \code{x}. 23 | The default is to plot the BIC for all of the numbers of components fit. 24 | } 25 | \item{modelNames}{ 26 | One or more model names corresponding to models fit in \code{x}. 27 | The default is to plot the BIC for all of the models fit. 28 | } 29 | \item{symbols}{ 30 | Either an integer or character vector assigning a plotting symbol to each 31 | unique class in \code{classification}. Elements in \code{colors} 32 | correspond to classes in order of appearance in the sequence of 33 | observations (the order used by the function \code{unique}). 
34 | The default is given by \code{mclust.options("classPlotSymbols")}. 35 | } 36 | \item{colors}{ 37 | Either an integer or character vector assigning a color to each 38 | unique class in \code{classification}. Elements in \code{colors} 39 | correspond to classes in order of appearance in the sequence of 40 | observations (the order used by the function \code{unique}). 41 | The default is given by \code{mclust.options("classPlotColors")}. 42 | } 43 | \item{xlab}{ 44 | Optional label for the horizontal axis of the BIC plot. 45 | } 46 | \item{ylab}{ 47 | Label for the vertical axis of the BIC plot. 48 | } 49 | \item{legendArgs}{ 50 | Arguments to pass to the \code{legend} function. Set to \code{NULL} 51 | for no legend. 52 | } 53 | \item{\dots}{ 54 | Other graphics parameters. 55 | } 56 | } 57 | \value{ 58 | A plot of the BIC values. 59 | } 60 | 61 | \seealso{ 62 | \code{\link{mclustBIC}} 63 | } 64 | \examples{ 65 | \donttest{ 66 | plot(mclustBIC(precip), legendArgs = list(x = "bottomleft")) 67 | 68 | plot(mclustBIC(faithful)) 69 | 70 | plot(mclustBIC(iris[,-5])) 71 | } 72 | } 73 | \keyword{cluster} 74 | % docclass is function 75 | -------------------------------------------------------------------------------- /man/plot.mclustICL.Rd: -------------------------------------------------------------------------------- 1 | \name{plot.mclustICL} 2 | \alias{plot.mclustICL} 3 | 4 | \title{ICL Plot for Model-Based Clustering} 5 | 6 | \description{ 7 | Plots the ICL values returned by the \code{\link{mclustICL}} function. 8 | } 9 | 10 | \usage{ 11 | \method{plot}{mclustICL}(x, ylab = "ICL", \dots) 12 | } 13 | 14 | \arguments{ 15 | \item{x}{ 16 | Output from \code{\link{mclustICL}}. 17 | } 18 | 19 | \item{ylab}{ 20 | Label for the vertical axis of the plot. 21 | } 22 | 23 | \item{\dots}{ 24 | Further arguments passed to the \code{\link{plot.mclustBIC}} function. 25 | } 26 | } 27 | 28 | \value{ 29 | A plot of the ICL values. 
30 | } 31 | 32 | \seealso{ 33 | \code{\link{mclustICL}} 34 | } 35 | \examples{ 36 | \donttest{ 37 | data(faithful) 38 | faithful.ICL = mclustICL(faithful) 39 | plot(faithful.ICL) 40 | } 41 | } 42 | 43 | \keyword{cluster} 44 | % docclass is function 45 | -------------------------------------------------------------------------------- /man/predict.Mclust.Rd: -------------------------------------------------------------------------------- 1 | \name{predict.Mclust} 2 | \alias{predict.Mclust} 3 | 4 | \title{Cluster multivariate observations by Gaussian finite mixture modeling} 5 | 6 | \description{Cluster prediction for multivariate observations based on Gaussian finite mixture models estimated by \code{\link{Mclust}}.} 7 | 8 | \usage{ 9 | \method{predict}{Mclust}(object, newdata, \dots) 10 | } 11 | 12 | \arguments{ 13 | 14 | \item{object}{an object of class \code{'Mclust'} resulting from a call to \code{\link{Mclust}}.} 15 | 16 | \item{newdata}{a data frame or matrix giving the data. If missing the clustering data obtained from the call to \code{\link{Mclust}} are classified.} 17 | 18 | \item{\dots}{further arguments passed to or from other methods.} 19 | } 20 | 21 | % \details{} 22 | 23 | \value{ 24 | Returns a list with the following components: 25 | \item{classification}{a factor of predicted cluster labels for \code{newdata}.} 26 | \item{z}{a matrix whose \emph{[i,k]}th entry is the probability that 27 | observation \emph{i} in \code{newdata} belongs to the \emph{k}th cluster.} 28 | } 29 | 30 | \author{Luca Scrucca} 31 | 32 | % \note{} 33 | 34 | \seealso{\code{\link{Mclust}}.} 35 | 36 | \examples{ 37 | model <- Mclust(faithful) 38 | 39 | # predict cluster for the observed data 40 | pred <- predict(model) 41 | str(pred) 42 | pred$z # equal to model$z 43 | pred$classification # equal to model$classification 44 | plot(faithful, col = pred$classification, pch = pred$classification) 45 | 46 | # predict cluster over a grid 47 | grid <- apply(faithful, 2, function(x) seq(min(x), max(x), 
length = 50)) 48 | grid <- expand.grid(eruptions = grid[,1], waiting = grid[,2]) 49 | pred <- predict(model, grid) 50 | plot(grid, col = mclust.options("classPlotColors")[pred$classification], pch = 15, cex = 0.5) 51 | points(faithful, pch = model$classification) 52 | } 53 | 54 | \keyword{multivariate} 55 | -------------------------------------------------------------------------------- /man/predict.MclustDA.Rd: -------------------------------------------------------------------------------- 1 | \name{predict.MclustDA} 2 | \alias{predict.MclustDA} 3 | 4 | \title{Classify multivariate observations by Gaussian finite mixture modeling} 5 | 6 | \description{Classify multivariate observations based on Gaussian finite mixture models estimated by \code{\link{MclustDA}}.} 7 | 8 | \usage{ 9 | \method{predict}{MclustDA}(object, newdata, prop = object$prop, \dots) 10 | } 11 | 12 | \arguments{ 13 | 14 | \item{object}{an object of class \code{'MclustDA'} resulting from a call to \code{\link{MclustDA}}.} 15 | 16 | \item{newdata}{a data frame or matrix giving the data. 
If missing the train data obtained from the call to \code{\link{MclustDA}} are classified.} 17 | 18 | \item{prop}{the class proportions or prior class probabilities to belong to each class; by default, this is set at the class proportions in the training data.} 19 | 20 | \item{\dots}{further arguments passed to or from other methods.} 21 | } 22 | 23 | % \details{} 24 | 25 | \value{ 26 | Returns a list of with the following components: 27 | \item{classification}{a factor of predicted class labels for \code{newdata}.} 28 | \item{z}{a matrix whose \emph{[i,k]}th entry is the probability that 29 | observation \emph{i} in \code{newdata} belongs to the \emph{k}th class.} 30 | } 31 | 32 | \author{Luca Scrucca} 33 | 34 | % \note{} 35 | 36 | \seealso{\code{\link{MclustDA}}.} 37 | 38 | \examples{ 39 | \donttest{ 40 | odd <- seq(from = 1, to = nrow(iris), by = 2) 41 | even <- odd + 1 42 | X.train <- iris[odd,-5] 43 | Class.train <- iris[odd,5] 44 | X.test <- iris[even,-5] 45 | Class.test <- iris[even,5] 46 | 47 | irisMclustDA <- MclustDA(X.train, Class.train) 48 | 49 | predTrain <- predict(irisMclustDA) 50 | predTrain 51 | predTest <- predict(irisMclustDA, X.test) 52 | predTest 53 | } 54 | } 55 | 56 | \keyword{multivariate} 57 | -------------------------------------------------------------------------------- /man/predict.MclustDR.Rd: -------------------------------------------------------------------------------- 1 | \name{predict.MclustDR} 2 | \alias{predict.MclustDR} 3 | \alias{predict2D.MclustDR} 4 | 5 | 6 | \title{Classify multivariate observations on a dimension reduced subspace by Gaussian finite mixture modeling} 7 | 8 | \description{Classify multivariate observations on a dimension reduced subspace estimated from a Gaussian finite mixture model.} 9 | 10 | \usage{ 11 | \method{predict}{MclustDR}(object, dim = 1:object$numdir, newdata, eval.points, \dots) 12 | } 13 | 14 | \arguments{ 15 | 16 | \item{object}{an object of class \code{'MclustDR'} resulting from a call to 
\code{\link{MclustDR}.}} 17 | 18 | \item{dim}{the dimensions of the reduced subspace used for prediction.} 19 | 20 | \item{newdata}{a data frame or matrix giving the data. If missing the data obtained from the call to \code{\link{MclustDR}} are used.} 21 | 22 | \item{eval.points}{a data frame or matrix giving the data projected on the reduced subspace. If provided \code{newdata} is not used.} 23 | 24 | \item{\dots}{further arguments passed to or from other methods.} 25 | } 26 | 27 | % \details{} 28 | 29 | \value{ 30 | Returns a list of with the following components: 31 | 32 | \item{dir}{a matrix containing the data projected onto the \code{dim} dimensions of the reduced subspace.} 33 | \item{density}{densities from mixture model for each data point.} 34 | \item{z}{a matrix whose \emph{[i,k]}th entry is the probability that 35 | observation \emph{i} in \code{newdata} belongs to the \emph{k}th class.} 36 | \item{uncertainty}{The uncertainty associated with the classification.} 37 | \item{classification}{A vector of values giving the MAP classification.} 38 | } 39 | 40 | \references{ 41 | Scrucca, L. (2010) Dimension reduction for model-based clustering. 42 | \emph{Statistics and Computing}, 20(4), pp. 471-484. 
43 | } 44 | 45 | \author{Luca Scrucca} 46 | 47 | % \note{} 48 | 49 | \seealso{\code{\link{MclustDR}}.} 50 | 51 | \examples{ 52 | mod = Mclust(iris[,1:4]) 53 | dr = MclustDR(mod) 54 | pred = predict(dr) 55 | str(pred) 56 | 57 | data(banknote) 58 | mod = MclustDA(banknote[,2:7], banknote$Status) 59 | dr = MclustDR(mod) 60 | pred = predict(dr) 61 | str(pred) 62 | } 63 | 64 | \keyword{multivariate} 65 | -------------------------------------------------------------------------------- /man/predict.MclustSSC.Rd: -------------------------------------------------------------------------------- 1 | \name{predict.MclustSSC} 2 | \alias{predict.MclustSSC} 3 | 4 | \title{Classification of multivariate observations by semi-supervised Gaussian finite mixtures} 5 | 6 | \description{Classify multivariate observations based on Gaussian finite mixture models estimated by \code{\link{MclustSSC}}.} 7 | 8 | \usage{ 9 | \method{predict}{MclustSSC}(object, newdata, \dots) 10 | } 11 | 12 | \arguments{ 13 | 14 | \item{object}{an object of class \code{'MclustSSC'} resulting from a call to \code{\link{MclustSSC}}.} 15 | 16 | \item{newdata}{a data frame or matrix giving the data. 
If missing the train data obtained from the call to \code{\link{MclustSSC}} are classified.} 17 | 18 | \item{\dots}{further arguments passed to or from other methods.} 19 | } 20 | 21 | % \details{} 22 | 23 | \value{ 24 | Returns a list of with the following components: 25 | \item{classification}{a factor of predicted class labels for \code{newdata}.} 26 | \item{z}{a matrix whose \emph{[i,k]}th entry is the probability that 27 | observation \emph{i} in \code{newdata} belongs to the \emph{k}th class.} 28 | } 29 | 30 | \author{Luca Scrucca} 31 | 32 | % \note{} 33 | 34 | \seealso{\code{\link{MclustSSC}}.} 35 | 36 | \examples{ 37 | \donttest{ 38 | X <- iris[,1:4] 39 | class <- iris$Species 40 | # randomly remove class labels 41 | set.seed(123) 42 | class[sample(1:length(class), size = 120)] <- NA 43 | table(class, useNA = "ifany") 44 | clPairs(X, ifelse(is.na(class), 0, class), 45 | symbols = c(0, 16, 17, 18), colors = c("grey", 4, 2, 3), 46 | main = "Partially classified data") 47 | 48 | # Fit semi-supervised classification model 49 | mod_SSC <- MclustSSC(X, class) 50 | 51 | pred_SSC <- predict(mod_SSC) 52 | table(Predicted = pred_SSC$classification, Actual = class, useNA = "ifany") 53 | 54 | X_new = data.frame(Sepal.Length = c(5, 8), 55 | Sepal.Width = c(3.1, 4), 56 | Petal.Length = c(2, 5), 57 | Petal.Width = c(0.5, 2)) 58 | predict(mod_SSC, newdata = X_new) 59 | } 60 | } 61 | 62 | \keyword{classification} 63 | -------------------------------------------------------------------------------- /man/predict.densityMclust.Rd: -------------------------------------------------------------------------------- 1 | \name{predict.densityMclust} 2 | \alias{predict.densityMclust} 3 | 4 | \title{Density estimate of multivariate observations by Gaussian finite mixture modeling} 5 | 6 | \description{Compute density estimation for multivariate observations based on Gaussian finite mixture models estimated by \code{\link{densityMclust}}.} 7 | 8 | \usage{ 9 | 
\method{predict}{densityMclust}(object, newdata, what = c("dens", "cdens", "z"), logarithm = FALSE, \dots) 10 | } 11 | 12 | \arguments{ 13 | 14 | \item{object}{an object of class \code{'densityMclust'} resulting from a call to \code{\link{densityMclust}}.} 15 | 16 | \item{newdata}{a vector, a data frame or matrix giving the data. If missing the density is computed for the input data obtained from the call to \code{\link{densityMclust}}.} 17 | 18 | \item{what}{a character string specifying what to retrieve: \code{"dens"} returns a vector of values for the mixture density; \code{"cdens"} returns a matrix of component densities for each mixture component (along the columns); \code{"z"} returns a matrix of conditional probabilities of each data point to belong to a mixture component.} 19 | 20 | \item{logarithm}{A logical value indicating whether or not the logarithm of the density or component densities should be returned.} 21 | 22 | \item{\dots}{further arguments passed to or from other methods.} 23 | } 24 | 25 | % \details{} 26 | 27 | \value{ 28 | Returns a vector or a matrix of densities evaluated at \code{newdata} depending on the argument \code{what} (see above). 29 | } 30 | 31 | \author{Luca Scrucca} 32 | 33 | % \note{} 34 | 35 | \seealso{\code{\link{Mclust}}.} 36 | 37 | \examples{ 38 | \donttest{ 39 | x <- faithful$waiting 40 | dens <- densityMclust(x, plot = FALSE) 41 | x0 <- seq(50, 100, by = 10) 42 | d0 <- predict(dens, x0) 43 | plot(dens, what = "density") 44 | points(x0, d0, pch = 20) 45 | } 46 | } 47 | 48 | \keyword{multivariate} 49 | -------------------------------------------------------------------------------- /man/priorControl.Rd: -------------------------------------------------------------------------------- 1 | \name{priorControl} 2 | \alias{priorControl} 3 | \title{ 4 | Conjugate Prior for Gaussian Mixtures. 5 | } 6 | \description{ 7 | Specify a conjugate prior for Gaussian mixtures. 
8 | } 9 | \usage{ 10 | priorControl(functionName = "defaultPrior", \dots) 11 | } 12 | \arguments{ 13 | \item{functionName}{ 14 | The name of the function specifying the conjugate prior. 15 | By default the function \code{\link{defaultPrior}} is used, and this 16 | can also be used as a template for alternative specification. 17 | } 18 | \item{\dots}{ 19 | Optional named arguments to the function specified in \code{functionName} 20 | together with their values. 21 | } 22 | } 23 | \value{ 24 | A list with the function name as the first component. The remaining 25 | components (if any) consist of a list of arguments to the function 26 | with assigned values. 27 | } 28 | \details{ 29 | The function \code{priorControl} is used to specify a conjugate prior 30 | for EM within \emph{MCLUST}.\cr 31 | Note that, as described in \code{\link{defaultPrior}}, in the multivariate 32 | case only 10 out of 14 models may be used in conjunction with a prior, i.e. 33 | those available in \emph{MCLUST} up to version 4.4. 34 | } 35 | \references{ 36 | C. Fraley and A. E. Raftery (2007). 37 | Bayesian regularization for normal mixture estimation and model-based 38 | clustering. \emph{Journal of Classification 24:155-181}. 
39 | } 40 | \seealso{ 41 | \code{\link{mclustBIC}}, 42 | \code{\link{me}}, 43 | \code{\link{mstep}}, 44 | \code{\link{defaultPrior}} 45 | } 46 | \examples{ 47 | # default prior 48 | irisBIC <- mclustBIC(iris[,-5], prior = priorControl()) 49 | summary(irisBIC, iris[,-5]) 50 | 51 | # no prior on the mean; default prior on variance 52 | irisBIC <- mclustBIC(iris[,-5], prior = priorControl(shrinkage = 0)) 53 | summary(irisBIC, iris[,-5]) 54 | } 55 | \keyword{cluster} 56 | -------------------------------------------------------------------------------- /man/randomOrthogonalMatrix.Rd: -------------------------------------------------------------------------------- 1 | \name{randomOrthogonalMatrix} 2 | \alias{randomOrthogonalMatrix} 3 | 4 | \title{Random orthogonal matrix} 5 | 6 | \description{ 7 | Generate a random orthogonal basis matrix of dimension \eqn{(nrow x ncol)} using 8 | the method in Heiberger (1978). 9 | } 10 | 11 | \usage{ 12 | randomOrthogonalMatrix(nrow, ncol, n = nrow, d = ncol, seed = NULL) 13 | } 14 | 15 | \arguments{ 16 | \item{nrow}{the number of rows of the resulting orthogonal matrix.} 17 | \item{ncol}{the number of columns of the resulting orthogonal matrix.} 18 | \item{n}{deprecated. See \code{nrow} above.} 19 | \item{d}{deprecated. See \code{ncol} above.} 20 | \item{seed}{an optional integer argument to use in \code{set.seed()} for 21 | reproducibility. By default the current seed will be used. 22 | Reproducibility can also be achieved by calling \code{set.seed()} 23 | before calling this function.} 24 | } 25 | 26 | \details{ 27 | The use of arguments \code{n} and \code{d} is deprecated and they will be removed in the future. 28 | } 29 | 30 | \value{ 31 | An orthogonal matrix of dimension \eqn{nrow x ncol} such that each column is orthogonal to the others and has unit length. Because of the latter, it is also called orthonormal. 32 | } 33 | 34 | \seealso{\code{\link{coordProj}}} 35 | 36 | \references{ 37 | Heiberger R.
(1978) Generation of random orthogonal matrices. \emph{Journal of the Royal Statistical Society. Series C (Applied Statistics)}, 27(2), 199-206. 38 | } 39 | 40 | \examples{ 41 | B <- randomOrthogonalMatrix(10,3) 42 | zapsmall(crossprod(B)) 43 | } 44 | -------------------------------------------------------------------------------- /man/sigma2decomp.Rd: -------------------------------------------------------------------------------- 1 | \name{sigma2decomp} 2 | \alias{sigma2decomp} 3 | \title{ 4 | Convert mixture component covariances to decomposition form. 5 | } 6 | \description{ 7 | Converts a set of covariance matrices from representation as a 3-D array 8 | to a parameterization by eigenvalue decomposition. 9 | } 10 | \usage{ 11 | sigma2decomp(sigma, G = NULL, tol = sqrt(.Machine$double.eps), \dots) 12 | } 13 | \arguments{ 14 | \item{sigma}{ 15 | Either a 3-D array whose [,,k]th component is the covariance matrix for the 16 | kth component in an MVN mixture model, or a single covariance 17 | matrix in the case that all components have the same covariance. 18 | } 19 | \item{G}{ 20 | The number of components in the mixture. When 21 | \code{sigma} is a 3-D array, the number of components 22 | can be inferred from its dimensions. 23 | } 24 | \item{tol}{ 25 | Tolerance for determining whether or not the covariances have equal volume, 26 | shape, and/or orientation. The default is the square root of the relative 27 | machine precision, \code{sqrt(.Machine$double.eps)}, which is about 28 | \code{1.e-8}. 29 | } 30 | \item{\dots}{ 31 | Catches unused arguments from an indirect or list call via \code{do.call}. 32 | } 33 | } 34 | \value{ 35 | The covariance matrices for the mixture components in decomposition form, 36 | including the following components: 37 | \item{modelName}{ 38 | A character string indicating the inferred model. The help file for 39 | \code{\link{mclustModelNames}} describes the available models. 40 | } 41 | \item{d}{ 42 | The dimension of the data.
43 | } 44 | \item{G}{ 45 | The number of components in the mixture model. 46 | } 47 | \item{scale}{ 48 | Either a \emph{G}-vector giving the scale of the covariance (the 49 | \emph{d}th root of its determinant) for each component in the 50 | mixture model, or a single numeric value if the scale is the same 51 | for each component. 52 | } 53 | \item{shape}{ 54 | Either a \emph{G} by \emph{d} matrix in which the \emph{k}th 55 | column is the shape of the covariance matrix (normalized to have 56 | determinant 1) for the \emph{k}th component, or a \emph{d}-vector 57 | giving a common shape for all components. 58 | } 59 | \item{orientation}{ 60 | Either a \emph{d} by \emph{d} by \emph{G} array whose 61 | \code{[,,k]}th entry is the orthonormal matrix whose columns are the 62 | eigenvectors of the covariance matrix of the \emph{k}th component, 63 | or a \emph{d} by \emph{d} orthonormal matrix if the mixture 64 | components have a common orientation. The \code{orientation} component of 65 | \code{decomp} can be omitted in spherical and diagonal models, for 66 | which the principal components are parallel to the coordinate axes 67 | so that the orientation matrix is the identity. 68 | } 69 | } 70 | 71 | \seealso{ 72 | \code{\link{decomp2sigma}} 73 | } 74 | \examples{ 75 | meEst <- meEEE(iris[,-5], unmap(iris[,5])) 76 | names(meEst$parameters$variance) 77 | meEst$parameters$variance$Sigma 78 | 79 | sigma2decomp(meEst$parameters$variance$Sigma, G = length(unique(iris[,5]))) 80 | } 81 | \keyword{cluster} 82 | -------------------------------------------------------------------------------- /man/sim.Rd: -------------------------------------------------------------------------------- 1 | \name{sim} 2 | \alias{sim} 3 | \title{ 4 | Simulate from Parameterized MVN Mixture Models 5 | } 6 | \description{ 7 | Simulate data from parameterized MVN mixture models.
8 | } 9 | \usage{ 10 | sim(modelName, parameters, n, seed = NULL, \dots) 11 | } 12 | \arguments{ 13 | \item{modelName}{ 14 | A character string indicating the model. The help file for 15 | \code{\link{mclustModelNames}} describes the available models. 16 | } 17 | \item{parameters}{ 18 | A list with the following components: 19 | \describe{ 20 | \item{\code{pro}}{ 21 | A vector whose \emph{k}th component is the mixing proportion for 22 | the \emph{k}th component of the mixture model. 23 | If missing, equal proportions are assumed. 24 | } 25 | \item{\code{mean}}{ 26 | The mean for each component. If there is more than one component, 27 | this is a matrix whose kth column is the mean of the \emph{k}th 28 | component of the mixture model. 29 | } 30 | \item{\code{variance}}{ 31 | A list of variance parameters for the model. 32 | The components of this list depend on the model 33 | specification. See the help file for \code{\link{mclustVariance}} 34 | for details. 35 | } 36 | } 37 | } 38 | \item{n}{ 39 | An integer specifying the number of data points to be simulated. 40 | } 41 | \item{seed}{ 42 | An optional integer argument to \code{set.seed} for reproducible 43 | random class assignment. By default the current seed will be used. 44 | Reproducibility can also be achieved by calling \code{set.seed} 45 | before calling \code{sim}. 46 | } 47 | \item{\dots }{ 48 | Catches unused arguments in indirect or list calls via \code{do.call}. 49 | } 50 | } 51 | \value{ 52 | A matrix in which first column is the classification and the remaining 53 | columns are the \code{n} observations simulated from the specified MVN 54 | mixture model. 55 | \item{Attributes:}{ 56 | \code{"modelName"} A character string indicating the variance 57 | model used for the simulation. 58 | } 59 | } 60 | \details{ 61 | This function can be used with an indirect or list call using 62 | \code{do.call}, allowing the output of e.g. 
\code{mstep}, \code{em}, 63 | \code{me}, \code{Mclust} to be passed directly without the need to 64 | specify individual parameters as arguments. 65 | } 66 | 67 | \seealso{ 68 | \code{\link{simE}}, \dots, 69 | \code{\link{simVVV}}, 70 | \code{\link{Mclust}}, 71 | \code{\link{mstep}}, 72 | \code{\link{do.call}} 73 | } 74 | \examples{ 75 | irisBIC <- mclustBIC(iris[,-5]) 76 | irisModel <- mclustModel(iris[,-5], irisBIC) 77 | names(irisModel) 78 | irisSim <- sim(modelName = irisModel$modelName, 79 | parameters = irisModel$parameters, 80 | n = nrow(iris)) 81 | 82 | \donttest{ 83 | do.call("sim", irisModel) # alternative call 84 | } 85 | 86 | par(pty = "s", mfrow = c(1,2)) 87 | 88 | dimnames(irisSim) <- list(NULL, c("dummy", (dimnames(iris)[[2]])[-5])) 89 | 90 | dimens <- c(1,2) 91 | lim1 <- apply(iris[,dimens],2,range) 92 | lim2 <- apply(irisSim[,dimens+1],2,range) 93 | lims <- apply(rbind(lim1,lim2),2,range) 94 | xlim <- lims[,1] 95 | ylim <- lims[,2] 96 | 97 | coordProj(iris[,-5], parameters=irisModel$parameters, 98 | classification=map(irisModel$z), 99 | dimens=dimens, xlim=xlim, ylim=ylim) 100 | 101 | coordProj(iris[,-5], parameters=irisModel$parameters, 102 | classification=map(irisModel$z), truth = irisSim[,-1], 103 | dimens=dimens, xlim=xlim, ylim=ylim) 104 | 105 | irisModel3 <- mclustModel(iris[,-5], irisBIC, G=3) 106 | irisSim3 <- sim(modelName = irisModel3$modelName, 107 | parameters = irisModel3$parameters, n = 500, seed = 1) 108 | \donttest{ 109 | irisModel3$n <- NULL 110 | irisSim3 <- do.call("sim",c(list(n=500,seed=1),irisModel3)) # alternative call 111 | } 112 | clPairs(irisSim3[,-1], cl = irisSim3[,1]) 113 | } 114 | \keyword{cluster} 115 | -------------------------------------------------------------------------------- /man/simE.Rd: -------------------------------------------------------------------------------- 1 | \name{simE} 2 | \alias{simE} 3 | \alias{simV} 4 | \alias{simEII} 5 | \alias{simVII} 6 | \alias{simEEI} 7 | \alias{simVEI} 8 | 
\alias{simEVI} 9 | \alias{simVVI} 10 | \alias{simEEV} 11 | \alias{simEEE} 12 | \alias{simVEV} 13 | \alias{simVVV} 14 | \alias{simEVE} 15 | \alias{simEVV} 16 | \alias{simVEE} 17 | \alias{simVVE} 18 | 19 | \title{ 20 | Simulate from a Parameterized MVN Mixture Model 21 | } 22 | \description{ 23 | Simulate data from a parameterized MVN mixture model. 24 | } 25 | \usage{ 26 | simE(parameters, n, seed = NULL, \dots) 27 | simV(parameters, n, seed = NULL, \dots) 28 | simEII(parameters, n, seed = NULL, \dots) 29 | simVII(parameters, n, seed = NULL, \dots) 30 | simEEI(parameters, n, seed = NULL, \dots) 31 | simVEI(parameters, n, seed = NULL, \dots) 32 | simEVI(parameters, n, seed = NULL, \dots) 33 | simVVI(parameters, n, seed = NULL, \dots) 34 | simEEE(parameters, n, seed = NULL, \dots) 35 | simVEE(parameters, n, seed = NULL, \dots) 36 | simEVE(parameters, n, seed = NULL, \dots) 37 | simVVE(parameters, n, seed = NULL, \dots) 38 | simEEV(parameters, n, seed = NULL, \dots) 39 | simVEV(parameters, n, seed = NULL, \dots) 40 | simEVV(parameters, n, seed = NULL, \dots) 41 | simVVV(parameters, n, seed = NULL, \dots) 42 | } 43 | \arguments{ 44 | \item{parameters}{ 45 | A list with the following components: 46 | \describe{ 47 | \item{\code{pro}}{ 48 | A vector whose \emph{k}th component is the mixing proportion for 49 | the \emph{k}th component of the mixture model. 50 | If missing, equal proportions are assumed. 51 | } 52 | \item{\code{mean}}{ 53 | The mean for each component. If there is more than one component, 54 | this is a matrix whose kth column is the mean of the \emph{k}th 55 | component of the mixture model. 56 | } 57 | \item{\code{variance}}{ 58 | A list of variance parameters for the model. 59 | The components of this list depend on the model 60 | specification. See the help file for \code{\link{mclustVariance}} 61 | for details. 62 | } 63 | } 64 | } 65 | \item{n}{ 66 | An integer specifying the number of data points to be simulated. 
67 | } 68 | \item{seed}{ 69 | An optional integer argument to \code{set.seed()} for reproducible 70 | random class assignment. By default the current seed will be used. 71 | Reproducibility can also be achieved by calling \code{set.seed} 72 | before calling \code{sim}. 73 | } 74 | \item{\dots }{ 75 | Catches unused arguments in indirect or list calls via \code{do.call}. 76 | } 77 | } 78 | \value{ 79 | A matrix in which first column is the classification and the remaining 80 | columns are the \code{n} observations simulated from the specified MVN 81 | mixture model. 82 | \item{Attributes:}{ 83 | \code{"modelName"} A character string indicating the variance 84 | model used for the simulation. 85 | } 86 | } 87 | \details{ 88 | This function can be used with an indirect or list call using 89 | \code{do.call}, allowing the output of e.g. \code{mstep}, \code{em}, 90 | \code{me}, \code{Mclust} to be passed directly without the need 91 | to specify individual parameters as arguments. 92 | } 93 | 94 | \seealso{ 95 | \code{\link{sim}}, 96 | \code{\link{Mclust}}, 97 | \code{\link{mstepE}}, 98 | \code{\link{mclustVariance}}.
99 | } 100 | \examples{ 101 | \donttest{ 102 | d <- 2 103 | G <- 2 104 | scale <- 1 105 | shape <- c(1, 9) 106 | 107 | O1 <- diag(2) 108 | O2 <- diag(2)[,c(2,1)] 109 | O <- array(cbind(O1,O2), c(2, 2, 2)) 110 | O 111 | 112 | variance <- list(d= d, G = G, scale = scale, shape = shape, orientation = O) 113 | mu <- matrix(0, d, G) ## center at the origin 114 | simdat <- simEEV( n = 200, 115 | parameters = list(pro=c(1,1),mean=mu,variance=variance), 116 | seed = NULL) 117 | 118 | cl <- simdat[,1] 119 | 120 | sigma <- array(apply(O, 3, function(x,y) crossprod(x*y), 121 | y = sqrt(scale*shape)), c(2,2,2)) 122 | paramList <- list(mu = mu, sigma = sigma) 123 | coordProj( simdat, paramList = paramList, classification = cl) 124 | } 125 | } 126 | \keyword{cluster} 127 | -------------------------------------------------------------------------------- /man/softmax.Rd: -------------------------------------------------------------------------------- 1 | \name{softmax} 2 | \alias{softmax} 3 | 4 | \title{Softmax function} 5 | 6 | \description{ 7 | Efficient implementation (via Fortran) of the softmax (aka multinomial logistic) function converting a set of numerical values to probabilities summing to 1. 8 | } 9 | 10 | \usage{ 11 | softmax(x, v = NULL) 12 | } 13 | 14 | \arguments{ 15 | \item{x}{a matrix of dimension \eqn{n \times k} of numerical values. If a vector is provided, it is converted to a single-row matrix.} 16 | \item{v}{an optional vector of length \eqn{k} of numerical values to be added to each row of \code{x} matrix. 
If not provided, a vector of zeros is used.} 17 | } 18 | 19 | \details{ 20 | Given the matrix \code{x}, for each row \eqn{x_{[i]} = [x_1, \dots, x_k]} (with \eqn{i=1,\dots,n}), the softmax function calculates 21 | \deqn{ 22 | \text{softmax}(x_{[i]})_j = 23 | \dfrac{\exp(x_j + v_j)}{\sum_{l=1}^k \exp(x_l + v_l)} 24 | \qquad \text{for } j = 1,\dots,k 25 | } 26 | } 27 | 28 | \value{Returns a matrix of the same dimension as \code{x} with values in the range \eqn{(0,1)} that sum to 1 along the rows.} 29 | 30 | \author{Luca Scrucca} 31 | 32 | \seealso{\code{\link{logsumexp}}} 33 | 34 | \references{ 35 | Blanchard P., Higham D. J., Higham N. J. (2021). 36 | Accurately computing the log-sum-exp and softmax functions. 37 | \emph{IMA Journal of Numerical Analysis}, 41/4:2311–2330. 38 | \doi{10.1093/imanum/draa038} 39 | } 40 | 41 | \examples{ 42 | x = matrix(rnorm(15), 5, 3) 43 | v = log(c(0.5, 0.3, 0.2)) 44 | (z = softmax(x, v)) 45 | rowSums(z) 46 | } 47 | -------------------------------------------------------------------------------- /man/summary.Mclust.Rd: -------------------------------------------------------------------------------- 1 | \name{summary.Mclust} 2 | \alias{summary.Mclust} 3 | \alias{print.summary.Mclust} 4 | 5 | \title{Summarizing Gaussian Finite Mixture Model Fits} 6 | 7 | \description{Summary method for class \code{"Mclust"}.} 8 | 9 | \usage{ 10 | \method{summary}{Mclust}(object, classification = TRUE, parameters = FALSE, \dots) 11 | \method{print}{summary.Mclust}(x, digits = getOption("digits"), \dots) 12 | } 13 | 14 | \arguments{ 15 | \item{object}{An object of class \code{'Mclust'} resulting of a call to \code{\link{Mclust}} or \code{\link{densityMclust}}.} 16 | 17 | \item{x}{An object of class \code{'summary.Mclust'}, usually, a result of a call to \code{summary.Mclust}.} 18 | 19 | \item{classification}{Logical; if \code{TRUE} a table of MAP classification/clustering of observations is printed.} 20 | 21 | \item{parameters}{Logical; if \code{TRUE}, 
the parameters of mixture components are printed.} 22 | 23 | \item{digits}{The number of significant digits to use when printing.} 24 | 25 | \item{\dots}{Further arguments passed to or from other methods.} 26 | } 27 | 28 | % \details{} 29 | 30 | % \value{} 31 | 32 | \author{Luca Scrucca} 33 | 34 | % \note{} 35 | 36 | \seealso{\code{\link{Mclust}}, \code{\link{densityMclust}}.} 37 | 38 | \examples{ 39 | \donttest{ 40 | mod1 = Mclust(iris[,1:4]) 41 | summary(mod1) 42 | summary(mod1, parameters = TRUE, classification = FALSE) 43 | 44 | mod2 = densityMclust(faithful, plot = FALSE) 45 | summary(mod2) 46 | summary(mod2, parameters = TRUE) 47 | } 48 | } 49 | 50 | \keyword{cluster} 51 | -------------------------------------------------------------------------------- /man/summary.MclustBootstrap.Rd: -------------------------------------------------------------------------------- 1 | \name{summary.MclustBootstrap} 2 | \alias{summary.MclustBootstrap} 3 | \alias{print.summary.MclustBootstrap} 4 | 5 | \title{Summary Function for Bootstrap Inference for Gaussian Finite Mixture Models} 6 | 7 | \description{Summary of bootstrap distribution for the parameters of a Gaussian mixture model providing either standard errors or percentile bootstrap confidence intervals.} 8 | 9 | \usage{ 10 | \method{summary}{MclustBootstrap}(object, what = c("se", "ci", "ave"), conf.level = 0.95, \dots) 11 | } 12 | 13 | \arguments{ 14 | \item{object}{An object of class \code{'MclustBootstrap'} as returned by \code{\link{MclustBootstrap}}.} 15 | \item{what}{A character string: \code{"se"} for the standard errors; \code{"ci"} for the confidence intervals; \code{"ave"} for the averages.} 16 | \item{conf.level}{A value specifying the confidence level of the interval.} 17 | \item{\dots}{Further arguments passed to or from other methods.} 18 | } 19 | 20 | \details{For details about the procedure used to obtain the bootstrap distribution see \code{\link{MclustBootstrap}}.} 21 | 22 | %\value{} 23 | 24 | 
\seealso{\code{\link{MclustBootstrap}}.} 25 | 26 | \examples{ 27 | \donttest{ 28 | data(diabetes) 29 | X = diabetes[,-1] 30 | modClust = Mclust(X) 31 | bootClust = MclustBootstrap(modClust) 32 | summary(bootClust, what = "se") 33 | summary(bootClust, what = "ci") 34 | 35 | data(acidity) 36 | modDens = densityMclust(acidity, plot = FALSE) 37 | modDens = MclustBootstrap(modDens) 38 | summary(modDens, what = "se") 39 | summary(modDens, what = "ci") 40 | } 41 | } 42 | 43 | \keyword{htest} 44 | \keyword{cluster} 45 | -------------------------------------------------------------------------------- /man/summary.MclustDA.Rd: -------------------------------------------------------------------------------- 1 | \name{summary.MclustDA} 2 | \alias{summary.MclustDA} 3 | \alias{print.summary.MclustDA} 4 | 5 | \title{Summarizing discriminant analysis based on Gaussian finite mixture modeling} 6 | 7 | \description{Summary method for class \code{"MclustDA"}.} 8 | 9 | \usage{ 10 | \method{summary}{MclustDA}(object, parameters = FALSE, newdata, newclass, \dots) 11 | \method{print}{summary.MclustDA}(x, digits = getOption("digits"), \dots) 12 | } 13 | 14 | \arguments{ 15 | \item{object}{An object of class \code{'MclustDA'} resulting from a call to \code{\link{MclustDA}}.} 16 | 17 | \item{x}{An object of class \code{'summary.MclustDA'}, usually, a result of a call to \code{summary.MclustDA}.} 18 | 19 | \item{parameters}{Logical; if \code{TRUE}, the parameters of mixture components are printed.} 20 | 21 | \item{newdata}{A data frame or matrix giving the test data.} 22 | 23 | \item{newclass}{A vector giving the class labels for the observations in 24 | the test data.} 25 | 26 | \item{digits}{The number of significant digits to use when printing.} 27 | 28 | \item{\dots}{Further arguments passed to or from other methods.} 29 | } 30 | 31 | % \details{} 32 | 33 | \value{The function \code{summary.MclustDA} computes and returns a list of summary statistics of the estimated MclustDA or EDDA 
model for classification.} 34 | 35 | \author{Luca Scrucca} 36 | 37 | % \note{} 38 | 39 | \seealso{\code{\link{MclustDA}}, \code{\link{plot.MclustDA}}.} 40 | 41 | \examples{ 42 | mod = MclustDA(data = iris[,1:4], class = iris$Species) 43 | summary(mod) 44 | summary(mod, parameters = TRUE) 45 | } 46 | \keyword{multivariate} 47 | -------------------------------------------------------------------------------- /man/summary.MclustDR.Rd: -------------------------------------------------------------------------------- 1 | \name{summary.MclustDR} 2 | \alias{summary.MclustDR} 3 | \alias{print.summary.MclustDR} 4 | 5 | \title{Summarizing dimension reduction method for model-based clustering and classification} 6 | 7 | \description{Summary method for class \code{"MclustDR"}.} 8 | 9 | \usage{ 10 | \method{summary}{MclustDR}(object, numdir, std = FALSE, \dots) 11 | \method{print}{summary.MclustDR}(x, digits = max(5, getOption("digits") - 3), \dots) 12 | } 13 | 14 | \arguments{ 15 | \item{object}{An object of class \code{'MclustDR'} resulting from a call to \code{\link{MclustDR}}.} 16 | 17 | \item{x}{An object of class \code{'summary.MclustDR'}, usually, a result of a call to \code{summary.MclustDR}.} 18 | 19 | \item{numdir}{An integer providing the number of basis directions to be printed.} 20 | 21 | \item{std}{if \code{TRUE} the coefficients basis are scaled such that all 22 | predictors have unit standard deviation.} 23 | 24 | \item{digits}{The number of significant digits to use when printing.} 25 | 26 | \item{\dots}{Further arguments passed to or from other methods.} 27 | } 28 | 29 | %\details{} 30 | %\value{} 31 | \author{Luca Scrucca} 32 | %\note{} 33 | 34 | \seealso{ 35 | \code{\link{MclustDR}}, \code{\link{plot.MclustDR}} 36 | } 37 | %\examples{} 38 | %\keyword{} 39 | -------------------------------------------------------------------------------- /man/summary.MclustSSC.Rd: -------------------------------------------------------------------------------- 1 | 
\name{summary.MclustSSC} 2 | \alias{summary.MclustSSC} 3 | \alias{print.summary.MclustSSC} 4 | 5 | \title{Summarizing semi-supervised classification model based on Gaussian finite mixtures} 6 | 7 | \description{Summary method for class \code{"MclustSSC"}.} 8 | 9 | \usage{ 10 | \method{summary}{MclustSSC}(object, parameters = FALSE, \dots) 11 | \method{print}{summary.MclustSSC}(x, digits = getOption("digits"), \dots) 12 | } 13 | 14 | \arguments{ 15 | \item{object}{An object of class \code{'MclustSSC'} resulting from a call to \code{\link{MclustSSC}}.} 16 | 17 | \item{x}{An object of class \code{'summary.MclustSSC'}, usually, a result of a call to \code{summary.MclustSSC}.} 18 | 19 | \item{parameters}{Logical; if \code{TRUE}, the parameters of mixture components are printed.} 20 | 21 | \item{digits}{The number of significant digits to use when printing.} 22 | 23 | \item{\dots}{Further arguments passed to or from other methods.} 24 | } 25 | 26 | % \details{} 27 | 28 | \value{The function \code{summary.MclustSSC} computes and returns a list of summary statistics of the estimated MclustSSC model for semi-supervised classification.} 29 | 30 | \author{Luca Scrucca} 31 | 32 | % \note{} 33 | 34 | \seealso{\code{\link{MclustSSC}}, \code{\link{plot.MclustSSC}}.} 35 | 36 | \keyword{classification} 37 | -------------------------------------------------------------------------------- /man/thyroid.Rd: -------------------------------------------------------------------------------- 1 | \name{thyroid} 2 | \alias{thyroid} 3 | \docType{data} 4 | 5 | \title{UCI Thyroid Gland Data} 6 | 7 | \description{ 8 | Data on five laboratory tests administered to a sample of 215 patients. 
The tests are used to predict whether a patient's thyroid can be classified as euthyroidism (normal thyroid gland function), hypothyroidism (underactive thyroid not producing enough thyroid hormone) or hyperthyroidism (overactive thyroid producing and secreting excessive amounts of the free thyroid hormones T3 and/or thyroxine T4). Diagnosis of thyroid operation was based on a complete medical record, including anamnesis, scan, etc.} 9 | 10 | \usage{data(thyroid)} 11 | 12 | \format{A data frame with the following variables: 13 | 14 | \describe{ 15 | \item{Diagnosis}{Diagnosis of thyroid operation: \code{Hypo}, \code{Normal}, and \code{Hyper}.} 16 | \item{RT3U}{T3-resin uptake test (percentage).} 17 | \item{T4}{Total Serum thyroxin as measured by the isotopic displacement method.} 18 | \item{T3}{Total serum triiodothyronine as measured by radioimmuno assay.} 19 | \item{TSH}{Basal thyroid-stimulating hormone (TSH) as measured by radioimmuno assay.} 20 | \item{DTSH}{Maximal absolute difference of TSH value after injection of 200 micrograms of thyrotropin-releasing hormone as compared to the basal value.} 21 | } 22 | 23 | } 24 | 25 | \source{One of several databases in the Thyroid Disease Data Set (\code{new-thyroid.data}, \code{new-thyroid.names}) of the UCI Machine Learning Repository 26 | \url{https://archive.ics.uci.edu/ml/datasets/thyroid+disease}. Please note the UCI conditions of use.} 27 | 28 | \references{ 29 | Coomans, D., Broeckaert, M. Jonckheer M. and Massart D.L. (1983) 30 | Comparison of Multivariate Discriminant Techniques for Clinical Data - Application to the Thyroid Functional State, \emph{Meth. Inform. Med.} 22, pp. 93-101. 31 | 32 | Coomans, D. and I. Broeckaert (1986) \emph{Potential Pattern Recognition in Chemical and Medical Decision Making}, Research Studies Press, Letchworth, England.
33 | } 34 | 35 | \keyword{datasets} 36 | -------------------------------------------------------------------------------- /man/uncerPlot.Rd: -------------------------------------------------------------------------------- 1 | \name{uncerPlot} 2 | \alias{uncerPlot} 3 | \title{ 4 | Uncertainty Plot for Model-Based Clustering 5 | } 6 | \description{ 7 | Displays the uncertainty in converting a conditional probability from EM 8 | to a classification in model-based clustering. 9 | } 10 | \usage{ 11 | uncerPlot(z, truth, \dots) 12 | } 13 | \arguments{ 14 | \item{z}{ 15 | A matrix whose \emph{[i,k]}th entry is the 16 | conditional probability of the ith observation belonging to 17 | the \emph{k}th component of the mixture. 18 | } 19 | \item{truth}{ 20 | A numeric or character vector giving the true classification of the data. 21 | } 22 | \item{\dots }{ 23 | Provided to allow lists with elements other than the arguments to 24 | be passed in indirect or list calls with \code{do.call}. 25 | } 26 | } 27 | \value{ 28 | A plot of the uncertainty profile of the data, 29 | with uncertainties in increasing order of magnitude. 30 | If \code{truth} is supplied and the number of 31 | classes is the same as the number of columns of 32 | \code{z}, the uncertainty 33 | of the misclassified data is marked by vertical lines on the plot. 34 | } 35 | \details{ 36 | When \code{truth} is provided and the number of classes is compatible 37 | with \code{z}, the function \code{compareClass} is used to find the best 38 | correspondence between classes in \code{truth} and \code{z}.
39 | } 40 | 41 | \seealso{ 42 | \code{\link{mclustBIC}}, 43 | \code{\link{em}}, 44 | \code{\link{me}}, 45 | \code{\link{mapClass}} 46 | } 47 | \examples{ 48 | irisModel3 <- Mclust(iris[,-5], G = 3) 49 | 50 | uncerPlot(z = irisModel3$z) 51 | 52 | uncerPlot(z = irisModel3$z, truth = iris[,5]) 53 | } 54 | \keyword{cluster} 55 | % docclass is function 56 | -------------------------------------------------------------------------------- /man/unmap.Rd: -------------------------------------------------------------------------------- 1 | \name{unmap} 2 | \alias{unmap} 3 | \title{ 4 | Indicator Variables given Classification 5 | } 6 | \description{ 7 | Converts a classification into a matrix of indicator variables. 8 | } 9 | \usage{ 10 | unmap(classification, groups=NULL, noise=NULL, \dots) 11 | } 12 | \arguments{ 13 | \item{classification}{ 14 | A numeric or character vector. Typically the distinct entries of this 15 | vector would represent a classification of observations in a data set. 16 | } 17 | \item{groups}{ 18 | A numeric or character vector indicating the groups from which 19 | \code{classification} is drawn. If not supplied, the default 20 | is assumed to be the unique entries of \code{classification}. 21 | } 22 | \item{noise}{ 23 | A single numeric or character value used to indicate the value of 24 | \code{groups} corresponding to noise. 25 | } 26 | \item{\dots }{ 27 | Catches unused arguments in indirect or list calls via \code{do.call}. 28 | } 29 | } 30 | \value{ 31 | An \emph{n} by \emph{m} matrix of \emph{(0,1)} indicator variables, 32 | where \emph{n} is the length of \code{classification} and \emph{m} is 33 | the number of unique values or symbols in \code{classification}. 34 | Columns are labeled by the unique values in \code{classification}, 35 | and the \code{[i,j]}th entry is \emph{1} if \code{classification[i]} 36 | is the \emph{j}th unique value or symbol in sorted order of 37 | \code{classification}.
38 | If a \code{noise} value or symbol is designated, the corresponding indicator 39 | variables are relocated to the last column of the matrix. 40 | } 41 | \seealso{ 42 | \code{\link{map}}, 43 | \code{\link{estep}}, 44 | \code{\link{me}} 45 | } 46 | \examples{ 47 | z <- unmap(iris[,5]) 48 | z[1:5, ] 49 | 50 | emEst <- me(modelName = "VVV", data = iris[,-5], z = z) 51 | emEst$z[1:5,] 52 | 53 | map(emEst$z) 54 | } 55 | \keyword{cluster} 56 | -------------------------------------------------------------------------------- /man/wdbc.Rd: -------------------------------------------------------------------------------- 1 | \name{wdbc} 2 | \alias{wdbc} 3 | \docType{data} 4 | 5 | \title{UCI Wisconsin Diagnostic Breast Cancer Data} 6 | 7 | \description{ 8 | The data set provides data for 569 patients on 30 features of the cell nuclei obtained from a digitized image of a fine needle aspirate (FNA) of a breast mass. For each patient the cancer was diagnosed as malignant or benign.} 9 | 10 | \usage{data(wdbc)} 11 | 12 | \format{A data frame with 569 observations on the following variables: 13 | \describe{ 14 | \item{\code{ID}}{ID number} 15 | \item{\code{Diagnosis}}{cancer diagnosis: \code{M} = malignant, \code{B} = benign} 16 | \item{\code{Radius_mean}}{a numeric vector} 17 | \item{\code{Texture_mean}}{a numeric vector} 18 | \item{\code{Perimeter_mean}}{a numeric vector} 19 | \item{\code{Area_mean}}{a numeric vector} 20 | \item{\code{Smoothness_mean}}{a numeric vector} 21 | \item{\code{Compactness_mean}}{a numeric vector} 22 | \item{\code{Concavity_mean}}{a numeric vector} 23 | \item{\code{Nconcave_mean}}{a numeric vector} 24 | \item{\code{Symmetry_mean}}{a numeric vector} 25 | \item{\code{Fractaldim_mean}}{a numeric vector} 26 | \item{\code{Radius_se}}{a numeric vector} 27 | \item{\code{Texture_se}}{a numeric vector} 28 | \item{\code{Perimeter_se}}{a numeric vector} 29 | \item{\code{Area_se}}{a numeric vector} 30 | \item{\code{Smoothness_se}}{a numeric vector} 31 |
\item{\code{Compactness_se}}{a numeric vector} 32 | \item{\code{Concavity_se}}{a numeric vector} 33 | \item{\code{Nconcave_se}}{a numeric vector} 34 | \item{\code{Symmetry_se}}{a numeric vector} 35 | \item{\code{Fractaldim_se}}{a numeric vector} 36 | \item{\code{Radius_extreme}}{a numeric vector} 37 | \item{\code{Texture_extreme}}{a numeric vector} 38 | \item{\code{Perimeter_extreme}}{a numeric vector} 39 | \item{\code{Area_extreme}}{a numeric vector} 40 | \item{\code{Smoothness_extreme}}{a numeric vector} 41 | \item{\code{Compactness_extreme}}{a numeric vector} 42 | \item{\code{Concavity_extreme}}{a numeric vector} 43 | \item{\code{Nconcave_extreme}}{a numeric vector} 44 | \item{\code{Symmetry_extreme}}{a numeric vector} 45 | \item{\code{Fractaldim_extreme}}{a numeric vector} 46 | } 47 | } 48 | 49 | \details{ 50 | The recorded features are: 51 | \itemize{ 52 | \item \code{Radius} as mean of distances from center to points on the perimeter 53 | \item \code{Texture} as standard deviation of gray-scale values 54 | \item \code{Perimeter} as cell nucleus perimeter 55 | \item \code{Area} as cell nucleus area 56 | \item \code{Smoothness} as local variation in radius lengths 57 | \item \code{Compactness} as cell nucleus compactness, perimeter^2 / area - 1 58 | \item \code{Concavity} as severity of concave portions of the contour 59 | \item \code{Nconcave} as number of concave portions of the contour 60 | \item \code{Symmetry} as cell nucleus shape 61 | \item \code{Fractaldim} as fractal dimension, "coastline approximation" - 1 62 | } 63 | For each feature the recorded values are computed from each image as \code{_mean}, \code{_se}, and \code{_extreme}, for the mean, the standard error, and the mean of the three largest values. 64 | } 65 | 66 | \source{The Breast Cancer Wisconsin (Diagnostic) Data Set (\code{wdbc.data}, \code{wdbc.names}) from the UCI Machine Learning Repository 67 | \url{https://archive.ics.uci.edu/ml/datasets/Breast+Cancer+Wisconsin+(Diagnostic)}. 
Please note the UCI conditions of use.} 68 | 69 | \references{ 70 | Mangasarian, O. L., Street, W. N., and Wolberg, W. H. (1995) Breast cancer diagnosis and prognosis via linear programming. \emph{Operations Research}, 43(4), pp. 570-577. 71 | } 72 | 73 | \keyword{datasets} 74 | -------------------------------------------------------------------------------- /man/wreath.Rd: -------------------------------------------------------------------------------- 1 | \name{wreath} 2 | \alias{wreath} 3 | 4 | \title{Data Simulated from a 14-Component Mixture} 5 | 6 | \usage{data(wreath)} 7 | 8 | \description{ 9 | A dataset consisting of 1000 observations drawn from a 14-component 10 | normal mixture in which the covariances of the components have the 11 | same size and shape but differ in orientation. 12 | } 13 | \references{ 14 | C. Fraley, A. E. Raftery and R. Wehrens (2005). 15 | Incremental model-based clustering for large datasets with small clusters. 16 | \emph{Journal of Computational and Graphical Statistics 14:1:18}. 
17 | } 18 | 19 | \keyword{datasets} 20 | -------------------------------------------------------------------------------- /src/Makevars: -------------------------------------------------------------------------------- 1 | PKG_LIBS = $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) 2 | -------------------------------------------------------------------------------- /src/dmvnorm.f: -------------------------------------------------------------------------------- 1 | * ===================================================================== 2 | subroutine dmvnorm ( x, mu, Sigma, n, p, w, hood, logdens) 3 | * 4 | * Compute log-density of multivariate Gaussian 5 | * 6 | * ===================================================================== 7 | 8 | implicit NONE 9 | 10 | integer n, p 11 | double precision hood 12 | double precision x(n,p), w(*), logdens(n) 13 | double precision mu(p), Sigma(p,p) 14 | 15 | integer info, i, j 16 | 17 | double precision detlog, umin, umax, const, temp 18 | 19 | double precision zero, one, two 20 | parameter (zero = 0.d0, one = 1.d0, two = 2.d0) 21 | 22 | double precision pi2log 23 | parameter (pi2log = 1.837877066409345d0) 24 | 25 | double precision FLMAX 26 | parameter (FLMAX = 1.7976931348623157d308) 27 | 28 | double precision RTMAX 29 | parameter (RTMAX = 1.340780792994260d154) 30 | 31 | double precision RTMIN 32 | parameter (RTMIN = 1.49166814624d-154) 33 | 34 | double precision ddot 35 | external ddot 36 | 37 | * --------------------------------------------------------------------------- 38 | 39 | 40 | * Cholesky factorization 41 | call dpotrf('U', p, Sigma, p, info) 42 | 43 | if (info .ne. 0) then 44 | w(1) = dble(info) 45 | hood = FLMAX 46 | return 47 | end if 48 | 49 | call absrng( p, Sigma, (p+1), umin, umax) 50 | 51 | if (umax .le. one .and. umax .ge. umin*RTMAX) then 52 | w(1) = zero 53 | hood = FLMAX 54 | return 55 | end if 56 | 57 | if (umax .ge. one .and. umin .le. 
umax*RTMIN) then 58 | w(1) = zero 59 | hood = FLMAX 60 | return 61 | end if 62 | 63 | detlog = zero 64 | do j = 1, p 65 | detlog = detlog + log(abs(Sigma(j,j))) 66 | end do 67 | 68 | const = dble(p)*pi2log/two + detlog 69 | 70 | do i = 1, n 71 | call dcopy(p, x(i,1), n, w, 1) 72 | call daxpy(p, (-one), mu(1), 1, w, 1) 73 | call dtrsv('U', 'T', 'N', p, Sigma, p, w, 1) 74 | temp = ddot(p, w, 1, w, 1)/two 75 | logdens(i) = -(const+temp) 76 | end do 77 | 78 | w(1) = zero 79 | 80 | return 81 | end 82 | -------------------------------------------------------------------------------- /src/logsumexp.f: -------------------------------------------------------------------------------- 1 | * ===================================================================== 2 | subroutine logsumexp(x, n, k, v, lse) 3 | * 4 | * Efficiently computes log-sum-exp(x_i+v) for i = 1,...,n 5 | * x = matrix (n x k) 6 | * v = vector (k) 7 | * lse = output vector(n) 8 | * 9 | * ===================================================================== 10 | implicit none 11 | integer :: n, k, i 12 | double precision :: x(n,k), v(k), lse(n), xv(k), m 13 | 14 | do i = 1, n 15 | xv = x(i,:) + v 16 | m = maxval(xv) 17 | lse(i) = m + log(sum(exp(xv - m))) 18 | end do 19 | 20 | return 21 | end 22 | 23 | * ===================================================================== 24 | subroutine softmax(x, n, k, v, lse, z) 25 | * 26 | * Efficiently computes softmax function based on 27 | * exp(x_i+v - log-sum-exp(x_i+v)) for i = 1,...,n 28 | * x = matrix (n x k) 29 | * v = vector (k) 30 | * z = output matrix (n x k) with rowsum(z_j) = 1 for j = 1,...,k 31 | * 32 | * ===================================================================== 33 | 34 | implicit none 35 | integer :: n, k, i 36 | double precision :: x(n,k), v(k), xv(k), lse(n), z(n,k) 37 | 38 | call logsumexp(x, n, k, v, lse) 39 | 40 | do i = 1, n 41 | xv = x(i,:) + v 42 | z(i,:) = exp(xv - lse(i)) 43 | end do 44 | 45 | return 46 | end 47 | 
--------------------------------------------------------------------------------