├── DESCRIPTION ├── MD5 ├── NAMESPACE ├── R ├── addclustermethods.R ├── clusterboot.R ├── clusterindexes.R ├── cquality20.R ├── dbscan.R ├── discrproj.R ├── discrproj2.R ├── fixreg.R ├── fpc.R ├── lcmixed.R ├── localshape.R ├── mergenormals.R ├── rFace.R └── regmix.R ├── data └── tonedata.txt.gz ├── man ├── adcoord.Rd ├── ancoord.Rd ├── awcoord.Rd ├── batcoord.Rd ├── bhattacharyya.dist.Rd ├── bhattacharyya.matrix.Rd ├── calinhara.Rd ├── can.Rd ├── cat2bin.Rd ├── cdbw.Rd ├── cgrestandard.Rd ├── classifdist.Rd ├── clucols.Rd ├── clujaccard.Rd ├── clusexpect.Rd ├── clustatsum.Rd ├── cluster.magazine.Rd ├── cluster.stats.Rd ├── cluster.varstats.Rd ├── clusterbenchstats.Rd ├── clusterboot.Rd ├── cmahal.Rd ├── concomp.Rd ├── confusion.Rd ├── cov.wml.Rd ├── cqcluster.stats.Rd ├── cvnn.Rd ├── cweight.Rd ├── dbscan.Rd ├── dipp.tantrum.Rd ├── diptest.multi.Rd ├── discrcoord.Rd ├── discrete.recode.Rd ├── discrproj.Rd ├── distancefactor.Rd ├── distcritmulti.Rd ├── distrsimilarity.Rd ├── dridgeline.Rd ├── dudahart2.Rd ├── extract.mixturepars.Rd ├── findrep.Rd ├── fixmahal.Rd ├── fixreg.Rd ├── flexmixedruns.Rd ├── fpc-package.Rd ├── fpclusters.Rd ├── itnumber.Rd ├── jittervar.Rd ├── kmeansCBI.Rd ├── kmeansruns.Rd ├── lcmixed.Rd ├── localshape.Rd ├── mahalanodisc.Rd ├── mahalanofix.Rd ├── mahalconf.Rd ├── mergenormals.Rd ├── mergeparameters.Rd ├── minsize.Rd ├── mixdens.Rd ├── mixpredictive.Rd ├── mvdcoord.Rd ├── ncoord.Rd ├── neginc.Rd ├── nselectboot.Rd ├── pamk.Rd ├── piridge.Rd ├── piridge.zeroes.Rd ├── plot.valstat.Rd ├── plotcluster.Rd ├── prediction.strength.Rd ├── rFace.Rd ├── randcmatrix.Rd ├── randconf.Rd ├── randomclustersim.Rd ├── regmix.Rd ├── ridgeline.Rd ├── ridgeline.diagnosis.Rd ├── simmatrix.Rd ├── solvecov.Rd ├── sseg.Rd ├── stupidkaven.Rd ├── stupidkcentroids.Rd ├── stupidkfn.Rd ├── stupidknn.Rd ├── tdecomp.Rd ├── tonedata.Rd ├── unimodal.ind.Rd ├── valstat.object.Rd ├── weightplots.Rd ├── wfu.Rd ├── xtable.Rd └── zmisclassification.matrix.Rd └── tests ├── Examples └── fpc-Ex.Rout.save ├── fpctests_notallin.R └── fpctests_notallin.Rout.save /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: fpc 2 | Title: Flexible Procedures for Clustering 3 | Version: 2.2-13 4 | Date: 2024-09-23 5 | Authors@R: person(given = "Christian", 6 | family = "Hennig", 7 | role = c("aut", "cre"), 8 | email = "christian.hennig@unibo.it") 9 | Depends: R (>= 2.0) 10 | Imports: MASS, cluster, mclust, flexmix, prabclus, class, diptest, 11 | robustbase, kernlab, grDevices, graphics, methods, stats, 12 | utils, parallel 13 | Suggests: tclust, pdfCluster, mvtnorm 14 | Description: Various methods for clustering and cluster validation. 15 | Fixed point clustering. Linear regression clustering. Clustering by 16 | merging Gaussian mixture components. Symmetric 17 | and asymmetric discriminant projections for visualisation of the 18 | separation of groupings. Cluster validation statistics 19 | for distance based clustering including corrected Rand index. 20 | Standardisation of cluster validation statistics by random clusterings and 21 | comparison between many clustering methods and numbers of clusters based on 22 | this. 23 | Cluster-wise cluster stability assessment. Methods for estimation of 24 | the number of clusters: Calinski-Harabasz, Tibshirani and Walther's 25 | prediction strength, Fang and Wang's bootstrap stability. 26 | Gaussian/multinomial mixture fitting for mixed 27 | continuous/categorical variables. 
Variable-wise statistics for cluster
28 | interpretation. DBSCAN clustering. Interface functions for many
29 | clustering methods implemented in R, including estimating the number of
30 | clusters with kmeans, pam and clara. Modality diagnosis for Gaussian
31 | mixtures. For an overview see package?fpc.
32 | License: GPL
33 | URL: https://www.unibo.it/sitoweb/christian.hennig/en/
34 | NeedsCompilation: no
35 | Packaged: 2024-09-23 23:23:04 UTC; chrish
36 | Author: Christian Hennig [aut, cre]
37 | Maintainer: Christian Hennig <christian.hennig@unibo.it>
38 | Repository: CRAN
39 | Date/Publication: 2024-09-24 05:50:02 UTC
40 |
--------------------------------------------------------------------------------
/MD5:
--------------------------------------------------------------------------------
1 | d8a47a3769189ca6cdf1348fce09fdb9 *DESCRIPTION
2 | 1125ef47578160408d5075ac0948abcd *NAMESPACE
3 | aec30efb5a63e14e8987a882e3585a7c *R/addclustermethods.R
4 | 8c8c3adab571a79097b5e0651e155390 *R/clusterboot.R
5 | 9df3134d1f6ad4f8f77a5943e0caec6d *R/clusterindexes.R
6 | 88465a515081c286290dc3bc7f800d73 *R/cquality20.R
7 | 742e9a99e68a078688ab046eba68e97b *R/dbscan.R
8 | b7c01634910fd6957ef94642398dc4d7 *R/discrproj.R
9 | d85447226e4ac2c17ce73fc907193176 *R/discrproj2.R
10 | ddd1d4523b35728dd81e26f593895b45 *R/fixreg.R
11 | 9e19a17d18fd66a939c4fa74a0234afd *R/fpc.R
12 | 05ccaaa9be1507b51a78e56418231ffc *R/lcmixed.R
13 | 9a31672e2d1b0eaa383861dd2b630b23 *R/localshape.R
14 | 20c83aae9aa85787f27ca65bca2c132d *R/mergenormals.R
15 | b2788e1bba14be4b1e1a1dc22b00b776 *R/rFace.R
16 | 382e8aa6e94a4dceaded3365064039d8 *R/regmix.R
17 | 3505e046680bd1f5bb7a8b2d20ef38c4 *data/tonedata.txt.gz
18 | a8ecd61d2070e101ba9ad5e8f42d01e6 *man/adcoord.Rd
19 | 3a4ec884478722c35b2cdd947dfe74b5 *man/ancoord.Rd
20 | 5ab5d71b1b9902e43b5837dfe03c9a4c *man/awcoord.Rd
21 | acb2a3f2ab2c8d0bcd78f333fe5f6322 *man/batcoord.Rd
22 | f9d350c902ba3fb50c1e4d664b89528c *man/bhattacharyya.dist.Rd
23 | afc86d08fa24b04bdf9a6bfed231f001 *man/bhattacharyya.matrix.Rd
24 | 0696e200baef6495722674ac041f4a4f *man/calinhara.Rd
25 | 2b5264ab2e49d9f1a6b69f6b371ba0d4 *man/can.Rd
26 | 168c38d6eaf6f7765492425a77dffb19 *man/cat2bin.Rd
27 | c1aac2369f3bd407a11f86f17232f919 *man/cdbw.Rd
28 | f8207303a74e577b004a128f3e8cb812 *man/cgrestandard.Rd
29 | 7276547ff5b24a46ac9db08b26ef5f22 *man/classifdist.Rd
30 | 388a1c27aeb2abf791c3694e5b6f0362 *man/clucols.Rd
31 | ab40c02193537ab2cec1a5c3cb848102 *man/clujaccard.Rd
32 | 117a797d11c1ab28b8a4453ac9ab49e5 *man/clusexpect.Rd
33 | e6afaf7f014e9aba03cd84d42c614ea3 *man/clustatsum.Rd
34 | 3cba7a50d7060a4587566801556ca9b3 *man/cluster.magazine.Rd
35 | 2e1d6c3f909a21259af7f08bf1a1f61e *man/cluster.stats.Rd
36 | 030b373fea2741e53ece7db3f4224032 *man/cluster.varstats.Rd
37 | 421c33589609de50ff288ac659eb87fe *man/clusterbenchstats.Rd
38 | e8fdf85cbdfc2de7056bacf599ca5c75 *man/clusterboot.Rd
39 | 9db8a890d7d579b062342b96ae870eed *man/cmahal.Rd
40 | dfc9b80c1b32810d9819f14bd18a2f4f *man/concomp.Rd
41 | 69c83485d86d098e5b1e382aa1aa0e03 *man/confusion.Rd
42 | 46511da6ae33e758ea4df921271a06e3 *man/cov.wml.Rd
43 | eac07f87a35f3afa23781188dfa7d19f *man/cqcluster.stats.Rd
44 | 3d82668239fad4518bcf284abea52948 *man/cvnn.Rd
45 | ad16fc22770ef7f7837edd1441ae9a19 *man/cweight.Rd
46 | 1de81baa2b792e07f951dbb68fe53571 *man/dbscan.Rd
47 | d13c67097f7c290f11bf4387aca5eb1a *man/dipp.tantrum.Rd
48 | 2a6593ed2ebd68ee4d4b82ad4c827ca1 *man/diptest.multi.Rd
49 | 927e7505630ff847155eb40fecc67b84 *man/discrcoord.Rd
50 | 5369ed120187a12fac527e05838f8385 *man/discrete.recode.Rd
51
| 1137fec01134e3337de519984530396c *man/discrproj.Rd 52 | 9bf4fba6be7109d5d969595a382e197e *man/distancefactor.Rd 53 | 7f1528fc62b218f63c6ea92b6b082488 *man/distcritmulti.Rd 54 | 356bb125dde6b29300be02bf2e3f0c16 *man/distrsimilarity.Rd 55 | 2fd8c12a19a526bf2838ea5e420df046 *man/dridgeline.Rd 56 | fbceefe2215ac7b210f3ae58f9242504 *man/dudahart2.Rd 57 | f26709267c6fdd746702b041504589cc *man/extract.mixturepars.Rd 58 | 6bf6188e60401384a81b74e7813ae724 *man/findrep.Rd 59 | 9af298038dffe9c45b66d2deb8fdc41f *man/fixmahal.Rd 60 | ef902926337553264cc8080cc2bfe18d *man/fixreg.Rd 61 | 8cc1d2d3d5ad5037de46ec7e578d37d5 *man/flexmixedruns.Rd 62 | cae437b4708a1d825f828d271bd50e99 *man/fpc-package.Rd 63 | 84bcae9460565f4244353ff7d349de38 *man/fpclusters.Rd 64 | 3406d81af07dca5bb922b4e7746d0bff *man/itnumber.Rd 65 | 2f0dd05ea31c9534529bb763076af795 *man/jittervar.Rd 66 | e027b94a960938fad460e8dfe4de81b4 *man/kmeansCBI.Rd 67 | aff11c26aeab6c65b65bf0cf17f35363 *man/kmeansruns.Rd 68 | f8ac0ee0d2969de37064f386b660f3b4 *man/lcmixed.Rd 69 | 1327aab55e8b9d988cb8702c360334e8 *man/localshape.Rd 70 | 1b4c349f88497821c18ba8f07eab9e0e *man/mahalanodisc.Rd 71 | 79dfef5657596ddd14fa54d259e7fc43 *man/mahalanofix.Rd 72 | 91bb05e5be571ca0a58aea51524ce634 *man/mahalconf.Rd 73 | 31b55db417104f978b01a3d66a5a837b *man/mergenormals.Rd 74 | 4a362ae7a8f5b62b3e6bcc4aa69d96d8 *man/mergeparameters.Rd 75 | eb4f876ed214aad657300e05732f87f9 *man/minsize.Rd 76 | 9d315a8d06ce60c5cd131dec61266728 *man/mixdens.Rd 77 | 9a57a7a77170d60660d9fe6b32d5bee2 *man/mixpredictive.Rd 78 | 2f12f3e1cab6b01d3dfbcbd62fb4e19d *man/mvdcoord.Rd 79 | 19bcb0510dc85e2e78ede077978169ce *man/ncoord.Rd 80 | 9cd75fca00ac100d2d5490c0b3efccc2 *man/neginc.Rd 81 | c1496afc6c30854f09ae355ade834bdd *man/nselectboot.Rd 82 | 2c5757fed7679a28e38574890abfbf37 *man/pamk.Rd 83 | 96a5321a0408ea3672912109356b330e *man/piridge.Rd 84 | e25a1ffe3fd58568f3fc22f778466898 *man/piridge.zeroes.Rd 85 | 9c2924a16367b879254f85c2328f49ea *man/plot.valstat.Rd 86 | a65a08d014a70310af397eeac7e6c515 *man/plotcluster.Rd 87 | c1446aacf9806e47031c2bb4f7cb3805 *man/prediction.strength.Rd 88 | 8eda1a69722326ae53f828c691677f6c *man/rFace.Rd 89 | 587846ed6be5faa2de83d952ec75e40d *man/randcmatrix.Rd 90 | fa4a2d8cf2ab7a0a95c27f487082892e *man/randconf.Rd 91 | add27cde61e11b21962206fa48710eff *man/randomclustersim.Rd 92 | 50d558a06762eccd46f93e36abfaee6f *man/regmix.Rd 93 | de39716505501f20388419b1b6dbf52f *man/ridgeline.Rd 94 | 68593a7a80098f9de2ffefbca6f048e7 *man/ridgeline.diagnosis.Rd 95 | eff9ad3ea5bbb8fd73203fdad64572b6 *man/simmatrix.Rd 96 | d6c7f469f0186b49621bcddd3c3fa2e7 *man/solvecov.Rd 97 | 9dd4c04057b4a3a3642b8950fd3cfc18 *man/sseg.Rd 98 | 82b4787f4a697756c94239f3eff00bf1 *man/stupidkaven.Rd 99 | ddb9d6f89110b180a574e6583b2838c1 *man/stupidkcentroids.Rd 100 | 3a83a0a91c82efda6e8be620ece68259 *man/stupidkfn.Rd 101 | 73018257b3c6d6710a4ebd2a25a5138e *man/stupidknn.Rd 102 | e5c96c7b34a1cfb10b0195b15a051741 *man/tdecomp.Rd 103 | e156cb0ced917fe86298bf3fbb316a25 *man/tonedata.Rd 104 | 1513bc6b3a73bbe9aae8718e5f825b0a *man/unimodal.ind.Rd 105 | c22b38806cbda0b6c46afd0702ffda27 *man/valstat.object.Rd 106 | 013d8afb7015e85ebb434ab27bc14265 *man/weightplots.Rd 107 | 19cb8bae42e9c79f99e7ec427a2c66ad *man/wfu.Rd 108 | c895dda06251f6f2c38b2f0669f66874 *man/xtable.Rd 109 | 98033be110c0e6d320a01868fa441d27 *man/zmisclassification.matrix.Rd 110 | 05c8bcca6070ae5a8f7bea3f66de5c63 *tests/Examples/fpc-Ex.Rout.save 111 | 19641842cec18ebe59fc649acc502274 *tests/fpctests_notallin.R 112 | 
7b58a228503baef69789bccaacf38d65 *tests/fpctests_notallin.Rout.save
113 |
--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | # Remove the previous line if you edit this file
2 | # This is the default, just nicked.
3 |
4 | # Export all names
5 | exportPattern(".")
6 |
7 | # Import all packages listed as Imports or Depends
8 | import(
9 | MASS,
10 | cluster,
11 | mclust,
12 | flexmix,
13 | prabclus,
14 | class,
15 | diptest,
16 | robustbase
17 | )
18 |
19 | importFrom("kernlab",specc)
20 |
21 | importFrom("grDevices", "colors", "colours", "grey", "xy.coords")
22 | importFrom("graphics", "abline", "hist", "legend", "pairs", "par",
23 | "points", "polygon", "title","axis","text")
24 | importFrom("methods", "new")
25 | importFrom("stats", "BIC", "addmargins", "as.dist", "cmdscale", "coef",
26 | "coefficients", "cor", "cov", "cov.wt", "cutree", "density",
27 | "dist", "dnorm", "fitted.values", "hclust", "kmeans", "lm",
28 | "lsfit", "mahalanobis", "median", "pchisq", "pnorm",
29 | "qbinom", "qchisq", "qnorm", "quantile", "rbinom", "resid",
30 | "residuals", "rexp", "rgamma", "rnorm", "rt", "runif", "sd",
31 | "weighted.mean","ecdf","pgamma")
32 | importFrom("utils", "data")
33 | importFrom("parallel", "mclapply", "detectCores")
34 |
35 | S3method(fpclusters, mfpc)
36 | S3method(fpclusters, rfpc)
37 | S3method(plot, clboot)
38 | S3method(plot, dbscan)
39 | S3method(plot, mfpc)
40 | S3method(plot, rfpc)
41 | S3method(plot, valstat)
42 | S3method(predict, dbscan)
43 | S3method(print, clboot)
44 | S3method(print, dbscan)
45 | S3method(print, mfpc)
46 | S3method(print, predstr)
47 | S3method(print, rfpc)
48 | S3method(print, summary.mergenorm)
49 | S3method(print, summary.mfpc)
50 | S3method(print, summary.rfpc)
51 | S3method(print, summary.cquality)
52 | S3method(print, varwisetables)
53 | S3method(print, clusterbenchstats)
54 | S3method(print, valstat)
55 | S3method(summary, mergenorm)
56 | S3method(summary, mfpc)
57 | S3method(summary, rfpc)
58 | S3method(summary, cquality)
59 | 60 | 61 | 62 |
--------------------------------------------------------------------------------
/R/dbscan.R:
--------------------------------------------------------------------------------
1 | dbscan <- function(
2 | data
3 | , eps
4 | , MinPts = 5
5 | , scale = FALSE
6 | , method = c("hybrid","raw","dist")
7 | # , no.check = FALSE
8 | , seeds = TRUE
9 | , showplot = FALSE
10 | , countmode = NULL #c(1,2,3,5,10,100,1000,5000,10000,50000)
11 | )
12 | {
13 | # if (!require(distpatch))
14 | distcomb <- function(x,data){
15 | data <- t(data)
16 | temp <- apply(x, 1, function(x){
17 | sqrt(colSums((data-x)^2))
18 | })
19 | if (is.null(dim(temp)))
20 | matrix(temp, nrow(x), ncol(data))
21 | else
22 | t(temp)
23 | }
24 | method <- match.arg(method)
25 | data <- as.matrix(data)
26 | n <- nrow(data)
27 | if (scale)
28 | data <- scale(data)
29 | classn <- cv <- integer(n)
30 | isseed <- logical(n)
31 | cn <- integer(1)
32 | for (i in 1:n){
33 | if (i %in% countmode)
34 | cat("Processing point ", i," of ",n, ".\n")
35 | unclass <- (1:n)[cv<1]
36 | if (cv[i]==0){
37 | if (method=="dist"){
38 | reachables <- unclass[data[i,unclass]<=eps]
39 | }else{
40 | reachables <- unclass[as.vector(distcomb(data[i,, drop=FALSE],data[unclass,, drop=FALSE]))<=eps]
41 | }
42 | if (length(reachables)+classn[i]<MinPts)
43 | cv[i] <- (-1)
44 | else{
45 | cn <- cn+1
46 | cv[i] <- cn
47 | isseed[i] <- TRUE
48 | reachables <- setdiff(reachables, i)
49 | unclass <- setdiff(unclass, i)
50 | classn[reachables] <- classn[reachables]+1
51 | while (length(reachables)){
52 | if (showplot)
53 | plot(data, col=1+cv, pch=1+isseed)
54 | cv[reachables] <- cn
55 | ap <- reachables
56 | reachables <- integer()
57 | if (method=="hybrid"){
58 | tempdist <- distcomb(data[ap,, drop=FALSE], data[unclass,, drop=FALSE])
59 | frozen.unclass <- unclass
60 | }
61 | for (i2 in seq(along=ap)){
62 | j <- ap[i2]
63 | if (showplot>1)
64 | plot(data, col=1+cv, pch=1+isseed)
65 | if (method=="dist"){
66 | jreachables <- unclass[data[j,unclass]<=eps]
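# (A note on the three neighbourhood queries in this if/else chain, based
# on the code itself: "dist" reads distances straight from a precomputed
# distance matrix; "hybrid" looks them up in tempdist, the block of
# distances computed once per batch of seed points ap and indexed via
# frozen.unclass; "raw" recomputes distances from the coordinates on the
# fly, trading speed for memory.)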
67 | }else if (method=="hybrid"){ 68 | jreachables <- unclass[tempdist[i2,match(unclass, frozen.unclass)]<=eps] 69 | }else{ 70 | jreachables <- unclass[as.vector(distcomb(data[j,, drop=FALSE], data[unclass,, drop=FALSE]))<=eps] 71 | } 72 | if (length(jreachables)+classn[j]>=MinPts){ 73 | isseed[j] <- TRUE 74 | cv[jreachables[cv[jreachables]<0]] <- cn 75 | reachables <- union(reachables, jreachables[cv[jreachables]==0]) # isseed for these new reachables tested at next while loop 76 | } 77 | # must be after querying classn, otherwise we count j itself twice 78 | classn[jreachables] <- classn[jreachables]+1 79 | unclass <- setdiff(unclass, j) 80 | } # for j 81 | } # while sum reachables>0 82 | } # else (sum reachables + ... >= MinPts) 83 | } # if cv==0 84 | if (!length(unclass)) 85 | break 86 | } # for i 87 | rm(classn) 88 | if (any(cv==(-1))){ 89 | cv[cv==(-1)] <- 0 90 | } 91 | if (showplot) 92 | plot(data, col=1+cv, pch=1+isseed) 93 | out <- list( 94 | cluster = cv 95 | , eps = eps 96 | , MinPts = MinPts 97 | ) 98 | if (seeds && cn>0){ 99 | out$isseed <- isseed 100 | } 101 | class(out) <- "dbscan" 102 | out 103 | } # dbscan 104 | 105 | 106 | print.dbscan <- function(x, ...){ 107 | cat("dbscan Pts=", length(x$cluster), " MinPts=", x$MinPts, " eps=", x$eps, "\n", sep="") 108 | if (is.null(x$isseed)) 109 | tab <- table(x$cluster) 110 | else{ 111 | tab <- table(c("seed", "border")[2-x$isseed], cluster=x$cluster) 112 | if (is.null(dim(tab))){ 113 | tab <- cbind(tab) 114 | colnames(tab) <- unique(x$cluster) 115 | } 116 | tab <- rbind(tab, total=colSums(tab)) 117 | } 118 | print(tab, ...) 119 | } 120 | 121 | plot.dbscan <- function(x, data, ...) 122 | { 123 | plot(data, col=1+x$cluster, pch=1+x$isseed, ...) 124 | } 125 | 126 | 127 | predict.dbscan <- function( 128 | object 129 | , data 130 | , newdata = NULL 131 | , predict.max = 1000 132 | # , no.check = FALSE 133 | , ... 
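# (With newdata, the code below assigns each new point the cluster of its
# nearest seed point, found via class::knn1, and demotes it to noise
# (cluster 0) if even that seed lies farther away than eps. predict.max
# caps the batch size so the distance computations stay memory-bounded.)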
134 | )
135 | {
136 | if (is.null(newdata)){
137 |
138 | return(object$cluster)
139 |
140 | }else{
141 |
142 | if (is.null(object$isseed))
143 | stop("no seeds to predict")
144 |
145 | dmax <- object$eps
146 | data <- data[object$isseed, , drop=FALSE]
147 | out <- object$cluster[object$isseed]
148 |
149 | # if (!require(distpatch))
150 | distpair <- function(x,data){
151 | sqrt(rowSums((x-data)^2))
152 | }
153 |
154 | # require(class)
155 | batchpredict <- function(newdata){
156 | w <- as.integer(knn1(data, newdata, 1:n.orig))
157 | newout <- out[w]
158 | if (!is.null(dmax)){
159 | d <- distpair(data[w,,drop=FALSE], newdata)
160 | newout[d>dmax] <- 0
161 | }
162 | return(newout)
163 | }
164 | n <- nrow(newdata)
165 | n.orig <- nrow(data)
166 | if (n>predict.max){
167 | i <- 1:n
168 | ret <- do.call("c", lapply(split(i, (i-1)%/%predict.max), function(i)batchpredict(newdata[i, , drop=FALSE])))
169 | }else{
170 | ret <- batchpredict(newdata)
171 | }
172 | return(ret)
173 | }
174 | }
175 | 176 | 177 |
178 | # if (FALSE){
179 | #
180 | # x <- t(t(sort(c(rnorm(20), 1:10))))
181 | # ds1 <- dbscan1(x, MinPts=5, eps=2, showplot=1)
182 | # ds <- dbscan(x, MinPts=5, eps=2, showplot=1)
183 | #
184 | # par(mfrow=c(2, 1))
185 | # plot(x, col=1+ds1$classification)
186 | # plot(ds, x)
187 | # ds1
188 | # ds
189 | # par(mfrow=c(1, 1))
190 | #
191 | # }
192 |
--------------------------------------------------------------------------------
/R/localshape.R:
--------------------------------------------------------------------------------
1 | localshape <- function(xdata,proportion=0.1,mscatter="mcd",mcdalpha=0.8,
2 | covstandard="det"){
3 | # if (mscatter=="mcd") require(robustbase)
4 | xdata <- as.matrix(xdata)
5 | scatter <- switch(mscatter,
6 | mcd=covMcd(xdata,alpha=mcdalpha)$cov,
7 | cov=cov(xdata))
8 | n <- nrow(xdata)
9 | p <- ncol(xdata)
10 | np <- round(proportion*n)
11 | mmatrix <- matrix(0,n,n)
12 | for (i in 1:n)
13 | mmatrix[i,] <- mahalanobis(xdata,xdata[i,],scatter)
14 | lcov <- matrix(0,p,p)
15 | for (i in 1:n){
16 | xc <- cov(xdata[order(mmatrix[i,])[1:np],])
17 | lcov <- lcov+switch(covstandard,
18 | trace=xc/sum(diag(xc)),
19 | det=xc/det(xc),
20 | none=xc)
21 | }
22 | lcov <- lcov/n
23 | lcov
24 | }
25 | 26 |
--------------------------------------------------------------------------------
/R/regmix.R:
--------------------------------------------------------------------------------
1 |
2 | # randcmatrix=random partition matrix for n observations to cln clusters
3 | randcmatrix <- function (n,cln,p){
4 | ct <- 0
5 | while(ct<p+3){
6 | m <- matrix(0,nrow=n,ncol=cln)
7 | for (i in 1:n)
8 | m[i,sample(cln,1)] <- 1
9 | ct <- min(colSums(m))
10 | } # regenerate until every cluster has at least p+3 members
11 | m
12 | }
13 |
14 | # regem: EM iteration for a mixture of cln linear regressions, started
15 | # from the initial partition/posterior matrix m as produced by
16 | # randcmatrix.
17 | # indep: matrix of independent variables, dep: dependent variable,
18 | # icrit: iteration stopping criterion (loglikelihood improvement),
19 | # minsig: minimum error variance (protects against degeneracy),
20 | # warnings: if TRUE, warn about too small or collinear clusters.
21 | regem <- function (indep, dep, m, cln,
22 | icrit=1.e-5, minsig=1.e-6, warnings=FALSE){
23 | n <- length(dep)
24 | p <- ncol(as.matrix(indep))
25 | eps <- rep(0,cln)
26 | rc <- matrix(0,p+1,cln)
27 | fv <- matrix(0,n,cln)
28 | rv <- rep(0,cln)
29 | loglik <- (-1.e9)
30 | change <- TRUE
31 | smallcluster <- FALSE
32 |
33 | # Alternate M- and E-steps until the loglikelihood stabilises or a
34 | # cluster degenerates.
35 | while (change && !smallcluster){
36 | oldlik <- loglik
37 |
38 | # M-step: cluster proportions eps and one weighted least squares
39 | # regression per cluster.
40 | for (i in 1:cln){
41 | eps[i] <- sum(m[,i])/n
42 |
43 | # A cluster is too small for a stable weighted regression if
44 | # fewer than p+2 points carry nonnegligible posterior weight.
45 | if (sum(m[,i]>0.01) < p+2){
46 | if (warnings) warning("Too small cluster")
47 | smallcluster <- TRUE
48 | } # if too small cluster
49 | else{
50 | reg <- lm(dep~indep, weights=m[,i])
51 | fv[,i] <- fitted.values(reg)
52 | rc[,i] <- coefficients(reg)
53 | # abline(rc[,i],col=i)
54 | for (j in 2:(p+1))
55 | if (is.na(rc[j,i])){
56 | smallcluster <- TRUE
57 | if (warnings) warning("Collinear regressors")
58 | } # if collinearity
59 | res <- residuals(reg)
60 | rv[i] <- weighted.mean(res^2,m[,i])
61 | if (rv[i]<minsig) rv[i] <- minsig
62 | } # else: weighted regression for cluster i
63 | } # for i (M-step)
64 |
65 | if (!smallcluster){
66 | # E-step: component densities, new posterior probabilities and
67 | # loglikelihood.
68 | ff <- matrix(0,n,cln)
69 | for (i in 1:cln)
70 | ff[,i] <- eps[i]*dnorm(dep,mean=fv[,i],sd=sqrt(rv[i]))
71 | fs <- ff %*% rep(1,cln)
72 | m <- ff/as.vector(fs)
73 | loglik <- sum(log(fs))
74 |
75 | # The iteration goes on while the loglikelihood still improves
76 | # by more than icrit. abs() guards against the rare small
77 | # decrease that the minsig truncation of the error variances
78 | # can cause; without it the loop might not terminate.
79 |
80 | change <- (abs(loglik-oldlik)> icrit)
81 | } # if no collinearity & clusters large enough
82 | } # while change
83 | g <- c()
84 | for (i in 1:n)
85 | g[i] <- which.max(m[i,])
86 | out <- list(coef=rc, vars=rv, z=m, g=g, eps=eps, loglik=loglik,
87 | warn=smallcluster)
88 | out
89 | } # regem
90 | 91 | 92 |
93 | # Regression mixture analysis (DeSarbo and Cron),
94 | # ir=iteration runs, nclust= cluster numbers vector, icrit=iteration stopping
95 | # criterion, minsig = minimum error variance
96 | regmix <-
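# (A note on the model selection below: the BIC used is
#   2*loglik - log(n)*((p+3)*cln-1),
# since each of the cln clusters contributes p+1 regression coefficients,
# one error variance and one mixture proportion, i.e. p+3 parameters, and
# one is subtracted because the proportions sum to 1. A minimal usage
# sketch, with made-up data purely for illustration:
#   x <- runif(100); y <- 2*x + rnorm(100, sd=0.1)
#   rm1 <- regmix(x, y, nclust=1:3)
#   rm1$clnopt  # number of clusters chosen by BIC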
function (indep, dep,
97 | ir=1, nclust=1:7, icrit=1.e-5, minsig=1.e-6,
98 | warnings=FALSE){
99 | n <- length(dep)
100 | p <- ncol(as.matrix(indep))
101 | clnopt <- min(nclust)
102 | czmax <- max(nclust)
103 | bic <- loglik <- (-1.e9)
104 | clbic <- rep((-1.e9), czmax)
105 | eps <- rep(0, czmax)
106 | rc <- rep(0,(p+1)*czmax)
107 | dim(rc) <- c(p+1,czmax)
108 | rv <- rep(0,czmax)
109 | z <- rep(0, n*czmax)
110 | dim(z) <- c(n,czmax)
111 | for (cln in nclust){
112 | for (i in 1:ir){
113 | cat("Iteration ",i," for ",cln," clusters.\n")
114 | emi <- regem(indep, dep, m=randcmatrix(n,cln,p), cln=cln,
115 | icrit=icrit, minsig=minsig, warnings=warnings)
116 | if (emi$warn)
117 | emi <- regem(indep, dep, m=randcmatrix(n,cln,p), cln=cln,
118 | icrit=icrit, minsig=minsig, warnings=warnings)
119 | if (!emi$warn){
120 | bicval <- 2*emi$loglik - log(n)*((p+3)*cln-1)
121 | if (bicval > clbic[cln])
122 | clbic[cln] <- bicval
123 | if (bicval > bic){
124 | clnopt <- cln
125 | bic <- bicval
126 | loglik <- emi$loglik
127 | eps[1:cln] <- emi$eps
128 | rc[,1:cln] <- emi$coef
129 | rv[1:cln] <- emi$var
130 | z[,1:cln] <- emi$z
131 | } # if bicval>bic
132 | } # if no warning
133 | } # for i
134 | } # for cln
135 | g <- c()
136 | for (i in 1:n)
137 | g[i] <- which.max(z[i,1:clnopt])
138 | out <- list(clnopt=clnopt, loglik=loglik, bic=clbic,
139 | coef=rc[,1:clnopt], var=rv[1:clnopt], eps=eps[1:clnopt],
140 | z=z[,1:clnopt], g=g)
141 | out
142 | # clnopt: optimal number of clusters, loglik: loglikelihood, bic: vector of
143 | # BIC values, coef: regression coefficients, var: error variances,
144 | # eps: cluster proportions, z: a posteriori probabilities, g: optimal
145 | # classification
146 | }
147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 |
--------------------------------------------------------------------------------
/data/tonedata.txt.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cran/fpc/ed319818bd2575441ff92d1905ebe77016e6c5e0/data/tonedata.txt.gz
--------------------------------------------------------------------------------
/man/adcoord.Rd:
--------------------------------------------------------------------------------
1 | \name{adcoord}
2 | \alias{adcoord}
3 | %- Also NEED an `\alias' for EACH other topic documented here.
4 | \title{Asymmetric discriminant coordinates}
5 | \description{
6 | Asymmetric discriminant coordinates as defined
7 | in Hennig (2004). Asymmetric discriminant projection means that there
8 | are two classes, one of which is treated as the homogeneous class
9 | (i.e., it should appear homogeneous and separated in the resulting projection)
10 | while the other may be heterogeneous.
11 | The principle is to maximize the ratio between the projection of a between
12 | classes separation matrix and the projection of the covariance matrix
13 | within the homogeneous class.
14 | }
15 | \usage{
16 | adcoord(xd, clvecd, clnum=1)
17 | }
18 | %- maybe also `usage' for other objects documented here.
19 | \arguments{
20 | \item{xd}{the data matrix; a numerical object which can be coerced
21 | to a matrix.}
22 | \item{clvecd}{integer vector of class numbers; length must equal
23 | \code{nrow(xd)}.}
24 | \item{clnum}{integer.
Number of the homogeneous class.}
25 | }
26 | \details{
27 | The square root of the homogeneous class's covariance matrix
28 | is inverted by use of
29 | \code{\link{tdecomp}}, which can be expected to give
30 | reasonable results for singular within-class covariance matrices.
31 | }
32 | % \details{
33 | % }
34 | \value{
35 | List with the following components
36 | \item{ev}{eigenvalues in descending order.}
37 | \item{units}{columns are coordinates of projection basis vectors.
38 | New points \code{x} can be projected onto the projection basis vectors
39 | by \code{x \%*\% units}}
40 | \item{proj}{projections of \code{xd} onto \code{units}.}
41 | }
42 | \references{
43 | Hennig, C. (2004) Asymmetric linear dimension reduction for classification.
44 | Journal of Computational and Graphical Statistics 13, 930-945.
45 |
46 | Hennig, C. (2005) A method for visual cluster validation. In:
47 | Weihs, C. and Gaul, W. (eds.): Classification - The Ubiquitous
48 | Challenge. Springer, Heidelberg 2005, 153-160.
49 |
50 | }
51 | \author{Christian Hennig
52 | \email{christian.hennig@unibo.it}
53 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/}
54 | }
55 |
56 | \seealso{
57 | \code{\link{plotcluster}} for straightforward discriminant plots.
58 | \code{\link{discrproj}} for alternatives.
59 | \code{\link{rFace}} for generation of the example data used below.
60 | }
61 |
62 | \examples{
63 | set.seed(4634)
64 | face <- rFace(600,dMoNo=2,dNoEy=0)
65 | grface <- as.integer(attr(face,"grouping"))
66 | adcf <- adcoord(face,grface==2)
67 | adcf2 <- adcoord(face,grface==4)
68 | plot(adcf$proj,col=1+(grface==2))
69 | plot(adcf2$proj,col=1+(grface==4))
70 | # ...done in one step by function plotcluster.
71 | }
72 | \keyword{multivariate}% at least one, from doc/KEYWORDS
73 | \keyword{classif}% __ONLY ONE__ keyword per line
74 | 75 | 76 | 77 |
--------------------------------------------------------------------------------
/man/ancoord.Rd:
--------------------------------------------------------------------------------
1 | \name{ancoord}
2 | \alias{ancoord}
3 | %- Also NEED an `\alias' for EACH other topic documented here.
4 | \title{Asymmetric neighborhood based discriminant coordinates}
5 | \description{
6 | Asymmetric neighborhood based discriminant coordinates as defined
7 | in Hennig (2004). Asymmetric discriminant projection means that there
8 | are two classes, one of which is treated as the homogeneous class
9 | (i.e., it should appear homogeneous and separated in the resulting projection)
10 | while the other may be heterogeneous.
11 | The principle is to maximize the ratio between the projection of a between
12 | classes covariance matrix, which is defined by averaging the
13 | between classes covariance matrices in the neighborhoods of the points
14 | of the homogeneous class, and the projection of the covariance matrix
15 | within the homogeneous class.
16 | }
17 | \usage{
18 | ancoord(xd, clvecd, clnum=1, nn=50, method="mcd", countmode=1000, ...)
19 | }
20 | %- maybe also `usage' for other objects documented here.
21 | \arguments{
22 | \item{xd}{the data matrix; a numerical object which can be coerced
23 | to a matrix.}
24 | \item{clvecd}{integer vector of class numbers; length must equal
25 | \code{nrow(xd)}.}
26 | \item{clnum}{integer. Number of the homogeneous class.}
27 | \item{nn}{integer. Number of points which belong to the neighborhood
28 | of each point (including the point itself).}
29 | \item{method}{one of
30 | "mve", "mcd" or "classical".
Covariance matrix used within the
31 | homogeneous class.
32 | "mcd" and "mve" are robust covariance matrices as implemented
33 | in \code{\link[MASS]{cov.rob}}. "classical" refers to the classical
34 | covariance matrix.}
35 | \item{countmode}{optional positive integer. \code{ancoord} shows a
36 | message after every \code{countmode} algorithm runs.}
37 | \item{...}{no effect}
38 | }
39 | \details{
40 | The square root of the homogeneous class's covariance matrix
41 | is inverted by use of
42 | \code{\link{tdecomp}}, which can be expected to give
43 | reasonable results for singular within-class covariance matrices.
44 | }
45 | % \details{
46 | % }
47 | \value{
48 | List with the following components
49 | \item{ev}{eigenvalues in descending order.}
50 | \item{units}{columns are coordinates of projection basis vectors.
51 | New points \code{x} can be projected onto the projection basis vectors
52 | by \code{x \%*\% units}}
53 | \item{proj}{projections of \code{xd} onto \code{units}.}
54 | }
55 | \references{
56 | Hennig, C. (2004) Asymmetric linear dimension reduction for classification.
57 | Journal of Computational and Graphical Statistics 13, 930-945.
58 |
59 | Hennig, C. (2005) A method for visual cluster validation. In:
60 | Weihs, C. and Gaul, W. (eds.): Classification - The Ubiquitous
61 | Challenge. Springer, Heidelberg 2005, 153-160.
62 |
63 | }
64 | \author{Christian Hennig
65 | \email{christian.hennig@unibo.it}
66 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/}
67 | }
68 |
69 | \seealso{
70 | \code{\link{plotcluster}} for straightforward discriminant plots.
71 | \code{\link{discrproj}} for alternatives.
72 | \code{\link{rFace}} for generation of the example data used below.
73 | }
74 |
75 | \examples{
76 | set.seed(4634)
77 | face <- rFace(600,dMoNo=2,dNoEy=0)
78 | grface <- as.integer(attr(face,"grouping"))
79 | ancf2 <- ancoord(face,grface==4)
80 | plot(ancf2$proj,col=1+(grface==4))
81 | # ...done in one step by function plotcluster.
82 | }
83 | \keyword{multivariate}% at least one, from doc/KEYWORDS
84 | \keyword{classif}% __ONLY ONE__ keyword per line
85 | 86 | 87 | 88 |
--------------------------------------------------------------------------------
/man/awcoord.Rd:
--------------------------------------------------------------------------------
1 | \name{awcoord}
2 | \alias{awcoord}
3 | %- Also NEED an `\alias' for EACH other topic documented here.
4 | \title{Asymmetric weighted discriminant coordinates}
5 | \description{
6 | Asymmetric weighted discriminant coordinates as defined
7 | in Hennig (2004). Asymmetric discriminant projection means that there
8 | are two classes, one of which is treated as the homogeneous class
9 | (i.e., it should appear homogeneous and separated in the resulting projection)
10 | while the other may be heterogeneous.
11 | The principle is to maximize the ratio between the projection of a between
12 | classes separation matrix and the projection of the covariance matrix
13 | within the homogeneous class. Points are weighted according to their
14 | (robust) Mahalanobis distance to the homogeneous class.
15 | }
16 | \usage{
17 | awcoord(xd, clvecd, clnum=1, mahal="square", method="classical",
18 | clweight=switch(method,classical=FALSE,TRUE),
19 | alpha=0.99, subsample=0, countmode=1000, ...)
20 | }
21 | %- maybe also `usage' for other objects documented here.
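% A sketch of the weighting scheme described in the arguments below (an
% illustration of my reading of the documentation, not package code): with
% squared Mahalanobis distances d2 to the homogeneous class, p variables
% and q <- qchisq(alpha, p), mahal="square" amounts to weights
% w <- pmin(1, q/d2), i.e. full weight inside the alpha-ellipsoid and
% decreasing weight outside, while mahal="md" amounts to
% w <- pmin(1, sqrt(q/d2)).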
22 | \arguments{
23 | \item{xd}{the data matrix; a numerical object which can be coerced
24 | to a matrix.}
25 | \item{clvecd}{integer vector of class numbers; length must equal
26 | \code{nrow(xd)}.}
27 | \item{clnum}{integer. Number of the homogeneous class.}
28 | \item{mahal}{"md" or "square". If "md", a point's weight is the
29 | square root of the \code{alpha}-quantile of the
30 | corresponding chi squared distribution
31 | divided by the square root of its Mahalanobis distance to the
32 | homogeneous class, capped at 1 (points closer to the
33 | homogeneous class than the quantile get full weight). If "square"
34 | (which is recommended), the squared Mahalanobis distance and the
35 | unrooted quantile are used instead.}
36 | \item{method}{one of
37 | "mve", "mcd" or "classical". Covariance matrix used within the
38 | homogeneous class and for the computation of the Mahalanobis distances.
39 | "mcd" and "mve" are robust covariance matrices as implemented
40 | in \code{\link[MASS]{cov.rob}}. "classical" refers to the classical
41 | covariance matrix.}
42 | \item{clweight}{logical. If \code{FALSE}, only the points of the
43 | heterogeneous class are weighted. This, together with
44 | \code{method="classical"}, computes AWC as defined in Hennig (2004). If
45 | \code{TRUE}, all points are weighted. This, together with
46 | \code{method="mcd"}, computes ARC as defined in Hennig (2004).}
47 | \item{alpha}{numeric between 0 and 1. The corresponding quantile of
48 | the chi squared distribution is used for the downweighting
49 | of points. Points with a smaller Mahalanobis distance to the
50 | homogeneous class get full weight.}
51 | \item{subsample}{integer. If 0, all points are used. Otherwise, only
52 | a random subsample of \code{subsample} points is used.}
53 | \item{countmode}{optional positive integer. \code{awcoord} shows a
54 | message after every \code{countmode} algorithm runs.}
55 | \item{...}{no effect}
56 | }
57 | \details{
58 | The square root of the homogeneous class's covariance matrix
59 | is inverted by use of
60 | \code{\link{tdecomp}}, which can be expected to give
61 | reasonable results for singular within-class covariance matrices.
62 | }
63 | % \details{
64 | % }
65 | \value{
66 | List with the following components
67 | \item{ev}{eigenvalues in descending order.}
68 | \item{units}{columns are coordinates of projection basis vectors.
69 | New points \code{x} can be projected onto the projection basis vectors
70 | by \code{x \%*\% units}}
71 | \item{proj}{projections of \code{xd} onto \code{units}.}
72 | }
73 | \references{
74 | Hennig, C. (2004) Asymmetric linear dimension reduction for classification.
75 | Journal of Computational and Graphical Statistics 13, 930-945.
76 |
77 | Hennig, C. (2005) A method for visual cluster validation. In:
78 | Weihs, C. and Gaul, W. (eds.): Classification - The Ubiquitous
79 | Challenge. Springer, Heidelberg 2005, 153-160.
80 | }
81 | \author{Christian Hennig
82 | \email{christian.hennig@unibo.it}
83 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/}
84 | }
85 |
86 | \seealso{
87 | \code{\link{plotcluster}} for straightforward discriminant plots.
88 | \code{\link{discrproj}} for alternatives.
89 | \code{\link{rFace}} for generation of the example data used below.
90 | } 91 | 92 | \examples{ 93 | set.seed(4634) 94 | face <- rFace(600,dMoNo=2,dNoEy=0) 95 | grface <- as.integer(attr(face,"grouping")) 96 | awcf <- awcoord(face,grface==1) 97 | # awcf2 <- ancoord(face,grface==1, method="mcd") 98 | plot(awcf$proj,col=1+(grface==1)) 99 | # plot(awcf2$proj,col=1+(grface==1)) 100 | # ...done in one step by function plotcluster. 101 | } 102 | \keyword{multivariate}% at least one, from doc/KEYWORDS 103 | \keyword{classif}% __ONLY ONE__ keyword per line 104 | 105 | 106 | 107 | -------------------------------------------------------------------------------- /man/batcoord.Rd: -------------------------------------------------------------------------------- 1 | \name{batcoord} 2 | \alias{batcoord} 3 | \alias{batvarcoord} 4 | %- Also NEED an `\alias' for EACH other topic documented here. 5 | \title{Bhattacharyya discriminant projection} 6 | \description{ 7 | Computes Bhattacharyya discriminant projection coordinates 8 | as described in Fukunaga (1990), p. 455 ff. 9 | } 10 | \usage{ 11 | batcoord(xd, clvecd, clnum=1, dom="mean") 12 | batvarcoord(xd, clvecd, clnum=1) 13 | } 14 | %- maybe also `usage' for other objects documented here. 15 | \arguments{ 16 | \item{xd}{the data matrix; a numerical object which can be coerced 17 | to a matrix.} 18 | \item{clvecd}{integer or logical vector of class numbers; length must equal 19 | \code{nrow(xd)}.} 20 | \item{clnum}{integer, one of the values of \code{clvecd}, if this is 21 | an integer vector. Bhattacharyya projections can only be computed if 22 | there are only two classes in the dataset. \code{clnum} is the number 23 | of one of the two classes. All the points indicated by other values 24 | of \code{clvecd} are interpreted as the second class.} 25 | \item{dom}{string. \code{dom="mean"} means that the discriminant 26 | coordinate for the group means is computed as the first projection 27 | direction by 28 | \code{\link{discrcoord}} (option \code{pool="equal"}; both classes 29 | have the same weight for computing the within-class covariance 30 | matrix). Then the data is projected into a subspace orthogonal 31 | (w.r.t. the within-class covariance) to the 32 | discriminant coordinate, and the projection coordinates to maximize 33 | the differences in variance are computed. \cr 34 | \code{dom="variance"} means that the projection coordinates 35 | maximizing the difference in variances are computed. Then they are 36 | ordered with respect to the Bhattacharyya distance, which takes also 37 | the mean differences into account. Both procedures are implemented 38 | as described in Fukunaga (1990).} 39 | } 40 | \details{ 41 | \code{batvarcoord} computes the optimal projection coordinates with 42 | respect to the difference in variances. \code{batcoord} combines the 43 | differences in mean and variance as explained for the argument \code{dom}. 44 | } 45 | \value{ 46 | \code{batcoord} returns a list with the components \code{ev, rev, 47 | units, proj}. \code{batvarcoord} returns a list with the components 48 | \code{ev, rev, units, proj, W, S1, S2}. 49 | \item{ev}{vector of eigenvalues. If \code{dom="mean"}, then first eigenvalue 50 | from \code{\link{discrcoord}}. Further eigenvalues are of 51 | \eqn{S_1^{-1}S_2}, where \eqn{S_i} is the covariance matrix of class 52 | i. For \code{batvarcoord} or 53 | if \code{dom="variance"}, all eigenvalues come from 54 | \eqn{S_1^{-1}S_2} and are ordered by \code{rev}.} 55 | \item{rev}{for \code{batcoord}: 56 | vector of projected Bhattacharyya distances (Fukunaga 57 | (1990), p. 99). 
These determine the quality of the projection coordinates.
58 | For \code{batvarcoord}: vector of amount of projected difference in
59 | variances.}
60 | \item{units}{columns are coordinates of projection basis vectors.
61 | New points \code{x} can be projected onto the projection basis vectors
62 | by \code{x \%*\% units}.}
63 | \item{proj}{projections of \code{xd} onto \code{units}.}
64 |
65 | \item{W}{matrix \eqn{S_1^{-1}S_2}.}
66 | \item{S1}{covariance matrix of the first class.}
67 | \item{S2}{covariance matrix of the second class.}
68 | }
69 | \references{
70 | Fukunaga, K. (1990). \emph{Introduction to Statistical Pattern
71 | Recognition} (2nd ed.). Boston: Academic Press.
72 | }
73 | \author{Christian Hennig
74 | \email{christian.hennig@unibo.it}
75 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/}}
76 |
77 | \seealso{
78 | \code{\link{plotcluster}} for straightforward discriminant plots.
79 |
80 | \code{\link{discrcoord}} for discriminant coordinates.
81 |
82 | \code{\link{rFace}} for generation of the example data used below.
83 | }
84 |
85 | \examples{
86 | set.seed(4634)
87 | face <- rFace(600,dMoNo=2,dNoEy=0)
88 | grface <- as.integer(attr(face,"grouping"))
89 | bcf2 <- batcoord(face,grface==2)
90 | plot(bcf2$proj,col=1+(grface==2))
91 | bcfv2 <- batcoord(face,grface==2,dom="variance")
92 | plot(bcfv2$proj,col=1+(grface==2))
93 | bcfvv2 <- batvarcoord(face,grface==2)
94 | plot(bcfvv2$proj,col=1+(grface==2))
95 | }
96 | \keyword{multivariate}% at least one, from doc/KEYWORDS
97 | \keyword{classif}% __ONLY ONE__ keyword per line
98 | 99 |
--------------------------------------------------------------------------------
/man/bhattacharyya.dist.Rd:
--------------------------------------------------------------------------------
1 | \name{bhattacharyya.dist}
2 | \alias{bhattacharyya.dist}
3 | %- Also NEED an `\alias' for EACH other topic documented here.
4 | \title{Bhattacharyya distance between Gaussian distributions}
5 | \description{
6 | Computes Bhattacharyya distance between two multivariate
7 | Gaussian distributions. See Fukunaga (1990).
8 | }
9 | \usage{
10 | bhattacharyya.dist(mu1, mu2, Sigma1, Sigma2)
11 | }
12 | %- maybe also `usage' for other objects documented here.
13 | \arguments{
14 | \item{mu1}{mean vector of component 1.}
15 | \item{mu2}{mean vector of component 2.}
16 | \item{Sigma1}{covariance matrix of component 1.}
17 | \item{Sigma2}{covariance matrix of component 2.}
18 | }
19 |
20 | \value{
21 | The Bhattacharyya distance between the two Gaussian distributions.
22 | }
23 |
24 | \references{
25 | Fukunaga, K. (1990) \emph{Introduction to Statistical Pattern
26 | Recognition}, 2nd edition, Academic
27 | Press, New York.
28 |
29 | Hennig, C. (2010) Methods for merging Gaussian mixture components,
30 | \emph{Advances in Data Analysis and Classification}, 4, 3-34.
31 | }
32 | \note{
33 | Thanks to David Pinto for improving this function.
34 | }
35 | \author{Christian Hennig
36 | \email{christian.hennig@unibo.it}
37 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/}
38 | }
39 | \examples{
40 | round(bhattacharyya.dist(c(1,1),c(2,5),diag(2),diag(2)),digits=2)
41 | }
42 | \keyword{multivariate}
43 | 44 | 45 | 46 |
--------------------------------------------------------------------------------
/man/bhattacharyya.matrix.Rd:
--------------------------------------------------------------------------------
1 | \name{bhattacharyya.matrix}
2 | \alias{bhattacharyya.matrix}
3 | %- Also NEED an `\alias' for EACH other topic documented here.
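% For reference, the standard definition cited above (Fukunaga 1990): for
% a pair (mu1,Sigma1), (mu2,Sigma2) with S = (Sigma1+Sigma2)/2, the
% Bhattacharyya distance between the two Gaussian distributions is
%   b = t(mu2-mu1) %*% solve(S) %*% (mu2-mu1)/8 +
%       log(det(S)/sqrt(det(Sigma1)*det(Sigma2)))/2,
% and exp(-b) is the upper bound for the misclassification probability
% that misclassification.bound=TRUE in bhattacharyya.matrix returns.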
4 | \title{Matrix of pairwise Bhattacharyya distances}
5 | \description{
6 | Computes Bhattacharyya distances for pairs of components
7 | given the parameters of a Gaussian mixture.
8 | }
9 | \usage{
10 | bhattacharyya.matrix(muarray,Sigmaarray,ipairs="all",
11 | misclassification.bound=TRUE)
12 | }
13 | %- maybe also `usage' for other objects documented here.
14 | \arguments{
15 | \item{muarray}{matrix of component means (different components are in
16 | different columns).}
17 | \item{Sigmaarray}{three dimensional array with component covariance
18 | matrices (the third dimension refers to components).}
19 | \item{ipairs}{\code{"all"} or list of vectors of two integers. If
20 | \code{ipairs="all"}, computations are carried out for all pairs of
21 | components. Otherwise, ipairs gives the pairs of components for
22 | which computations are carried out.}
23 | \item{misclassification.bound}{logical. If \code{TRUE}, upper bounds
24 | for misclassification probabilities \code{exp(-b)}
25 | are given out instead of the original Bhattacharyya distances \code{b}.}
26 | }
27 |
28 | \value{
29 | A matrix with Bhattacharyya distances (or derived misclassification
30 | bounds, see above) between pairs of Gaussian distributions with the
31 | provided parameters. If \code{ipairs!="all"}, the Bhattacharyya
32 | distance and the misclassification bound are given as \code{NA} for
33 | pairs not included in \code{ipairs}.
34 | }
35 |
36 | \references{
37 | Fukunaga, K. (1990) \emph{Introduction to Statistical Pattern
38 | Recognition}, 2nd edition, Academic
39 | Press, New York.
40 |
41 | Hennig, C. (2010) Methods for merging Gaussian mixture components,
42 | \emph{Advances in Data Analysis and Classification}, 4, 3-34.
43 | }
44 | \author{Christian Hennig
45 | \email{christian.hennig@unibo.it}
46 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/}
47 | }
48 |
49 | \seealso{
50 | \code{\link{bhattacharyya.dist}}
51 | }
52 |
53 | \examples{
54 | muarray <-cbind(c(0,0),c(0,0.1),c(10,10))
55 | sigmaarray <- array(c(diag(2),diag(2),diag(2)),dim=c(2,2,3))
56 | bhattacharyya.matrix(muarray,sigmaarray,ipairs=list(c(1,2),c(2,3)))
57 |
58 | }
59 | \keyword{cluster}% at least one, from doc/KEYWORDS
60 | \keyword{multivariate}
61 | 62 | 63 | 64 |
--------------------------------------------------------------------------------
/man/calinhara.Rd:
--------------------------------------------------------------------------------
1 | \name{calinhara}
2 | \alias{calinhara}
3 | %- Also NEED an `\alias' for EACH other topic documented here.
4 | \title{Calinski-Harabasz index}
5 | \description{
6 | Calinski-Harabasz index for estimating the number of clusters,
7 | based here on an observations/variables matrix. A distance based
8 | version is available through \code{cluster.stats}.
9 | }
10 | \usage{
11 | calinhara(x,clustering,cn=max(clustering))
12 | }
13 | %- maybe also `usage' for other objects documented here.
14 | \arguments{
15 | \item{x}{data matrix or data frame.}
16 | \item{clustering}{vector of integers. Clustering.}
17 | \item{cn}{integer. Number of clusters.}
18 | }
19 |
20 | \value{
21 | Calinski-Harabasz statistic, which is
22 | \code{(n-cn)*sum(diag(B))/((cn-1)*sum(diag(W)))}, where B is the
23 | between-cluster dispersion matrix of the cluster means
24 | and W is the pooled within-cluster covariance matrix.
25 | }
26 |
27 | \references{
28 | Calinski, T., and Harabasz, J. (1974) A Dendrite Method for Cluster
29 | Analysis, \emph{Communications in Statistics}, 3, 1-27.
30 | }
31 |
32 | \author{Christian Hennig
33 | \email{christian.hennig@unibo.it}
34 | \url{https://www.unibo.it/sitoweb/christian.hennig/en}}
35 |
36 | \seealso{\code{\link{cluster.stats}}}
37 |
38 | \examples{
39 | set.seed(98765)
40 | iriss <- iris[sample(150,20),-5]
41 | km <- kmeans(iriss,3)
42 | round(calinhara(iriss,km$cluster),digits=2)
43 | }
44 | 45 | 46 |
47 | \keyword{cluster}% __ONLY ONE__ keyword per line
--------------------------------------------------------------------------------
/man/can.Rd:
--------------------------------------------------------------------------------
1 | \name{can}
2 | \alias{can}
3 | %- Also NEED an `\alias' for EACH other topic documented here.
4 | \title{Generation of the tuning constant for regression fixed point clusters}
5 | \description{
6 | Generates tuning constants \code{ca}
7 | for \code{\link{fixreg}} depending on
8 | the number of points and variables of the dataset.
9 |
10 | Only intended for use in \code{\link{fixreg}}.
11 |
12 | }
13 | \usage{
14 | can(n, p)
15 | }
16 | %- maybe also `usage' for other objects documented here.
17 | \arguments{
18 | \item{n}{positive integer. Number of points.}
19 | \item{p}{positive integer. Number of independent variables.}
20 | }
21 | \details{
22 | The formula is
23 | \eqn{3+33/(n*2^{-(p-1)/2})^{1/3}+2900000/(n*2^{-(p-1)/2})^3}. For
24 | justification cf. Hennig (2002).
25 | }
26 | \value{
27 | A number.
28 | }
29 | \references{
30 | Hennig, C. (2002) Fixed point clusters for linear regression:
31 | computation and comparison, \emph{Journal of
32 | Classification} 19, 249-276.
33 | }
34 |
35 | \author{Christian Hennig
36 | \email{christian.hennig@unibo.it}
37 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/}}
38 |
39 | \seealso{\code{\link{fixreg}}}
40 |
41 | \examples{
42 | can(429,3)
43 | }
44 | \keyword{arith}% at least one, from doc/KEYWORDS
45 | 46 |
--------------------------------------------------------------------------------
/man/cat2bin.Rd:
--------------------------------------------------------------------------------
1 | \name{cat2bin}
2 | \alias{cat2bin}
3 | %- Also NEED an `\alias' for EACH other topic documented here.
4 | \title{Recode nominal variables to binary variables}
5 | \description{
6 | Recodes a dataset with nominal variables so that the nominal
7 | variables are replaced by binary variables for the categories.
8 | }
9 | \usage{
10 | cat2bin(x,categorical=NULL)
11 | }
12 | %- maybe also `usage' for other objects documented here.
13 | \arguments{
14 | \item{x}{data matrix or data frame. The data need to be organised
15 | case-wise, i.e., if there are only categorical variables and 15
16 | cases that all take the values c(1,1,2) on the 3 variables, the data
17 | matrix needs 15 rows with values 1 1 2. (Categorical variables could
18 | take numbers or strings or anything that can be coerced to factor levels as values.)}
19 | \item{categorical}{vector of numbers of variables to be recoded.}
20 | }
21 |
22 | \value{
23 | A list with components
24 | \item{data}{data matrix with variables specified in \code{categorical}
25 | replaced by 0-1 variables, one for each category.}
26 | \item{variableinfo}{list of lists.
One list for every variable in the 27 | original dataset, with four components each, namely \code{type} 28 | (\code{"categorical"} or \code{"not recoded"}), \code{levels} 29 | (levels of nominal recoded variables in order of binary variable in 30 | output dataset), \code{ncat} (number of categories for recoded 31 | variables), \code{varnum} (number of variables in output dataset 32 | belonging to this original variable).} 33 | } 34 | 35 | \author{Christian Hennig 36 | \email{christian.hennig@unibo.it} 37 | \url{https://www.unibo.it/sitoweb/christian.hennig/en}} 38 | 39 | \seealso{\code{\link{discrete.recode}}} 40 | 41 | \examples{ 42 | set.seed(776655) 43 | v1 <- rnorm(20) 44 | v2 <- rnorm(20) 45 | d1 <- sample(1:5,20,replace=TRUE) 46 | d2 <- sample(1:4,20,replace=TRUE) 47 | ldata <-cbind(v1,v2,d1,d2) 48 | lc <- cat2bin(ldata,categorical=3:4) 49 | } 50 | 51 | 52 | 53 | \keyword{manip}% __ONLY ONE__ keyword per line 54 | -------------------------------------------------------------------------------- /man/cdbw.Rd: -------------------------------------------------------------------------------- 1 | \name{cdbw} 2 | \alias{cdbw} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{CDbw-index for cluster validation} 5 | \description{ 6 | CDbw-index for cluster validation, as defined in Halkidi and 7 | Vazirgiannis (2008), Halkidi et al. (2015). 8 | } 9 | \usage{ 10 | cdbw(x,clustering,r=10,s=seq(0.1,0.8,by=0.1), 11 | clusterstdev=TRUE,trace=FALSE) 12 | 13 | } 14 | %- maybe also `usage' for other objects documented here. 15 | \arguments{ 16 | \item{x}{something that can be coerced into a numerical 17 | matrix. Euclidean dataset.} 18 | \item{clustering}{vector of integers with length \code{=nrow(x)}; 19 | indicating the cluster for each observation.} 20 | \item{r}{integer. Number of cluster border representatives.} 21 | \item{s}{numerical vector of shrinking factors (between 0 and 1).} 22 | \item{clusterstdev}{logical. If \code{TRUE}, the neighborhood radius 23 | for intra-cluster density is the within-cluster estimated squared 24 | distance from the mean of the cluster; otherwise it is the average of 25 | these over all clusters.} 26 | \item{trace}{logical. If \code{TRUE}, results are printed for the 27 | steps to compute the index.} 28 | } 29 | 30 | \value{ 31 | List with components (see Halkidi and Vazirgiannis (2008), Halkidi et 32 | al. (2015) for details) 33 | \item{cdbw}{value of CDbw index (the higher the better).} 34 | \item{cohesion}{cohesion.} 35 | \item{compactness}{compactness.} 36 | \item{sep}{separation.} 37 | } 38 | 39 | 40 | \references{ 41 | Halkidi, M. and Vazirgiannis, M. (2008) A density-based cluster 42 | validity approach using multi-representatives. \emph{Pattern 43 | Recognition Letters} 29, 773-786. 44 | 45 | Halkidi, M., Vazirgiannis, M. and Hennig, C. (2015) Method-independent 46 | indices for cluster validation. In C. Hennig, M. Meila, F. Murtagh, 47 | R. Rocci (eds.) \emph{Handbook of Cluster Analysis}, CRC 48 | Press/Taylor \code{&} Francis, Boca Raton. 
49 | } 50 | 51 | \author{Christian Hennig 52 | \email{christian.hennig@unibo.it} 53 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 54 | } 55 | \examples{ 56 | options(digits=3) 57 | iriss <- as.matrix(iris[c(1:5,51:55,101:105),-5]) 58 | irisc <- as.numeric(iris[c(1:5,51:55,101:105),5]) 59 | cdbw(iriss,irisc) 60 | } 61 | \keyword{cluster}% at least one, from doc/KEYWORDS 62 | 63 | 64 | 65 | 66 | -------------------------------------------------------------------------------- /man/cgrestandard.Rd: -------------------------------------------------------------------------------- 1 | \name{cgrestandard} 2 | \alias{cgrestandard} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Standardise cluster validation statistics by random clustering results} 5 | \description{ 6 | Standardises cluster validity statistics as produced by 7 | \code{\link{clustatsum}} relative to results that were achieved by 8 | random clusterings on the same data by 9 | \code{\link{randomclustersim}}. The aim is to make differences between 10 | values comparable between indexes, see Hennig (2019), Akhanli and 11 | Hennig (2020). 12 | 13 | This is mainly for use within \code{\link{clusterbenchstats}}. 14 | } 15 | \usage{ 16 | cgrestandard(clusum,clusim,G,percentage=FALSE, 17 | useallmethods=FALSE, 18 | useallg=FALSE, othernc=list()) 19 | } 20 | %- maybe also `usage' for other objects documented here. 21 | \arguments{ 22 | \item{clusum}{object of class "valstat", see \code{\link{clusterbenchstats}}.} 23 | \item{clusim}{list; output object of \code{\link{randomclustersim}}, 24 | see there.} 25 | \item{G}{vector of integers. Numbers of clusters to consider.} 26 | \item{percentage}{logical. If \code{FALSE}, standardisation is done to 27 | mean zero and standard deviation 1 using the random clusterings. If 28 | \code{TRUE}, the output is the percentage of simulated values below 29 | the result (more precisely, this number plus one divided by the 30 | total plus one).} 31 | \item{useallmethods}{logical. If \code{FALSE}, only random clustering 32 | results from \code{clusim} are used for standardisation. If 33 | \code{TRUE}, also clustering results from other methods as given in 34 | \code{clusum} are used.} 35 | \item{useallg}{logical. If \code{TRUE}, standardisation uses results 36 | from all numbers of clusters in \code{G}. If \code{FALSE}, 37 | standardisation of results for a specific number of cluster only 38 | uses results from that number of clusters.} 39 | \item{othernc}{list of integer vectors of length 2. This allows the 40 | incorporation of methods that bring forth other numbers of clusters 41 | than those in \code{G}, for example because a method may have 42 | automatically estimated a number of clusters. The first number is 43 | the number of the clustering method (the order is determined by 44 | argument \code{clustermethod} in 45 | \code{\link{clusterbenchstats}}), the second number is the 46 | number of clusters. Results specified here are only standardised in 47 | \code{useallg=TRUE}.} 48 | } 49 | 50 | \details{ 51 | \code{cgrestandard} will add a statistic named \code{dmode} to the 52 | input set of validation statistics, which is defined as 53 | \code{0.75*dindex+0.25*highdgap}, aggregating these two closely 54 | related statistics, see \code{\link{clustatsum}}. 55 | } 56 | 57 | \value{ 58 | List of class \code{"valstat"}, see 59 | \code{\link{valstat.object}}, with standardised results as 60 | explained above. 61 | } 62 | \references{ 63 | Hennig, C. 
(2019) Cluster validation by measurement of clustering
64 | characteristics relevant to the user. In C. H. Skiadas (ed.)
65 | \emph{Data Analysis and Applications 1: Clustering and Regression,
66 | Modeling-estimating, Forecasting and Data Mining, Volume 2}, Wiley,
67 | New York, 1-24,
68 | \url{https://arxiv.org/abs/1703.09282}
69 |
70 | Akhanli, S. and Hennig, C. (2020) Calibrating and aggregating cluster
71 | validity indexes for context-adapted comparison of clusterings.
72 | \emph{Statistics and Computing}, 30, 1523-1544,
73 | \url{https://link.springer.com/article/10.1007/s11222-020-09958-2}, \url{https://arxiv.org/abs/2002.01822}
74 | 75 | 76 |
77 | }
78 | \author{Christian Hennig
79 | \email{christian.hennig@unibo.it}
80 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/}
81 | }
82 |
83 | \seealso{
84 | \code{\link{valstat.object}}, \code{\link{clusterbenchstats}}, \code{\link{stupidkcentroids}}, \code{\link{stupidknn}}, \code{\link{stupidkfn}}, \code{\link{stupidkaven}}, \code{\link{clustatsum}}
85 | }
86 |
87 | \examples{
88 |
89 | set.seed(20000)
90 | options(digits=3)
91 | face <- rFace(10,dMoNo=2,dNoEy=0,p=2)
92 | dif <- dist(face)
93 | clusum <- list()
94 | clusum[[2]] <- list()
95 | cl12 <- kmeansCBI(face,2)
96 | cl13 <- kmeansCBI(face,3)
97 | cl22 <- claraCBI(face,2)
98 | cl23 <- claraCBI(face,3)
99 | ccl12 <- clustatsum(dif,cl12$partition)
100 | ccl13 <- clustatsum(dif,cl13$partition)
101 | ccl22 <- clustatsum(dif,cl22$partition)
102 | ccl23 <- clustatsum(dif,cl23$partition)
103 | clusum[[1]] <- list()
104 | clusum[[1]][[2]] <- ccl12
105 | clusum[[1]][[3]] <- ccl13
106 | clusum[[2]][[2]] <- ccl22
107 | clusum[[2]][[3]] <- ccl23
108 | clusum$maxG <- 3
109 | clusum$minG <- 2
110 | clusum$method <- c("kmeansCBI","claraCBI")
111 | clusum$name <- c("kmeansCBI","claraCBI")
112 | clusim <- randomclustersim(dist(face),G=2:3,nnruns=1,kmruns=1,
113 | fnruns=1,avenruns=1,monitor=FALSE)
114 | cgr <- cgrestandard(clusum,clusim,2:3)
115 | cgr2 <- cgrestandard(clusum,clusim,2:3,useallg=TRUE)
116 | cgr3 <- cgrestandard(clusum,clusim,2:3,percentage=TRUE)
117 | print(str(cgr))
118 | print(str(cgr2))
119 | print(cgr3[[1]][[2]])
120 | }
121 | \keyword{multivariate}% at least one, from doc/KEYWORDS
122 | \keyword{cluster}% __ONLY ONE__ keyword per line
123 | 124 | 125 |
--------------------------------------------------------------------------------
/man/classifdist.Rd:
--------------------------------------------------------------------------------
1 | \name{classifdist}
2 | \alias{classifdist}
3 | \alias{classifnp}
4 | %- Also NEED an `\alias' for EACH other topic documented here.
5 | \title{Classification of unclustered points}
6 | \description{
7 | Various methods for classification of unclustered points from
8 | clustered points for use within functions \code{nselectboot}
9 | and \code{prediction.strength}.
10 | }
11 | \usage{
12 | classifdist(cdist,clustering,
13 | method="averagedist",
14 | centroids=NULL,nnk=1)
15 |
16 | classifnp(data,clustering,
17 | method="centroid",cdist=NULL,
18 | centroids=NULL,nnk=1)
19 | }
20 |
21 | %- maybe also `usage' for other objects documented here.
22 | \arguments{
23 | \item{cdist}{dissimilarity matrix or \code{dist}-object. Necessary for
24 | \code{classifdist} but optional for \code{classifnp} and there only
25 | used if \code{method="averagedist"} (if not provided, \code{dist} is
26 | applied to \code{data}).}
27 | \item{data}{something that can be coerced into an
28 | \code{n*p} data matrix.}
29 | \item{clustering}{integer vector.
Gives the cluster number (between 1
30 | and k for k clusters) for clustered points and should be -1 for
31 | points to be classified.}
32 | \item{method}{one of \code{"averagedist", "centroid", "qda",
33 | "knn"}. See details.}
34 | \item{centroids}{for \code{classifnp} a k times p matrix of cluster
35 | centroids. For \code{classifdist} a vector of numbers of centroid
36 | objects as provided by \code{\link[cluster]{pam}}. Only used if
37 | \code{method="centroid"}; in that case mandatory for
38 | \code{classifdist} but optional for \code{classifnp}, where cluster mean
39 | vectors are computed if \code{centroids=NULL}.}
40 | \item{nnk}{number of nearest neighbours if \code{method="knn"}.}
41 | }
42 | 
43 | \details{
44 | \code{classifdist} is for data given as dissimilarity matrix,
45 | \code{classifnp} is for data given as n times p data matrix.
46 | The following methods are supported:
47 | \describe{
48 | \item{"centroid"}{assigns observations to the cluster with closest
49 | cluster centroid as specified in argument \code{centroids} (this
50 | is associated with k-means and pam/clara-clustering).}
51 | \item{"qda"}{only in \code{classifnp}. Classifies by quadratic
52 | discriminant analysis (this is associated with Gaussian clusters
53 | with flexible covariance matrices), calling
54 | \code{\link[MASS]{qda}} with default settings. If
55 | \code{\link[MASS]{qda}} gives an error (usually because a class
56 | was too small), \code{\link[MASS]{lda}} is used.}
57 | \item{"lda"}{only in \code{classifnp}. Classifies by linear
58 | discriminant analysis (this is associated with Gaussian clusters
59 | with equal covariance matrices), calling
60 | \code{\link[MASS]{lda}} with default settings.}
61 | \item{"averagedist"}{assigns to the cluster to which an observation
62 | has the minimum average dissimilarity to all points in the cluster
63 | (this is associated with average linkage clustering).}
64 | \item{"knn"}{classifies by \code{nnk} nearest neighbours (for
65 | \code{nnk=1}, this is associated with single linkage clustering).
66 | Calls \code{\link[class]{knn}} in \code{classifnp}.}
67 | \item{"fn"}{classifies by the minimum distance to the farthest
68 | neighbour (this is associated with complete linkage clustering).}
69 | }
70 | }
71 | 
72 | \value{
73 | An integer vector giving cluster numbers for all observations; those
74 | for the observations already clustered in the input are the same as in
75 | the input.
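For instance, if \code{clustering} marks five observations with
\code{-1}, exactly those five entries of the output are replaced by
predicted cluster numbers, and all remaining entries are returned
unchanged.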
76 | } 77 | 78 | \author{Christian Hennig 79 | \email{christian.hennig@unibo.it} 80 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 81 | } 82 | \seealso{ 83 | \code{\link{prediction.strength}}, \code{\link{nselectboot}} 84 | } 85 | \examples{ 86 | set.seed(20000) 87 | x1 <- rnorm(50) 88 | y <- rnorm(100) 89 | x2 <- rnorm(40,mean=20) 90 | x3 <- rnorm(10,mean=25,sd=100) 91 | x <-cbind(c(x1,x2,x3),y) 92 | truec <- c(rep(1,50),rep(2,40),rep(3,10)) 93 | topredict <- c(1,2,51,52,91) 94 | clumin <- truec 95 | clumin[topredict] <- -1 96 | 97 | classifnp(x,clumin, method="averagedist") 98 | classifnp(x,clumin, method="qda") 99 | classifdist(dist(x),clumin, centroids=c(3,53,93),method="centroid") 100 | classifdist(dist(x),clumin,method="knn") 101 | 102 | } 103 | \keyword{cluster}% at least one, from doc/KEYWORDS 104 | \keyword{multivariate} 105 | 106 | 107 | 108 | -------------------------------------------------------------------------------- /man/clucols.Rd: -------------------------------------------------------------------------------- 1 | \name{clucols} 2 | \alias{clucols} 3 | \alias{clugrey} 4 | \alias{clusym} 5 | %- Also NEED an `\alias' for EACH other topic documented here. 6 | \title{Sets of colours and symbols for cluster plotting} 7 | \description{ 8 | \code{clucols} gives out a vector of different random colours. 9 | \code{clugrey} gives out a vector of equidistant grey scales. 10 | \code{clusym} is a vector of different symbols starting from "1", 11 | "2",... 12 | } 13 | \usage{ 14 | clucols(i, seed=NULL) 15 | clugrey(i,max=0.9) 16 | clusym 17 | } 18 | %- maybe also `usage' for other objects documented here. 19 | \arguments{ 20 | \item{i}{integer. Length of output vector (number of clusters).} 21 | \item{seed}{integer. Random seed.} 22 | \item{max}{between 0 and 1. Maximum grey scale value, see 23 | \code{\link{grey}} (close to 1 is bright).} 24 | } 25 | 26 | \value{ 27 | \code{clucols} gives out a vector of different random colours. 28 | \code{clugrey} gives out a vector of equidistant grey scales. 29 | \code{clusym} is a vector of different characters starting from "1", 30 | "2",... 31 | } 32 | 33 | \author{Christian Hennig 34 | \email{christian.hennig@unibo.it} 35 | \url{https://www.unibo.it/sitoweb/christian.hennig/en}} 36 | 37 | \examples{ 38 | set.seed(112233) 39 | require(MASS) 40 | require(flexmix) 41 | data(Cars93) 42 | Cars934 <- Cars93[,c(3,5,8,10)] 43 | cc <- 44 | discrete.recode(Cars934,xvarsorted=FALSE,continuous=c(2,3),discrete=c(1,4)) 45 | fcc <- flexmix(cc$data~1,k=3, 46 | model=lcmixed(continuous=2,discrete=2,ppdim=c(6,3),diagonal=TRUE)) 47 | plot(Cars934[,c(2,3)],col=clucols(3)[fcc@cluster],pch=clusym[fcc@cluster]) 48 | } 49 | 50 | \keyword{cluster}% __ONLY ONE__ keyword per line 51 | -------------------------------------------------------------------------------- /man/clujaccard.Rd: -------------------------------------------------------------------------------- 1 | \name{clujaccard} 2 | \alias{clujaccard} 3 | 4 | %- Also NEED an `\alias' for EACH other topic documented here. 5 | \title{Jaccard similarity between logical vectors} 6 | \description{ 7 | Jaccard similarity between logical or 0-1 vectors: 8 | \code{sum(c1 & c2)/sum(c1 | c2)}. 9 | } 10 | \usage{ 11 | clujaccard(c1,c2,zerobyzero=NA) 12 | } 13 | \arguments{ 14 | \item{c1}{logical or 0-1-vector.} 15 | \item{c2}{logical or 0-1-vector (same length).} 16 | \item{zerobyzero}{result if \code{sum(c1 | c2)=0}.} 17 | } 18 | 19 | \value{ 20 | Numeric between 0 and 1. 
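As a small worked example of the formula in the description,
\code{clujaccard(c(TRUE,TRUE,FALSE),c(TRUE,FALSE,FALSE))} gives
\code{0.5}: both vectors are \code{TRUE} in one position, while at
least one of them is \code{TRUE} in two positions.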
21 | }
22 | \author{Christian Hennig
23 | \email{christian.hennig@unibo.it}
24 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/}
25 | }
26 | \examples{
27 | c1 <- rep(TRUE,10)
28 | c2 <- c(FALSE,rep(TRUE,9))
29 | clujaccard(c1,c2)
30 | }
31 | \keyword{cluster}% at least one, from doc/KEYWORDS
32 | 
33 | 
34 | 
35 | 
--------------------------------------------------------------------------------
/man/clusexpect.Rd:
--------------------------------------------------------------------------------
1 | \name{clusexpect}
2 | \alias{clusexpect}
3 | %- Also NEED an `\alias' for EACH other topic documented here.
4 | \title{Expected value of the number of times a fixed point
5 | cluster is found}
6 | \description{
7 | A rough approximation of the expectation of the number of times a well
8 | separated fixed point
9 | cluster (FPC) of size \code{cn} is found in \code{ir} fixed point
10 | iterations of \code{\link{fixreg}}.
11 | }
12 | \usage{
13 | clusexpect(n, p, cn, ir)
14 | }
15 | %- maybe also `usage' for other objects documented here.
16 | \arguments{
17 | \item{n}{positive integer. Total number of points.}
18 | \item{p}{positive integer. Number of independent variables.}
19 | \item{cn}{positive integer smaller or equal to \code{n}.
20 | Size of the FPC.}
21 | \item{ir}{positive integer. Number of fixed point iterations.}
22 | }
23 | \details{
24 | The approximation is based on the assumption that a well separated FPC
25 | is found iff all \code{p+2} points of the initial configuration come
26 | from the FPC. The value is \code{ir} times the probability for
27 | this. For a discussion of this assumption cf. Hennig (2002).
28 | }
29 | \value{
30 | A number.
31 | }
32 | 
33 | \references{
34 | Hennig, C. (2002) Fixed point clusters for linear regression:
35 | computation and comparison, \emph{Journal of
36 | Classification} 19, 249-276.
37 | }
38 | 
39 | \author{Christian Hennig
40 | \email{christian.hennig@unibo.it}
41 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/}}
42 | 
43 | \seealso{\code{\link{fixreg}}}
44 | 
45 | \examples{
46 | round(clusexpect(500,4,150,2000),digits=2)
47 | }
48 | \keyword{univar}% at least one, from doc/KEYWORDS
49 | \keyword{cluster}
50 | 
--------------------------------------------------------------------------------
/man/cluster.magazine.Rd:
--------------------------------------------------------------------------------
1 | \name{cluster.magazine}
2 | \alias{cluster.magazine}
3 | %- Also NEED an `\alias' for EACH other topic documented here.
4 | \title{Run many clustering methods on many numbers of clusters}
5 | \description{
6 | Runs a user-specified set of clustering methods (CBI-functions, see
7 | \code{\link{kmeansCBI}}) with several numbers of clusters on a dataset
8 | with unified output.
9 | }
10 | \usage{
11 | cluster.magazine(data,G,diss = inherits(data, "dist"),
12 | scaling=TRUE, clustermethod,
13 | distmethod=rep(TRUE,length(clustermethod)),
14 | ncinput=rep(TRUE,length(clustermethod)),
15 | clustermethodpars,
16 | trace=TRUE)
17 | 
18 | }
19 | %- maybe also `usage' for other objects documented here.
20 | \arguments{
21 | \item{data}{data matrix or \code{dist}-object.}
22 | \item{G}{vector of integers. Numbers of clusters to consider.}
23 | \item{diss}{logical. If \code{TRUE}, the data matrix is assumed to be
24 | a distance/dissimilarity matrix, otherwise it's observations times
25 | variables.}
26 | \item{scaling}{either a logical or a numeric vector of length equal to
27 | the number of columns of \code{data}. If \code{FALSE}, data won't be
If \code{FALSE}, data won't be 28 | scaled, otherwise \code{scaling} is passed on to \code{\link{scale}} as 29 | argument\code{scale}.} 30 | \item{clustermethod}{vector of strings specifying names of 31 | CBI-functions (see \code{\link{kmeansCBI}}). These are the 32 | clustering methods to be applied.} 33 | \item{distmethod}{vector of logicals, of the same length as 34 | \code{clustermethod}. \code{TRUE} means that the clustering method 35 | operates on distances. If \code{diss=TRUE}, all entries have to be 36 | \code{TRUE}. Otherwise, if an entry is true, the corresponding 37 | method will be applied on \code{dist(data)}.} 38 | \item{ncinput}{vector of logicals, of the same length as 39 | \code{clustermethod}. \code{TRUE} indicates that the corresponding 40 | clustering method requires the number of clusters as input and will 41 | not estimate the number of clusters itself.} 42 | \item{clustermethodpars}{list of the same length as 43 | \code{clustermethod}. Specifies parameters for all involved 44 | clustering methods. Its jth entry is passed to clustermethod number 45 | k. Can be an empty entry in case all defaults are used for a 46 | clustering method. The number of clusters does not need to be 47 | specified here.} 48 | \item{trace}{logical. If \code{TRUE}, some runtime information is 49 | printed.} 50 | } 51 | 52 | % \details{ 53 | % } 54 | \value{ 55 | List of lists comprising 56 | \item{output}{Two-dimensional list. The first list index i is the number 57 | of the clustering method (ordering as specified in 58 | \code{clustermethod}), the second list index j is the number of 59 | clusters. This stores the full output of clustermethod i run on 60 | number of clusters j.} 61 | \item{clustering}{Two-dimensional list. The first list index i is the number 62 | of the clustering method (ordering as specified in 63 | \code{clustermethod}), the second list index j is the number of 64 | clusters. This stores the clustering integer vector (i.e., the 65 | \code{partition}-component of the CBI-function, see 66 | \code{\link{kmeansCBI}}) of clustermethod i run on 67 | number of clusters j.} 68 | \item{noise}{Two-dimensional list. The first list index i is the number 69 | of the clustering method (ordering as specified in 70 | \code{clustermethod}), the second list index j is the number of 71 | clusters. List entries are single logicals. If \code{TRUE}, the 72 | clustering method estimated some noise, i.e., points not belonging 73 | to any cluster, which in the clustering vector are indicated by the 74 | highest number (number of clusters plus one in case that the number 75 | of clusters was fixed).} 76 | \item{othernc}{list of integer vectors of length 2. The first number is 77 | the number of the clustering method (the order is determined by 78 | argument \code{clustermethod}), the second number is the 79 | number of clusters for those methods that estimate the number of 80 | clusters themselves and estimate a number that is smaller than 81 | \code{min(G)} or larger than \code{max(G)}.} 82 | } 83 | \references{ 84 | Hennig, C. (2017) Cluster validation by measurement of clustering 85 | characteristics relevant to the user. In C. H. Skiadas (ed.) 
86 | \emph{Proceedings of ASMDA 2017}, 501-520, 87 | \url{https://arxiv.org/abs/1703.09282} 88 | 89 | 90 | } 91 | \author{Christian Hennig 92 | \email{christian.hennig@unibo.it} 93 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 94 | } 95 | 96 | \seealso{ 97 | \code{\link{clusterbenchstats}}, \code{\link{kmeansCBI}} 98 | } 99 | 100 | \examples{ 101 | 102 | set.seed(20000) 103 | options(digits=3) 104 | face <- rFace(10,dMoNo=2,dNoEy=0,p=2) 105 | clustermethod=c("kmeansCBI","hclustCBI","hclustCBI") 106 | # A clustering method can be used more than once, with different 107 | # parameters 108 | clustermethodpars <- list() 109 | clustermethodpars[[2]] <- clustermethodpars[[3]] <- list() 110 | clustermethodpars[[2]]$method <- "complete" 111 | clustermethodpars[[3]]$method <- "average" 112 | cmf <- cluster.magazine(face,G=2:3,clustermethod=clustermethod, 113 | distmethod=rep(FALSE,3),clustermethodpars=clustermethodpars) 114 | print(str(cmf)) 115 | 116 | } 117 | \keyword{multivariate}% at least one, from doc/KEYWORDS 118 | \keyword{cluster}% __ONLY ONE__ keyword per line 119 | 120 | 121 | 122 | -------------------------------------------------------------------------------- /man/cluster.varstats.Rd: -------------------------------------------------------------------------------- 1 | \name{cluster.varstats} 2 | \alias{cluster.varstats} 3 | \alias{print.varwisetables} 4 | %- Also NEED an `\alias' for EACH other topic documented here. 5 | \title{Variablewise statistics for clusters} 6 | \description{ 7 | This function gives some helpful variable-wise information for cluster 8 | interpretation, given a clustering and a data set. The output object 9 | contains some tables. For categorical variables, tables compare 10 | clusterwise distributions with overall distributions. Continuous 11 | variables are categorised for this. 12 | 13 | If desired, tables, histograms, some standard statistics of 14 | continuous variables and validation plots as available through 15 | \code{\link{discrproj}} (Hennig 2004) are given out on the fly. 16 | } 17 | \usage{ 18 | cluster.varstats(clustering,vardata,contdata=vardata, 19 | clusterwise=TRUE, 20 | tablevar=NULL,catvar=NULL, 21 | quantvar=NULL, catvarcats=10, 22 | proportions=FALSE, 23 | projmethod="none",minsize=ncol(contdata)+2, 24 | ask=TRUE,rangefactor=1) 25 | 26 | \method{print}{varwisetables}(x,digits=3,...) 27 | } 28 | %- maybe also `usage' for other objects documented here. 29 | \arguments{ 30 | \item{clustering}{vector of integers. Clustering (needs to be in 31 | standard coding, 1,2,...).} 32 | \item{vardata}{data matrix or data frame of which variables are 33 | summarised.} 34 | \item{contdata}{variable matrix or data frame, normally all or some 35 | variables from \code{vardata}, on which cluster visualisation by 36 | projection methods is performed unless \code{projmethod="none"}. It 37 | should make sense to interpret these variables in a quantitative 38 | (interval-scaled) way.} 39 | \item{clusterwise}{logical. If \code{FALSE}, only the output tables 40 | are computed but no more detail and graphs are given on the fly.} 41 | \item{tablevar}{vector of integers. Numbers of variables treated as 42 | categorical (i.e., no histograms and statistics, just tables) if 43 | \code{clusterwise=TRUE}. Note 44 | that an error will be produced by factor type variables unless they 45 | are declared as categorical here.} 46 | \item{catvar}{vector of integers. Numbers of variables to be 47 | categorised by proportional quantiles for table computation. 
48 | Recommended for all continuous variables.} 49 | \item{quantvar}{vector of integers. Variables for which means, 50 | standard deviations and quantiles should be given out if 51 | \code{clusterwise=TRUE}.} 52 | \item{catvarcats}{integer. Number of categories used for 53 | categorisation of variables specified in \code{quantvar}.} 54 | \item{proportions}{logical. If \code{TRUE}, output tables contain 55 | proportions, otherwise numbers of observations.} 56 | \item{projmethod}{one of \code{"none"}, \code{"dc"}, \code{"bc"}, 57 | \code{"vbc"}, \code{"mvdc"}, \code{"adc"}, \code{"awc"} (recommended 58 | if not \code{"none"}), \code{"arc"}, \code{"nc"}, \code{"wnc"}, 59 | \code{"anc"}. Cluster validation projection method introduced in 60 | Hennig (2004), passed on as \code{method} argument in 61 | \code{\link{discrproj}}.} 62 | \item{minsize}{integer. Projection is not carried out for clusters 63 | with fewer points than this. (If this is chosen smaller, it may lead 64 | to errors with some projection methods.)} 65 | \item{ask}{logical. If \code{TRUE}, \code{par(ask=TRUE)} is set in the 66 | beginning to prompt the user before plots and \code{par(ask=FALSE)} 67 | in the end.} 68 | \item{rangefactor}{numeric. Factor by which to multiply the range for 69 | projection plot ranges.} 70 | \item{x}{an object of class \code{"varwisetables"}, output object of 71 | \code{cluster.varstats}.} 72 | \item{digits}{integer. Number of digits after the decimal point to 73 | print out.} 74 | \item{...}{not used.} 75 | } 76 | 77 | \value{ 78 | An object of class \code{"varwisetables"}, which is a 79 | list with a table for each variable, giving (categorised) marginal 80 | distributions by cluster. 81 | } 82 | 83 | \references{ 84 | Hennig, C. (2004) Asymmetric linear dimension reduction for classification. 85 | Journal of Computational and Graphical Statistics 13, 930-945 . 86 | } 87 | 88 | \author{Christian Hennig 89 | \email{christian.hennig@unibo.it} 90 | \url{https://www.unibo.it/sitoweb/christian.hennig/en}} 91 | 92 | \examples{ 93 | set.seed(112233) 94 | options(digits=3) 95 | require(MASS) 96 | require(flexmix) 97 | data(Cars93) 98 | Cars934 <- Cars93[,c(3,5,8,10)] 99 | cc <- 100 | discrete.recode(Cars934,xvarsorted=FALSE,continuous=c(2,3),discrete=c(1,4)) 101 | fcc <- flexmix(cc$data~1,k=2, 102 | model=lcmixed(continuous=2,discrete=2,ppdim=c(6,3),diagonal=TRUE)) 103 | cv <- 104 | cluster.varstats(fcc@cluster,Cars934, contdata=Cars934[,c(2,3)], 105 | tablevar=c(1,4),catvar=c(2,3),quantvar=c(2,3),projmethod="awc", 106 | ask=FALSE) 107 | print(cv) 108 | } 109 | 110 | \keyword{cluster}% __ONLY ONE__ keyword per line 111 | -------------------------------------------------------------------------------- /man/cmahal.Rd: -------------------------------------------------------------------------------- 1 | \name{cmahal} 2 | \alias{cmahal} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Generation of tuning constant for Mahalanobis fixed point clusters.} 5 | \description{ 6 | Generates tuning constants \code{ca} 7 | for \code{\link{fixmahal}} dependent on 8 | the number of points and variables of the current fixed point cluster 9 | (FPC). 10 | 11 | This is experimental and only thought for use in \code{\link{fixmahal}}. 12 | } 13 | \usage{ 14 | cmahal(n, p, nmin, cmin, nc1, c1 = cmin, q = 1) 15 | } 16 | %- maybe also `usage' for other objects documented here. 17 | \arguments{ 18 | \item{n}{positive integer. Number of points.} 19 | \item{p}{positive integer. 
Number of variables.}
20 | \item{nmin}{integer larger than 1. Smallest number of points for which
21 | \code{ca} is computed. For smaller FPC sizes, \code{ca} is set to
22 | the value for \code{nmin}.}
23 | \item{cmin}{positive number. Minimum value for \code{ca}.}
24 | \item{nc1}{positive integer. Number of points at which \code{ca=c1}.}
25 | \item{c1}{positive numeric. Tuning constant for \code{cmahal}.
26 | Value for \code{ca} for FPC size equal to \code{nc1}.}
27 | \item{q}{numeric between 0 and 1. 1 for steepest possible descent of
28 | \code{ca} as function of the FPC size. Should presumably always be 1.}
29 | }
30 | \details{
31 | Some experiments suggest that the tuning constant \code{ca} should
32 | decrease with increasing FPC size and increase with increasing
33 | \code{p} in \code{\link{fixmahal}}. This is to prevent too small
34 | meaningless FPCs while maintaining the significant larger
35 | ones. \code{cmahal} with \code{q=1} computes \code{ca} in such a way
36 | that as long as \code{ca>cmin}, the decrease in \code{n} is as steep
37 | as possible in order to maintain the validity of the convergence
38 | theorem in Hennig and Christlieb (2002).
39 | }
40 | \value{
41 | A numeric vector of length \code{n}, giving the values for \code{ca}
42 | for all FPC sizes smaller or equal to \code{n}.
43 | }
44 | \references{
45 | Hennig, C. and Christlieb, N. (2002) Validating visual clusters in
46 | large datasets: Fixed point clusters of spectral features,
47 | \emph{Computational Statistics and Data Analysis} 40, 723-739.
48 | }
49 | \author{Christian Hennig
50 | \email{christian.hennig@unibo.it}
51 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/}}
52 | 
53 | \seealso{\code{\link{fixmahal}}}
54 | 
55 | \examples{
56 | plot(1:100,cmahal(100,3,nmin=5,cmin=qchisq(0.99,3),nc1=90),
57 | xlab="FPC size", ylab="cmahal")
58 | }
59 | \keyword{cluster}% at least one, from doc/KEYWORDS
60 | 
--------------------------------------------------------------------------------
/man/concomp.Rd:
--------------------------------------------------------------------------------
1 | \name{con.comp}
2 | \alias{con.comp}
3 | %- Also NEED an `\alias' for EACH other topic documented here.
4 | \title{Connectivity components of an undirected graph}
5 | \description{
6 | Computes the connectivity components of an undirected graph from a
7 | matrix giving the edges.
8 | }
9 | \usage{
10 | con.comp(comat)
11 | }
12 | %- maybe also `usage' for other objects documented here.
13 | \arguments{
14 | \item{comat}{a symmetric logical or 0-1 matrix, where \code{comat[i,j]=TRUE}
15 | means that there is an edge between vertices \code{i} and
16 | \code{j}. The diagonal is ignored.}
17 | }
18 | \details{
19 | The "depth-first search" algorithm of Cormen, Leiserson and Rivest
20 | (1990, p. 477) is used.
21 | }
22 | \value{
23 | An integer vector, giving the number of the connectivity component for
24 | each vertex.
25 | }
26 | \references{
27 | Cormen, T. H., Leiserson, C. E. and Rivest, R. L. (1990), \emph{Introduction
28 | to Algorithms}, Cambridge: MIT Press.
29 | }
30 | 
31 | \author{Christian Hennig
32 | \email{christian.hennig@unibo.it}
33 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/}
34 | }
35 | 
36 | \seealso{
37 | \code{\link{hclust}}, \code{\link{cutree}} for cut single linkage
38 | trees (often equivalent).
39 | } 40 | 41 | \examples{ 42 | set.seed(1000) 43 | x <- rnorm(20) 44 | m <- matrix(0,nrow=20,ncol=20) 45 | for(i in 1:20) 46 | for(j in 1:20) 47 | m[i,j] <- abs(x[i]-x[j]) 48 | d <- m<0.2 49 | cc <- con.comp(d) 50 | max(cc) # number of connectivity components 51 | plot(x,cc) 52 | # The same should be produced by 53 | # cutree(hclust(as.dist(m),method="single"),h=0.2). 54 | } 55 | \keyword{array}% at least one, from doc/KEYWORDS 56 | \keyword{cluster}% __ONLY ONE__ keyword per line 57 | 58 | 59 | -------------------------------------------------------------------------------- /man/confusion.Rd: -------------------------------------------------------------------------------- 1 | \name{confusion} 2 | \alias{confusion} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Misclassification probabilities in mixtures} 5 | \description{ 6 | Estimates a misclassification probability in a mixture distribution 7 | between two mixture components from estimated posterior probabilities 8 | regardless of component parameters, see Hennig (2010). 9 | } 10 | \usage{ 11 | confusion(z,pro,i,j,adjustprobs=FALSE) 12 | } 13 | %- maybe also `usage' for other objects documented here. 14 | \arguments{ 15 | \item{z}{matrix of posterior probabilities for observations (rows) to 16 | belong to mixture components (columns), so entries need to sum up to 17 | 1 for each row.} 18 | \item{pro}{vector of component proportions, need to sum up to 1.} 19 | \item{i}{integer. Component number.} 20 | \item{j}{integer. Component number.} 21 | \item{adjustprobs}{logical. If \code{TRUE}, probabilities are 22 | initially standardised so that those for components \code{i} and 23 | \code{j} add up to one (i.e., if they were the only components).} 24 | } 25 | 26 | \value{ 27 | Estimated probability that an observation generated by component 28 | \code{j} is classified to component \code{i} by maximum a posteriori rule. 29 | } 30 | 31 | \references{ 32 | Hennig, C. (2010) Methods for merging Gaussian mixture components, 33 | \emph{Advances in Data Analysis and Classification}, 4, 3-34. 34 | } 35 | \author{Christian Hennig 36 | \email{christian.hennig@unibo.it} 37 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 38 | } 39 | \examples{ 40 | set.seed(12345) 41 | m <- rpois(20,lambda=5) 42 | dim(m) <- c(5,4) 43 | pro <- apply(m,2,sum) 44 | pro <- pro/sum(pro) 45 | m <- m/apply(m,1,sum) 46 | round(confusion(m,pro,1,2),digits=2) 47 | } 48 | \keyword{cluster}% at least one, from doc/KEYWORDS 49 | \keyword{multivariate} 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /man/cov.wml.Rd: -------------------------------------------------------------------------------- 1 | \name{cov.wml} 2 | \alias{cov.wml} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Weighted Covariance Matrices (Maximum Likelihood)} 5 | \description{ 6 | Returns a list containing estimates of the weighted covariance 7 | matrix and the mean of the data, and optionally of the (weighted) 8 | correlation matrix. The 9 | covariance matrix is divided by the sum of the weights, 10 | corresponding to \code{n} and the ML-estimator in the case of equal 11 | weights, as opposed to \code{n-1} for \code{\link{cov.wt}}. 12 | } 13 | \usage{ 14 | cov.wml(x, wt = rep(1/nrow(x), nrow(x)), cor = FALSE, center = TRUE) 15 | } 16 | %- maybe also `usage' for other objects documented here. 17 | \arguments{ 18 | \item{x}{a matrix or data frame. 
As usual, rows are observations and
19 | columns are variables.}
20 | \item{wt}{a non-negative and non-zero vector of weights for each
21 | observation. Its length must equal the number of rows of
22 | \code{x}.}
23 | \item{cor}{A logical indicating whether the estimated weighted
24 | correlation matrix will be returned as well.}
25 | \item{center}{Either a logical or a numeric vector specifying the centers
26 | to be used when computing covariances. If \code{TRUE}, the
27 | (weighted) mean of each variable is used, if \code{FALSE}, zero is
28 | used. If \code{center} is numeric, its length must equal the
29 | number of columns of \code{x}.}
30 | }
31 | \value{
32 | A list containing the following named components:
33 | \item{cov}{the estimated (weighted) covariance matrix.}
34 | \item{center}{an estimate for the center (mean) of the data.}
35 | \item{n.obs}{the number of observations (rows) in \code{x}.}
36 | \item{wt}{the weights used in the estimation. Only returned if given
37 | as an argument.}
38 | \item{cor}{the estimated correlation matrix. Only returned if `cor' is
39 | `TRUE'.}
40 | }
41 | \author{Christian Hennig
42 | \email{christian.hennig@unibo.it}
43 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/}}
44 | 
45 | \seealso{\code{\link{cov.wt}}, \code{\link{cov}}, \code{\link{var}}}
46 | 
47 | \examples{
48 | x <- c(1,2,3,4,5,6,7,8,9,10)
49 | y <- c(1,2,3,8,7,6,5,8,9,10)
50 | cov.wml(cbind(x,y),wt=c(0,0,0,1,1,1,1,1,0,0))
51 | cov.wt(cbind(x,y),wt=c(0,0,0,1,1,1,1,1,0,0))
52 | }
53 | \keyword{multivariate}% at least one, from doc/KEYWORDS
54 | 
--------------------------------------------------------------------------------
/man/cvnn.Rd:
--------------------------------------------------------------------------------
1 | \name{cvnn}
2 | \alias{cvnn}
3 | %- Also NEED an `\alias' for EACH other topic documented here.
4 | \title{Cluster validation based on nearest neighbours}
5 | \description{
6 | Cluster validity index based on nearest neighbours as defined in Liu
7 | et al. (2013) with a correction explained in Halkidi et al. (2015).
8 | }
9 | \usage{
10 | cvnn(d=NULL,clusterings,k=5)
11 | }
12 | %- maybe also `usage' for other objects documented here.
13 | \arguments{
14 | \item{d}{dissimilarity matrix or \code{dist}-object.}
15 | \item{clusterings}{list of vectors of integers with length \code{=nrow(d)};
16 | indicating the cluster for each observation for several clusterings
17 | (list elements) to be compared.}
18 | \item{k}{integer. Number of nearest neighbours.}
19 | }
20 | 
21 | \value{
22 | List with components (see Liu et al. (2013), Halkidi et al. (2015) for
23 | details)
24 | \item{cvnnindex}{vector of index values for the various clusterings,
25 | see Liu et al. (2013), the lower the better.}
26 | \item{sep}{vector of separation values.}
27 | \item{comp}{vector of compactness values.}
28 | }
29 | 
30 | \references{
31 | Halkidi, M., Vazirgiannis, M. and Hennig, C. (2015) Method-independent
32 | indices for cluster validation. In C. Hennig, M. Meila, F. Murtagh,
33 | R. Rocci (eds.) \emph{Handbook of Cluster Analysis}, CRC
34 | Press/Taylor \code{&} Francis, Boca Raton.
35 | 
36 | Liu, Y., Li, Z., Xiong, H., Gao, X., Wu, J. and Wu, S. (2013)
37 | Understanding and enhancement of internal clustering validation
38 | measures. \emph{IEEE Transactions on Cybernetics} 43, 982-994.
39 | 40 | } 41 | 42 | \author{Christian Hennig 43 | \email{christian.hennig@unibo.it} 44 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 45 | } 46 | \examples{ 47 | options(digits=3) 48 | iriss <- as.matrix(iris[c(1:10,51:55,101:105),-5]) 49 | irisc <- as.numeric(iris[c(1:10,51:55,101:105),5]) 50 | print(cvnn(dist(iriss),list(irisc,rep(1:4,5)))) 51 | } 52 | \keyword{cluster}% at least one, from doc/KEYWORDS 53 | 54 | 55 | 56 | 57 | -------------------------------------------------------------------------------- /man/cweight.Rd: -------------------------------------------------------------------------------- 1 | \name{cweight} 2 | \alias{cweight} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Weight function for AWC} 5 | \description{ 6 | For use in \code{awcoord} only. 7 | } 8 | \usage{ 9 | cweight(x, ca) 10 | 11 | } 12 | %- maybe also `usage' for other objects documented here. 13 | \arguments{ 14 | \item{x}{numerical.} 15 | \item{ca}{numerical.} 16 | } 17 | % \details{ 18 | % } 19 | \value{ 20 | \code{ca/x} if smaller than 1, else 1. 21 | } 22 | 23 | \author{Christian Hennig 24 | \email{christian.hennig@unibo.it} 25 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 26 | } 27 | 28 | \seealso{ 29 | \code{\link{awcoord}} 30 | } 31 | 32 | \examples{ 33 | cweight(4,1) 34 | } 35 | \keyword{arith}% at least one, from doc/KEYWORDS 36 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /man/dbscan.Rd: -------------------------------------------------------------------------------- 1 | \name{dbscan} 2 | \alias{dbscan} 3 | \alias{print.dbscan} 4 | \alias{plot.dbscan} 5 | \alias{predict.dbscan} 6 | \title{DBSCAN density reachability and connectivity clustering} 7 | \description{ 8 | Generates a density based clustering of arbitrary shape as introduced 9 | in Ester et al. (1996). 10 | } 11 | \usage{ 12 | dbscan(data, eps, MinPts = 5, scale = FALSE, method = c("hybrid", "raw", 13 | "dist"), seeds = TRUE, showplot = FALSE, countmode = NULL) 14 | \method{print}{dbscan}(x, ...) 15 | \method{plot}{dbscan}(x, data, ...) 16 | \method{predict}{dbscan}(object, data, newdata = NULL, 17 | predict.max=1000, ...) 18 | } 19 | \arguments{ 20 | \item{data}{data matrix, data.frame, dissimilarity matrix or 21 | \code{dist}-object. Specify \code{method="dist"} if the data should 22 | be interpreted as dissimilarity matrix or object. Otherwise 23 | Euclidean distances will be used.} 24 | \item{eps}{ Reachability distance, see Ester et al. (1996). } 25 | \item{MinPts}{ Reachability minimum no. of points, see Ester et al. (1996). } 26 | \item{scale}{ scale the data if \code{TRUE}. } 27 | \item{method}{ "dist" treats data as distance matrix (relatively fast 28 | but memory expensive), "raw" treats data as raw data and avoids 29 | calculating a distance matrix (saves memory but may be slow), 30 | "hybrid" expects also raw data, but calculates partial distance 31 | matrices (very fast with moderate memory requirements).} 32 | \item{seeds}{FALSE to not include the \code{isseed}-vector in the 33 | \code{dbscan}-object.} 34 | \item{showplot}{ 0 = no plot, 1 = plot per iteration, 2 = plot per 35 | subiteration. } 36 | \item{countmode}{ NULL or vector of point numbers at which to report 37 | progress. } 38 | \item{x}{object of class \code{dbscan}.} 39 | \item{object}{object of class \code{dbscan}.} 40 | \item{newdata}{ matrix or data.frame with raw data to predict. } 41 | \item{predict.max}{ max. batch size for predictions. 
}
42 | \item{...}{Further arguments transferred to plot methods.}
43 | }
44 | \details{
45 | Clusters require a minimum number of points (MinPts) within a maximum distance
46 | (eps) around one of their members (the seed).
47 | Any point within eps around any point which satisfies the seed condition
48 | is a cluster member (recursively).
49 | Some points may not belong to any cluster (noise).
50 | 
51 | We have clustered a 100,000 x 2 dataset in 40 minutes on a Pentium M 1600
52 | MHz.
53 | 
54 | \code{print.dbscan} shows a statistic of the number of points
55 | belonging to the clusters that are seeds and border points.
56 | 
57 | \code{plot.dbscan} distinguishes between seed and border points by
58 | plot symbol.
59 | 
60 | }
61 | \value{
62 | \code{predict.dbscan} gives out a vector of predicted clusters for the
63 | points in \code{newdata}.
64 | 
65 | \code{dbscan} gives out
66 | an object of class 'dbscan', which is a list with components
67 | \item{cluster}{integer vector coding cluster membership with noise
68 | observations (singletons) coded as 0 }
69 | \item{isseed}{logical vector indicating whether a point is a seed (not
70 | border, not noise)}
71 | \item{eps}{parameter eps}
72 | \item{MinPts}{parameter MinPts}
73 | }
74 | \references{ Martin Ester, Hans-Peter Kriegel, Joerg Sander, Xiaowei Xu
75 | (1996). A Density-Based Algorithm for Discovering Clusters in Large Spatial
76 | Databases with Noise. Institute for Computer Science, University of Munich.
77 | Proceedings of 2nd International Conference on Knowledge Discovery and Data
78 | Mining (KDD-96). }
79 | \author{Jens Oehlschlaegel, based on a draft by Christian Hennig.}
80 | \note{This is a simplified version of the original algorithm (no K-D-trees
81 | used), thus we have \eqn{O(n^2)} instead of \eqn{O(n*log(n))} runtime.}
82 | \examples{
83 | set.seed(665544)
84 | n <- 600
85 | x <- cbind(runif(10, 0, 10)+rnorm(n, sd=0.2), runif(10, 0, 10)+rnorm(n,
86 | sd=0.2))
87 | par(bg="grey40")
88 | ds <- dbscan(x, 0.2)
89 | # run with showplot=1 to see how dbscan works.
90 | ds
91 | plot(ds, x)
92 | 
93 | x2 <- matrix(0,nrow=4,ncol=2)
94 | x2[1,] <- c(5,2)
95 | x2[2,] <- c(8,3)
96 | x2[3,] <- c(4,4)
97 | x2[4,] <- c(9,9)
98 | predict(ds, x, x2)
99 | 
100 | n <- 600
101 | x <- cbind((1:3)+rnorm(n, sd=0.2), (1:3)+rnorm(n, sd=0.2))
102 | 
103 | # Not run, but results from my machine are 0.105 - 0.068 - 0.255:
104 | # system.time(ds <- dbscan(x, 0.3, countmode=NULL, method="raw"))[3]
105 | # system.time(dsb <- dbscan(x, 0.3, countmode=NULL, method="hybrid"))[3]
106 | # system.time(dsc <- dbscan(dist(x), 0.3, countmode=NULL,
107 | # method="dist"))[3]
108 | }
109 | \keyword{multivariate}
110 | \keyword{cluster}
111 | 
112 | 
--------------------------------------------------------------------------------
/man/dipp.tantrum.Rd:
--------------------------------------------------------------------------------
1 | \name{dipp.tantrum}
2 | \alias{dipp.tantrum}
3 | %- Also NEED an `\alias' for EACH other topic documented here.
4 | \title{Simulates p-value for dip test}
5 | \description{
6 | Simulates p-value for dip test (see \code{\link[diptest]{dip}})
7 | in the way suggested by Tantrum, Murua and Stuetzle (2003) from the
8 | closest unimodal distribution determined by kernel density estimation
9 | with bandwidth chosen so that the density just becomes unimodal. This is
10 | less conservative (and in fact sometimes anti-conservative) than the
11 | values from \code{\link[diptest]{dip.test}}.
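A typical call (as in the example below) first computes the dip
statistic with \code{\link[diptest]{dip}} and then passes it on, e.g.
\code{dipp.tantrum(x,dip(x))}.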
12 | }
13 | \usage{
14 | dipp.tantrum(xdata,d,M=100)
15 | }
16 | %- maybe also `usage' for other objects documented here.
17 | \arguments{
18 | \item{xdata}{numeric vector. One-dimensional dataset.}
19 | \item{d}{numeric. Value of dip statistic.}
20 | \item{M}{integer. Number of artificial datasets generated in order to
21 | estimate the p-value.}
22 | }
23 | 
24 | \value{
25 | List with components
26 | \item{p.value}{approximated p-value.}
27 | \item{bw}{borderline unimodality bandwidth in \code{\link{density}}
28 | with default settings.}
29 | \item{dv}{vector of dip statistic values from simulated artificial data.}
30 | }
31 | 
32 | \references{
33 | J. A. Hartigan and P. M. Hartigan (1985) The Dip Test of
34 | Unimodality, \emph{Annals of Statistics}, 13, 70-84.
35 | 
36 | Tantrum, J., Murua, A. and Stuetzle, W. (2003) Assessment and
37 | Pruning of Hierarchical Model Based Clustering, \emph{Proceedings of the
38 | ninth ACM SIGKDD international conference on Knowledge discovery and
39 | data mining}, Washington, D.C., 197-205.
40 | }
41 | \author{Christian Hennig
42 | \email{christian.hennig@unibo.it}
43 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/}
44 | }
45 | \examples{
46 | require(diptest) # diptest is an fpc dependency, so it is available
47 | x <- runif(100)
48 | d <- dip(x)
49 | dt <- dipp.tantrum(x,d,M=10)
50 | }
51 | \keyword{cluster}% at least one, from doc/KEYWORDS
52 | % \keyword{multivariate}
53 | 
54 | 
55 | 
--------------------------------------------------------------------------------
/man/diptest.multi.Rd:
--------------------------------------------------------------------------------
1 | \name{diptest.multi}
2 | \alias{diptest.multi}
3 | %- Also NEED an `\alias' for EACH other topic documented here.
4 | \title{Diptest for discriminant coordinate projection}
5 | \description{
6 | Diptest (Hartigan and Hartigan, 1985, see \code{\link[diptest]{dip}})
7 | for data projected in discriminant coordinate separating optimally two
8 | class means (see \code{discrcoord}) as suggested by Tantrum, Murua and
9 | Stuetzle (2003).
10 | }
11 | \usage{
12 | diptest.multi(xdata,class,pvalue="uniform",M=100)
13 | }
14 | %- maybe also `usage' for other objects documented here.
15 | \arguments{
16 | \item{xdata}{matrix. Potentially multidimensional dataset.}
17 | \item{class}{vector of integers giving class numbers for observations.}
18 | \item{pvalue}{\code{"uniform"} or \code{"tantrum"}. Defines whether
19 | the p-value is computed from a uniform null model as suggested in
20 | Hartigan and Hartigan (1985, using \code{\link[diptest]{dip.test}}) or as
21 | suggested in Tantrum et al. (2003, using \code{dipp.tantrum}).}
22 | \item{M}{integer. Number of artificial datasets generated in order to
23 | estimate the p-value if \code{pvalue="tantrum"}.}
24 | }
25 | 
26 | \value{
27 | The resulting p-value.
28 | }
29 | 
30 | \references{
31 | J. A. Hartigan and P. M. Hartigan (1985) The Dip Test of
32 | Unimodality, \emph{Annals of Statistics}, 13, 70-84.
33 | 
34 | Tantrum, J., Murua, A. and Stuetzle, W. (2003) Assessment and
35 | Pruning of Hierarchical Model Based Clustering, \emph{Proceedings of the
36 | ninth ACM SIGKDD international conference on Knowledge discovery and
37 | data mining}, Washington, D.C., 197-205.
38 | } 39 | \author{Christian Hennig 40 | \email{christian.hennig@unibo.it} 41 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 42 | } 43 | \examples{ 44 | require(diptest) 45 | x <- cbind(runif(100),runif(100)) 46 | partition <- 1+(x[,1]<0.5) 47 | d1 <- diptest.multi(x,partition) 48 | d2 <- diptest.multi(x,partition,pvalue="tantrum",M=10) 49 | } 50 | \keyword{cluster}% at least one, from doc/KEYWORDS 51 | \keyword{multivariate} 52 | 53 | 54 | 55 | -------------------------------------------------------------------------------- /man/discrcoord.Rd: -------------------------------------------------------------------------------- 1 | \name{discrcoord} 2 | \alias{discrcoord} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Discriminant coordinates/canonical variates} 5 | \description{ 6 | Computes discriminant coordinates, sometimes referred to as "canonical 7 | variates" as described in Seber (1984). 8 | } 9 | \usage{ 10 | discrcoord(xd, clvecd, pool = "n", ...) 11 | } 12 | %- maybe also `usage' for other objects documented here. 13 | \arguments{ 14 | \item{xd}{the data matrix; a numerical object which can be coerced 15 | to a matrix.} 16 | \item{clvecd}{integer vector of class numbers; length must equal 17 | \code{nrow(xd)}.} 18 | \item{pool}{string. Determines how the within classes 19 | covariance is pooled. "n" means that the class covariances are 20 | weighted corresponding to the number of points in each class 21 | (default). "equal" means that all classes get equal weight.} 22 | \item{...}{no effect} 23 | } 24 | \details{ 25 | The matrix T (see Seber (1984), p. 270) is inverted by use of 26 | \code{\link{tdecomp}}, which can be expected to give 27 | reasonable results for singular within-class covariance matrices. 28 | } 29 | \value{ 30 | List with the following components 31 | \item{ev}{eigenvalues in descending order.} 32 | \item{units}{columns are coordinates of projection basis vectors. 33 | New points \code{x} can be projected onto the projection basis vectors 34 | by \code{x \%*\% units}} 35 | \item{proj}{projections of \code{xd} onto \code{units}.} 36 | } 37 | \references{ 38 | Seber, G. A. F. (1984). \emph{Multivariate Observations}. New York: Wiley. 39 | } 40 | \author{Christian Hennig 41 | \email{christian.hennig@unibo.it} 42 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 43 | } 44 | 45 | \seealso{ 46 | \code{\link{plotcluster}} for straight forward discriminant plots. 47 | 48 | \code{\link{batcoord}} for discriminating projections for two classes, 49 | so that also the differences in variance are shown (\code{discrcoord} is 50 | based only on differences in mean). 51 | 52 | \code{\link{rFace}} for generation of the example data used below. 53 | } 54 | 55 | \examples{ 56 | set.seed(4634) 57 | face <- rFace(600,dMoNo=2,dNoEy=0) 58 | grface <- as.integer(attr(face,"grouping")) 59 | dcf <- discrcoord(face,grface) 60 | plot(dcf$proj,col=grface) 61 | # ...done in one step by function plotcluster. 62 | } 63 | \keyword{multivariate}% at least one, from doc/KEYWORDS 64 | \keyword{classif}% __ONLY ONE__ keyword per line 65 | 66 | 67 | 68 | -------------------------------------------------------------------------------- /man/discrete.recode.Rd: -------------------------------------------------------------------------------- 1 | \name{discrete.recode} 2 | \alias{discrete.recode} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 
4 | \title{Recodes mixed variables dataset} 5 | \description{ 6 | Recodes a dataset with mixed continuous and categorical variables so 7 | that the continuous variables come first and the categorical variables 8 | have standard coding 1, 2, 3,... (in lexicographical ordering of 9 | values coerced to strings). 10 | } 11 | \usage{ 12 | discrete.recode(x,xvarsorted=TRUE,continuous=0,discrete) 13 | } 14 | %- maybe also `usage' for other objects documented here. 15 | \arguments{ 16 | \item{x}{data matrix or data frame (not a tibble). 17 | The data need to be organised 18 | case-wise, i.e., if there are categorical variables only, and 15 19 | cases with values c(1,1,2) on the 3 variables, the data matrix needs 20 | 15 rows with values 1 1 2. (Categorical variables could take numbers 21 | or strings or anything that can be coerced to factor levels as values.)} 22 | \item{xvarsorted}{logical. If \code{TRUE}, the continuous variables 23 | are assumed to be the first ones, and the categorical variables to 24 | be behind them.} 25 | \item{continuous}{vector of integers giving positions of the 26 | continuous variables. If \code{xvarsorted=TRUE}, a single integer, 27 | number of continuous variables.} 28 | \item{discrete}{vector of integers giving positions of the 29 | categorical variables (the variables need to be coded in such a way that 30 | \code{\link{data.matrix}} converts them to something numeric). If 31 | \code{xvarsorted=TRUE}, a single integer, number of categorical variables.} 32 | } 33 | 34 | \value{ 35 | A list with components 36 | \item{data}{data matrix with continuous variables first and 37 | categorical variables in standard coding behind them.} 38 | \item{ppdim}{vector of categorical variable-wise numbers of 39 | categories.} 40 | \item{discretelevels}{list of levels of the categorical variables 41 | belonging to what is treated by \code{flexmixedruns} as category 42 | 1, 2, 3 etc.} 43 | \item{continuous}{number of continuous variables.} 44 | \item{discrete}{number of categorical variables.} 45 | } 46 | 47 | \author{Christian Hennig 48 | \email{christian.hennig@unibo.it} 49 | \url{https://www.unibo.it/sitoweb/christian.hennig/en}} 50 | 51 | \seealso{\code{\link{lcmixed}}} 52 | 53 | \examples{ 54 | set.seed(776655) 55 | v1 <- rnorm(20) 56 | v2 <- rnorm(20) 57 | d1 <- sample(c(2,4,6,8),20,replace=TRUE) 58 | d2 <- sample(1:4,20,replace=TRUE) 59 | ldata <- cbind(v1,d1,v2,d2) 60 | lc <- 61 | discrete.recode(ldata,xvarsorted=FALSE,continuous=c(1,3),discrete=c(2,4)) 62 | require(MASS) 63 | data(Cars93) 64 | Cars934 <- Cars93[,c(3,5,8,10)] 65 | cc <- discrete.recode(Cars934,xvarsorted=FALSE,continuous=c(2,3),discrete=c(1,4)) 66 | } 67 | 68 | \keyword{manip}% __ONLY ONE__ keyword per line 69 | -------------------------------------------------------------------------------- /man/discrproj.Rd: -------------------------------------------------------------------------------- 1 | \name{discrproj} 2 | \alias{discrproj} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Linear dimension reduction for classification} 5 | \description{ 6 | An interface for ten methods of linear dimension reduction in order 7 | to separate the groups optimally in the projected data. Includes 8 | classical discriminant coordinates, methods to project differences in 9 | mean and covariance structure, asymmetric methods (separation of a 10 | homogeneous class from a heterogeneous one), local neighborhood-based 11 | methods and methods based on robust covariance matrices. 
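All methods return their output in a common format (a list with at
least the components \code{ev}, \code{units} and \code{proj}, see
Value below), so that, e.g., \code{discrproj(x,clvecd,method="dc")$proj}
gives the projected data regardless of which \code{method} is chosen.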
12 | }
13 | \usage{
14 | discrproj(x, clvecd, method="dc", clnum=NULL, ignorepoints=FALSE,
15 | ignorenum=0, ...)
16 | }
17 | %- maybe also `usage' for other objects documented here.
18 | \arguments{
19 | \item{x}{the data matrix; a numerical object which can be coerced
20 | to a matrix.}
21 | \item{clvecd}{vector of class numbers which can be coerced into
22 | integers; length must equal
23 | \code{nrow(x)}.}
24 | \item{method}{one of
25 | \describe{
26 | \item{"dc"}{usual discriminant coordinates, see \code{\link{discrcoord}},}
27 | \item{"bc"}{Bhattacharyya coordinates, first coordinate showing
28 | mean differences, second showing covariance matrix differences,
29 | see \code{\link{batcoord}},}
30 | \item{"vbc"}{variance dominated Bhattacharyya coordinates,
31 | see \code{\link{batcoord}},}
32 | \item{"mvdc"}{added mean and variance differences optimizing
33 | coordinates, see \code{\link{mvdcoord}},}
34 | \item{"adc"}{asymmetric discriminant coordinates, see
35 | \code{\link{adcoord}},}
36 | \item{"awc"}{asymmetric discriminant coordinates with weighted
37 | observations, see \code{\link{awcoord}},}
38 | \item{"arc"}{asymmetric discriminant coordinates with weighted
39 | observations and robust MCD-covariance matrix,
40 | see \code{\link{awcoord}},}
41 | \item{"nc"}{neighborhood based coordinates,
42 | see \code{\link{ncoord}},}
43 | \item{"wnc"}{neighborhood based coordinates with weighted neighborhoods,
44 | see \code{\link{ncoord}},}
45 | \item{"anc"}{asymmetric neighborhood based coordinates,
46 | see \code{\link{ancoord}}.}
47 | }
48 | Note that "bc", "vbc", "adc", "awc", "arc" and "anc" assume that
49 | there are only two classes.}
50 | \item{clnum}{integer. Number of the class which is attempted to plot
51 | homogeneously by "asymmetric methods", which are the methods
52 | assuming that there are only two classes, as indicated above.}
53 | \item{ignorepoints}{logical. If \code{TRUE}, points with label
54 | \code{ignorenum} in \code{clvecd} are ignored in the computation for
55 | \code{method} and are only projected afterwards onto the resulting
56 | units. If \code{pch=NULL}, the plot symbol for these points is "N".}
57 | \item{ignorenum}{one of the potential values of the components of
58 | \code{clvecd}. Only has effect if \code{ignorepoints=TRUE}, see above.}
59 | \item{...}{additional parameters passed to the
60 | projection methods.}
61 | }
62 | % \details{
63 | % }
64 | 
65 | \value{
66 | \code{discrproj} returns the output of the chosen projection method,
67 | which is a list with at least the components \code{ev, units, proj}.
68 | For detailed information see the help pages of the projection methods.
69 | \item{ev}{eigenvalues in descending order, usually indicating portion
70 | of information in the corresponding direction.}
71 | \item{units}{columns are coordinates of projection basis vectors.
72 | New points \code{x} can be projected onto the projection basis vectors
73 | by \code{x \%*\% units}}
74 | \item{proj}{projections of \code{x} onto \code{units}.}
75 | }
76 | 
77 | \references{
78 | Hennig, C. (2004) Asymmetric linear dimension reduction for classification.
79 | Journal of Computational and Graphical Statistics 13, 930-945.
80 | 
81 | Hennig, C. (2005) A method for visual cluster validation. In:
82 | Weihs, C. and Gaul, W. (eds.): Classification - The Ubiquitous
83 | Challenge. Springer, Heidelberg 2005, 153-160.
84 | 
85 | Seber, G. A. F. (1984). \emph{Multivariate Observations}. New York: Wiley.
86 | 
87 | Fukunaga, K. (1990).
\emph{Introduction to Statistical Pattern
88 | Recognition} (2nd ed.). Boston: Academic Press.
89 | }
90 | \author{Christian Hennig
91 | \email{christian.hennig@unibo.it}
92 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/}}
93 | 
94 | \seealso{
95 | \code{\link{discrcoord}}, \code{\link{batcoord}},
96 | \code{\link{mvdcoord}}, \code{\link{adcoord}},
97 | \code{\link{awcoord}}, \code{\link{ncoord}},
98 | \code{\link{ancoord}}.
99 | 
100 | \code{\link{rFace}} for generation of the example data used below.
101 | }
102 | 
103 | \examples{
104 | set.seed(4634)
105 | face <- rFace(300,dMoNo=2,dNoEy=0,p=3)
106 | grface <- as.integer(attr(face,"grouping"))
107 | 
108 | # The abs in the following is there to unify the output,
109 | # because eigenvectors are defined only up to their sign.
110 | # Statistically it doesn't make sense to compute absolute values.
111 | round(abs(discrproj(face,grface, method="nc")$units),digits=2)
112 | round(abs(discrproj(face,grface, method="wnc")$units),digits=2)
113 | round(abs(discrproj(face,grface, clnum=1, method="arc")$units),digits=2)
114 | }
115 | \keyword{multivariate}% at least one, from doc/KEYWORDS
116 | \keyword{classif}% __ONLY ONE__ keyword per line
117 | 
118 | 
119 | 
120 | 
--------------------------------------------------------------------------------
/man/distancefactor.Rd:
--------------------------------------------------------------------------------
1 | \name{distancefactor}
2 | \alias{distancefactor}
3 | %- Also NEED an `\alias' for EACH other topic documented here.
4 | \title{Factor for dissimilarity of mixed type data}
5 | \description{
6 | Computes a factor that can be used to standardise ordinal categorical
7 | variables and binary dummy variables coding categories of nominal scaled
8 | variables for Euclidean
9 | dissimilarity computation in mixed type data. See Hennig and Liao (2013).
10 | }
11 | \usage{
12 | distancefactor(cat,n=NULL, catsizes=NULL,type="categorical",
13 | normfactor=2,qfactor=ifelse(type=="categorical",1/2,
14 | 1/(1+1/(cat-1))))
15 | 
16 | }
17 | %- maybe also `usage' for other objects documented here.
18 | \arguments{
19 | \item{cat}{integer. Number of categories of the variable to be standardised.
20 | Note that for \code{type="categorical"} the number of categories of
21 | the original variable is required, although the
22 | \code{distancefactor} is used to standardise dummy
23 | variables for the categories.}
24 | \item{n}{integer. Number of data points.}
25 | \item{catsizes}{vector of integers giving numbers of observations per
26 | category. One of \code{n} and \code{catsizes} must be supplied. If
27 | \code{catsizes=NULL}, \code{rep(round(n/cat),cat)} is used (this may
28 | be appropriate as well if numbers of observations of categories are
29 | unequal, if the researcher decides that the dissimilarity measure
30 | should not be influenced by empirical category sizes).}
31 | \item{type}{\code{"categorical"} if the factor is used for dummy
32 | variables belonging to a nominal variable, \code{"ordinal"} if the
33 | factor is used for an ordinal variable in standard Likert coding.}
34 | \item{normfactor}{numeric. Factor on which standardisation is based.
35 | As a default, this is \code{E(X_1-X_2)^2=2} for independent unit
36 | variance variables.}
37 | \item{qfactor}{numeric.
Factor q in Hennig and Liao (2013) to
38 | adjust for clumping effects due to discreteness.}
39 | }
40 | 
41 | \value{
42 | A factor by which to multiply the variable in order to make it
43 | comparable to a unit variance continuous variable when aggregated in
44 | Euclidean fashion for dissimilarity computation, so that expected
45 | effective difference between two realisations of the variable equals
46 | \code{qfactor*normfactor}.
47 | }
48 | 
49 | 
50 | \references{
51 | Hennig, C. and Liao, T. (2013) How to find an appropriate clustering
52 | for mixed-type variables with application to socio-economic
53 | stratification, \emph{Journal of the Royal Statistical Society, Series
54 | C Applied Statistics}, 62, 309-369.
55 | 
56 | 
57 | }
58 | 
59 | \author{Christian Hennig
60 | \email{christian.hennig@unibo.it}
61 | \url{https://www.unibo.it/sitoweb/christian.hennig/en}}
62 | 
63 | \seealso{\code{\link{lcmixed}}, \code{\link[cluster]{pam}}}
64 | 
65 | \examples{
66 | set.seed(776655)
67 | d1 <- sample(1:5,20,replace=TRUE)
68 | d2 <- sample(1:4,20,replace=TRUE)
69 | ldata <- cbind(d1,d2)
70 | lc <- cat2bin(ldata,categorical=1)$data
71 | lc[,1:5] <- lc[,1:5]*distancefactor(5,20,type="categorical")
72 | lc[,6] <- lc[,6]*distancefactor(4,20,type="ordinal")
73 | }
74 | 
75 | \keyword{cluster}% __ONLY ONE__ keyword per line
--------------------------------------------------------------------------------
/man/distcritmulti.Rd:
--------------------------------------------------------------------------------
1 | \name{distcritmulti}
2 | \alias{distcritmulti}
3 | %- Also NEED an `\alias' for EACH other topic documented here.
4 | \title{Distance based validity criteria for large data sets}
5 | \description{
6 | Approximates average silhouette width or the Pearson version of
7 | Hubert's gamma criterion by splitting the
8 | dataset into pieces and averaging the subset-wise values, see Hennig
9 | and Liao (2013).
10 | }
11 | \usage{
12 | distcritmulti(x,clustering,part=NULL,ns=10,criterion="asw",
13 | fun="dist",metric="euclidean",
14 | count=FALSE,seed=NULL,...)
15 | }
16 | %- maybe also `usage' for other objects documented here.
17 | \arguments{
18 | \item{x}{cases times variables data matrix.}
19 | \item{clustering}{vector of integers indicating the clustering.}
20 | \item{part}{vector of integer subset sizes; sum should be smaller or
21 | equal to the number of cases of \code{x}. If \code{NULL}, subset sizes are
22 | chosen approximately equal.}
23 | \item{ns}{integer. Number of subsets, only used if \code{part==NULL}.}
24 | \item{criterion}{\code{"asw"} or \code{"pearsongamma"}, specifies
25 | whether the average silhouette width or the Pearson version of
26 | Hubert's gamma is computed.}
27 | \item{fun}{\code{"dist"} or \code{"daisy"}, specifies
28 | which function is used for computing dissimilarities.}
29 | \item{metric}{passed on to \code{\link{dist}} (as argument
30 | \code{method}) or \code{\link[cluster]{daisy}} to determine which
31 | dissimilarity is used.}
32 | \item{count}{logical. If \code{TRUE}, the subset number just processed
33 | is printed.}
34 | \item{seed}{integer, random seed. (If \code{NULL}, result depends on
35 | random numbers.)}
36 | \item{...}{further arguments to be passed on to \code{\link{dist}} or
37 | \code{\link[cluster]{daisy}}.}
38 | }
39 | 
40 | \value{
41 | A list with components \code{crit.overall,crit.sub,crit.sd,subsets}.
42 | \item{crit.overall}{value of criterion.} 43 | \item{crit.sub}{vector of subset-wise criterion values.} 44 | \item{crit.sd}{standard deviation of \code{crit.sub}, can be used to 45 | assess stability.} 46 | \item{subsets}{list of case indexes in subsets.} 47 | } 48 | 49 | 50 | \references{ 51 | Halkidi, M., Batistakis, Y., Vazirgiannis, M. (2001) On Clustering 52 | Validation Techniques, \emph{Journal of Intelligent Information 53 | Systems}, 17, 107-145. 54 | 55 | Hennig, C. and Liao, T. (2013) How to find an appropriate clustering 56 | for mixed-type variables with application to socio-economic 57 | stratification, \emph{Journal of the Royal Statistical Society, Series 58 | C Applied Statistics}, 62, 309-369. 59 | 60 | Kaufman, L. and Rousseeuw, P.J. (1990). "Finding Groups in Data: 61 | An Introduction to Cluster Analysis". Wiley, New York. 62 | } 63 | 64 | \author{Christian Hennig 65 | \email{christian.hennig@unibo.it} 66 | \url{https://www.unibo.it/sitoweb/christian.hennig/en}} 67 | 68 | \seealso{\code{\link{cluster.stats}}, \code{\link[cluster]{silhouette}}} 69 | 70 | \examples{ 71 | set.seed(20000) 72 | options(digits=3) 73 | face <- rFace(50,dMoNo=2,dNoEy=0,p=2) 74 | clustering <- as.integer(attr(face,"grouping")) 75 | distcritmulti(face,clustering,ns=3,seed=100000,criterion="pearsongamma") 76 | } 77 | 78 | \keyword{cluster}% __ONLY ONE__ keyword per line 79 | -------------------------------------------------------------------------------- /man/distrsimilarity.Rd: -------------------------------------------------------------------------------- 1 | \name{distrsimilarity} 2 | \alias{distrsimilarity} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Similarity of within-cluster distributions to normal and uniform} 5 | \description{ 6 | Two measures of dissimilarity between the within-cluster distributions of 7 | a dataset and the normal or uniform distribution. For the normal it is the 8 | Kolmogorov distance between the distribution of Mahalanobis distances to the 9 | center and a chi-squared distribution. For the uniform it is the 10 | Kolmogorov distance between the distribution of distances to the kth nearest neighbour 11 | and a Gamma distribution (this is based on Byers and Raftery (1998)). 12 | The clusterwise values are aggregated by weighting with the cluster sizes. 13 | } 14 | \usage{ 15 | distrsimilarity(x,clustering,noisecluster = FALSE, 16 | distribution=c("normal","uniform"),nnk=2, 17 | largeisgood=FALSE,messages=FALSE) 18 | } 19 | %- maybe also `usage' for other objects documented here. 20 | \arguments{ 21 | \item{x}{the data matrix; a numerical object which can be coerced 22 | to a matrix.} 23 | \item{clustering}{integer vector of class numbers; length must equal 24 | \code{nrow(x)}, numbers must go from 1 to the number of clusters.} 25 | \item{noisecluster}{logical. If \code{TRUE}, the cluster with the 26 | largest number is ignored for the computations.} 27 | \item{distribution}{vector of \code{"normal", "uniform"} or 28 | both. Indicates which of the two dissimilarities is/are computed.} 29 | \item{nnk}{integer. Number of nearest neighbors to use for 30 | dissimilarity to the uniform.} 31 | \item{largeisgood}{logical. If \code{TRUE}, dissimilarities are 32 | transformed to \code{1-d} (this means that larger values indicate a 33 | better fit).} 34 | \item{messages}{logical.
If \code{TRUE}, warnings are given if 35 | within-cluster covariance matrices are not invertible (in which case 36 | all within-cluster Mahalanobis distances are set to zero).} 37 | } 38 | \note{ 39 | It is very hard to capture similarity to a multivariate normal or 40 | uniform in a single value, and both measures used here have their 41 | shortcomings. Particularly, the dissimilarity to the uniform can still 42 | indicate a good fit if there are holes, or if the distribution is uniform but 43 | concentrated on several disconnected sets. 44 | } 45 | % \details{ 46 | % } 47 | \value{ 48 | List with the following components 49 | \item{kdnorm}{Kolmogorov distance between distribution of 50 | within-cluster Mahalanobis 51 | distances and appropriate chi-squared distribution, aggregated over 52 | clusters (I am grateful to Agustin Mayo-Iscar for the idea).} 53 | \item{kdunif}{Kolmogorov distance between distribution of distances to 54 | \code{nnk}th nearest within-cluster neighbor and appropriate 55 | Gamma-distribution, see Byers and Raftery (1998), aggregated over 56 | clusters.} 57 | \item{kdnormc}{vector of cluster-wise Kolmogorov distances between 58 | distribution of within-cluster Mahalanobis 59 | distances and appropriate chi-squared distribution.} 60 | \item{kdunifc}{vector of cluster-wise Kolmogorov distances between 61 | distribution of distances to \code{nnk}th nearest within-cluster 62 | neighbor and appropriate Gamma-distribution.} 63 | \item{xmahal}{vector of Mahalanobis distances to the respective cluster 64 | center.} 65 | \item{xdknn}{vector of distances to \code{nnk}th nearest within-cluster 66 | neighbor.} 67 | } 68 | \references{ 69 | Byers, S. and Raftery, A. E. (1998) Nearest-Neighbor Clutter 70 | Removal for Estimating Features in Spatial Point Processes, 71 | \emph{Journal of the American Statistical Association}, 93, 577-584. 72 | 73 | Hennig, C. (2017) Cluster validation by measurement of clustering 74 | characteristics relevant to the user. In C. H. Skiadas (ed.) 75 | \emph{Proceedings of ASMDA 2017}, 501-520, 76 | \url{https://arxiv.org/abs/1703.09282} 77 | 78 | 79 | } 80 | \author{Christian Hennig 81 | \email{christian.hennig@unibo.it} 82 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 83 | } 84 | 85 | \seealso{ 86 | \code{\link{cqcluster.stats}}, \code{\link{cluster.stats}} 87 | for more cluster validity statistics. 88 | } 89 | 90 | \examples{ 91 | set.seed(20000) 92 | options(digits=3) 93 | face <- rFace(200,dMoNo=2,dNoEy=0,p=2) 94 | km3 <- kmeans(face,3) 95 | distrsimilarity(face,km3$cluster) 96 | } 97 | \keyword{multivariate}% at least one, from doc/KEYWORDS 98 | \keyword{classif}% __ONLY ONE__ keyword per line 99 | \keyword{cluster}% __ONLY ONE__ keyword per line 100 | 101 | 102 | 103 | -------------------------------------------------------------------------------- /man/dridgeline.Rd: -------------------------------------------------------------------------------- 1 | \name{dridgeline} 2 | \alias{dridgeline} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Density along the ridgeline} 5 | \description{ 6 | Computes the density of a two-component Gaussian mixture along the 7 | ridgeline (Ray and Lindsay, 2005), along which 8 | all its density extrema are located. 9 | } 10 | \usage{ 11 | dridgeline(alpha=seq(0,1,0.001), prop, 12 | mu1, mu2, Sigma1, Sigma2, showplot=FALSE, ...) 13 | } 14 | %- maybe also `usage' for other objects documented here.
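% A minimal sketch (assuming reasonably separated components): since all
% density extrema of the mixture lie on the ridgeline, the number of interior
% local maxima of the returned values gives a rough mode count, e.g.
% d <- dridgeline(seq(0,1,0.001), prop=0.5, mu1=c(0,0), mu2=c(3,3),
%                 Sigma1=diag(2), Sigma2=diag(2))
% sum(diff(sign(diff(d))) < 0)  # interior local maxima along alpha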
15 | \arguments{ 16 | \item{alpha}{sequence of values between 0 and 1 for which the density 17 | is computed.} 18 | \item{prop}{mixture proportion of first component.} 19 | \item{mu1}{mean vector of component 1.} 20 | \item{mu2}{mean vector of component 2.} 21 | \item{Sigma1}{covariance matrix of component 1.} 22 | \item{Sigma2}{covariance matrix of component 2.} 23 | \item{showplot}{logical. If \code{TRUE}, the density is plotted 24 | against \code{alpha}.} 25 | \item{...}{further arguments to be passed on to \code{plot}.} 26 | } 27 | 28 | \value{ 29 | Vector of density values for values of \code{alpha}. 30 | } 31 | 32 | \references{ 33 | Ray, S. and Lindsay, B. G. (2005) The Topography of Multivariate 34 | Normal Mixtures, \emph{Annals of Statistics}, 33, 2042-2065. 35 | } 36 | \author{Christian Hennig 37 | \email{christian.hennig@unibo.it} 38 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 39 | } 40 | \examples{ 41 | q <- dridgeline(seq(0,1,0.1),0.5,c(1,1),c(2,5),diag(2),diag(2)) 42 | } 43 | \keyword{cluster}% at least one, from doc/KEYWORDS 44 | \keyword{multivariate} 45 | 46 | 47 | 48 | -------------------------------------------------------------------------------- /man/dudahart2.Rd: -------------------------------------------------------------------------------- 1 | \name{dudahart2} 2 | \alias{dudahart2} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Duda-Hart test for splitting} 5 | \description{ 6 | Duda-Hart test for whether a data set should be split into two 7 | clusters. 8 | } 9 | \usage{ 10 | dudahart2(x,clustering,alpha=0.001) 11 | } 12 | %- maybe also `usage' for other objects documented here. 13 | \arguments{ 14 | \item{x}{data matrix or data frame.} 15 | \item{clustering}{vector of integers. Clustering into two clusters.} 16 | \item{alpha}{numeric between 0 and 1. Significance level (recommended 17 | to be small if this is used for estimating the number of clusters).} 18 | } 19 | 20 | \value{ 21 | A list with components 22 | \item{p.value}{p-value against the null hypothesis of homogeneity.} 23 | \item{dh}{ratio of within-cluster sum of squares for two clusters and 24 | overall sum of squares.} 25 | \item{compare}{critical value for \code{dh} at level \code{alpha}.} 26 | \item{cluster1}{\code{FALSE} if the null hypothesis of homogeneity is 27 | rejected.} 28 | \item{alpha}{see above.} 29 | \item{z}{\code{1-alpha}-quantile of a standard Gaussian.} 30 | } 31 | 32 | \references{ 33 | Duda, R. O. and Hart, P. E. (1973) \emph{Pattern Classification and 34 | Scene Analysis}. Wiley, New York. 35 | } 36 | 37 | \author{Christian Hennig 38 | \email{christian.hennig@unibo.it} 39 | \url{https://www.unibo.it/sitoweb/christian.hennig/en}} 40 | 41 | \seealso{\code{\link{cluster.stats}}} 42 | 43 | \examples{ 44 | options(digits=2) 45 | set.seed(98765) 46 | iriss <- iris[sample(150,20),-5] 47 | km <- kmeans(iriss,2) 48 | dudahart2(iriss,km$cluster) 49 | } 50 | 51 | \keyword{cluster}% __ONLY ONE__ keyword per line 52 | -------------------------------------------------------------------------------- /man/extract.mixturepars.Rd: -------------------------------------------------------------------------------- 1 | \name{extract.mixturepars} 2 | \alias{extract.mixturepars} 3 | %- Also NEED an `\alias' for EACH other topic documented here.
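% A minimal sketch (an assumption, not package-documented behaviour): since
% the returned object has the parameter format of summary.mclustBIC output,
% it should be possible to pass it on to \code{\link{mixdens}} to evaluate
% the density of the reduced mixture, with siris, iriss and emp as in the
% examples below:
% mixdens(emp$variance$modelName, iriss, emp)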
4 | \title{Extract parameters for certain components from mclust} 5 | \description{ 6 | Extracts parameters of certain mixture components from the output of 7 | \code{\link[mclust]{summary.mclustBIC}} and updates proportions so that 8 | they sum up to 1. 9 | } 10 | \usage{ 11 | extract.mixturepars(mclustsum,compnumbers,noise=FALSE) 12 | } 13 | %- maybe also `usage' for other objects documented here. 14 | \arguments{ 15 | \item{mclustsum}{output object of \code{\link[mclust]{summary.mclustBIC}}.} 16 | \item{compnumbers}{vector of integers. Numbers of mixture components.} 17 | \item{noise}{logical. Should be \code{TRUE} if a noise component was fitted by 18 | \code{\link[mclust]{mclustBIC}}.} 19 | } 20 | 21 | \value{ 22 | Object as component \code{parameters} of 23 | \code{\link[mclust]{summary.mclustBIC}}-output, but for specified 24 | components only. (Orientation information from all components is kept.) 25 | } 26 | 27 | \author{Christian Hennig 28 | \email{christian.hennig@unibo.it} 29 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 30 | } 31 | \examples{ 32 | set.seed(98765) 33 | require(mclust) 34 | iriss <- iris[sample(150,20),-5] 35 | irisBIC <- mclustBIC(iriss,G=5,modelNames="VEV") 36 | siris <- summary(irisBIC,iriss) 37 | emp <- extract.mixturepars(siris,2) 38 | emp$pro 39 | round(emp$mean,digits=1) 40 | emp$variance$modelName 41 | round(emp$variance$scale,digits=2) 42 | 43 | } 44 | \keyword{cluster}% at least one, from doc/KEYWORDS 45 | \keyword{multivariate} 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /man/findrep.Rd: -------------------------------------------------------------------------------- 1 | \name{findrep} 2 | \alias{findrep} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Finding representatives for cluster border} 5 | \description{ 6 | Finds representative objects for the border of a cluster and the 7 | within-cluster variance as defined in the framework of the \code{\link{cdbw}} 8 | cluster validation index (and meant to be used in that context). 9 | } 10 | \usage{ 11 | findrep(x,xcen,clustering,cluster,r,p=ncol(x),n=nrow(x), 12 | nc=sum(clustering==cluster)) 13 | 14 | } 15 | %- maybe also `usage' for other objects documented here. 16 | \arguments{ 17 | \item{x}{matrix. Euclidean dataset.} 18 | \item{xcen}{mean vector of the cluster.} 19 | \item{clustering}{vector of integers with length \code{=nrow(x)}; 20 | indicating the cluster for each observation.} 21 | \item{cluster}{integer. Number of the cluster to be treated.} 22 | \item{r}{integer. Number of representatives.} 23 | \item{p}{integer. Number of dimensions.} 24 | \item{n}{integer. Number of observations.} 25 | \item{nc}{integer. Number of observations in \code{cluster}.} 26 | } 27 | 28 | \value{ 29 | List with components 30 | \item{repc}{vector of indexes of representatives (out of all 31 | observations).} 32 | \item{repx}{vector of indexes of representatives (out of only the 33 | observations in \code{cluster}).} 34 | \item{maxr}{number of representatives (this can be smaller than 35 | \code{r} if fewer pairwise different observations are in 36 | \code{cluster}).} 37 | \item{wvar}{estimated average within-cluster squared distance to mean.} 38 | } 39 | 40 | 41 | \references{ 42 | Halkidi, M. and Vazirgiannis, M. (2008) A density-based cluster 43 | validity approach using multi-representatives. \emph{Pattern 44 | Recognition Letters} 29, 773-786. 45 | 46 | Halkidi, M., Vazirgiannis, M. and Hennig, C.
(2015) Method-independent 47 | indices for cluster validation. In C. Hennig, M. Meila, F. Murtagh, 48 | R. Rocci (eds.) \emph{Handbook of Cluster Analysis}, CRC 49 | Press/Taylor \code{&} Francis, Boca Raton. 50 | 51 | 52 | } 53 | 54 | \seealso{ 55 | \code{\link{cdbw}} 56 | } 57 | 58 | \author{Christian Hennig 59 | \email{christian.hennig@unibo.it} 60 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 61 | } 62 | \examples{ 63 | options(digits=3) 64 | iriss <- as.matrix(iris[c(1:5,51:55,101:105),-5]) 65 | irisc <- as.numeric(iris[c(1:5,51:55,101:105),5]) 66 | findrep(iriss,colMeans(iriss),irisc,cluster=1,r=2) 67 | } 68 | \keyword{cluster}% at least one, from doc/KEYWORDS 69 | 70 | 71 | 72 | 73 | -------------------------------------------------------------------------------- /man/fpclusters.Rd: -------------------------------------------------------------------------------- 1 | \name{fpclusters} 2 | \alias{fpclusters} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Extracting clusters from fixed point cluster objects} 5 | \description{ 6 | \code{fpclusters} is a generic function which extracts the 7 | representative fixed point clusters (FPCs) 8 | from FPC objects generated by \code{\link{fixmahal}} and 9 | \code{\link{fixreg}}. For documentation and examples see 10 | \code{\link{fixmahal}} and \code{\link{fixreg}}. 11 | } 12 | \usage{ 13 | fpclusters(object, ...) 14 | } 15 | %- maybe also `usage' for other objects documented here. 16 | \arguments{ 17 | \item{object}{object of class \code{rfpc} or \code{mfpc}.} 18 | \item{...}{further arguments depending on the method.} 19 | } 20 | 21 | \value{ 22 | a list of logical or numerical vectors indicating or giving the 23 | weights of the cluster memberships. 24 | } 25 | 26 | \author{Christian Hennig 27 | \email{christian.hennig@unibo.it} 28 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 29 | } 30 | 31 | \seealso{\code{\link{fixmahal}}, \code{\link{fixreg}}} 32 | 33 | \keyword{cluster}% at least one, from doc/KEYWORDS 34 | 35 | -------------------------------------------------------------------------------- /man/itnumber.Rd: -------------------------------------------------------------------------------- 1 | \name{itnumber} 2 | \alias{itnumber} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Number of regression fixed point cluster iterations} 5 | \description{ 6 | Computes the number of fixed point iterations needed by 7 | \code{\link{fixreg}} to find \code{mtf} times 8 | a fixed point cluster (FPC) of size 9 | \code{cn} with an approximated probability of \code{prob}. 10 | 11 | Thought for use within \code{\link{fixreg}}. 12 | } 13 | \usage{ 14 | itnumber(n, p, cn, mtf, prob = 0.95, maxir = 20000) 15 | } 16 | %- maybe also `usage' for other objects documented here. 17 | \arguments{ 18 | \item{n}{positive integer. Total number of points.} 19 | \item{p}{positive integer. Number of independent variables.} 20 | \item{cn}{positive integer smaller or equal to \code{n}. 21 | Size of the FPC.} 22 | \item{mtf}{positive integer.} 23 | \item{prob}{number between 0 and 1.} 24 | \item{maxir}{positive integer. \code{itnumber} is set to this value if 25 | it would otherwise be larger.} 26 | } 27 | \details{ 28 | The computation is based on the binomial distribution with probability 29 | given by \code{\link{clusexpect}} with \code{ir=1}. 30 | } 31 | \value{ 32 | An integer. 33 | } 34 | \references{ 35 | Hennig, C. 
(2002) Fixed point clusters for linear regression: 36 | computation and comparison, \emph{Journal of 37 | Classification} 19, 249-276. 38 | } 39 | 40 | \author{Christian Hennig 41 | \email{christian.hennig@unibo.it} 42 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/}} 43 | 44 | \seealso{\code{\link{fixreg}}, \code{\link{clusexpect}}} 45 | 46 | \examples{ 47 | itnumber(500,4,150,2) 48 | } 49 | \keyword{univar}% at least one, from doc/KEYWORDS 50 | \keyword{cluster}% __ONLY ONE__ keyword per line 51 | -------------------------------------------------------------------------------- /man/jittervar.Rd: -------------------------------------------------------------------------------- 1 | \name{jittervar} 2 | \alias{jittervar} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Jitter variables in a data matrix} 5 | \description{ 6 | Jitters some variables in a data matrix. 7 | } 8 | \usage{ 9 | jittervar(x,jitterv=NULL,factor=1) 10 | } 11 | %- maybe also `usage' for other objects documented here. 12 | \arguments{ 13 | \item{x}{data matrix or data frame.} 14 | \item{jitterv}{vector of numbers of variables to be jittered.} 15 | \item{factor}{numeric. Passed on to \code{\link{jitter}}. See the 16 | documentation there. The higher, the more jittering.} 17 | } 18 | 19 | \value{ 20 | data matrix or data frame with jittered variables. 21 | } 22 | 23 | \author{Christian Hennig 24 | \email{christian.hennig@unibo.it} 25 | \url{https://www.unibo.it/sitoweb/christian.hennig/en}} 26 | 27 | \seealso{\code{\link{jitter}}} 28 | 29 | \examples{ 30 | set.seed(776655) 31 | v1 <- rnorm(20) 32 | v2 <- rnorm(20) 33 | d1 <- sample(1:5,20,replace=TRUE) 34 | d2 <- sample(1:4,20,replace=TRUE) 35 | ldata <- cbind(v1,v2,d1,d2) 36 | jv <- jittervar(ldata,jitterv=3:4) 37 | } 38 | 39 | \keyword{manip}% __ONLY ONE__ keyword per line 40 | -------------------------------------------------------------------------------- /man/kmeansruns.Rd: -------------------------------------------------------------------------------- 1 | \name{kmeansruns} 2 | \alias{kmeansruns} 3 | 4 | %- Also NEED an `\alias' for EACH other topic documented here. 5 | \title{k-means with estimation of k and random initialisations} 6 | \description{ 7 | This calls the function \code{\link{kmeans}} to perform a k-means 8 | clustering, but initializes the k-means algorithm several times with 9 | random points from the data set as means. Furthermore, it is more 10 | robust against the occurrence of empty clusters in the algorithm and 11 | it estimates the number of clusters by either the Calinski-Harabasz 12 | index (\code{\link{calinhara}}) or average silhouette width (see 13 | \code{\link[cluster]{pam.object}}). The Duda-Hart test 14 | (\code{\link{dudahart2}}) is applied to decide whether there should be 15 | more than one cluster (unless 1 is excluded as number of clusters). 16 | } 17 | \usage{ 18 | kmeansruns(data,krange=2:10,criterion="ch", 19 | iter.max=100,runs=100, 20 | scaledata=FALSE,alpha=0.001, 21 | critout=FALSE,plot=FALSE,...) 22 | } 23 | \arguments{ 24 | \item{data}{A numeric matrix of data, or an object that can be coerced to 25 | such a matrix (such as a numeric vector or a data frame with 26 | all numeric columns). } 27 | \item{krange}{integer vector. Numbers of clusters which are to be 28 | compared by the chosen criterion. Note: average 29 | silhouette width and Calinski-Harabasz can't estimate the number of 30 | clusters \code{nc=1}.
If 1 is included, a Duda-Hart test is applied 31 | and 1 is estimated if this is not significant.} 32 | \item{criterion}{one of \code{"asw"} or \code{"ch"}. Determines 33 | whether average silhouette width or Calinski-Harabasz is applied.} 34 | \item{iter.max}{integer. The maximum number of iterations allowed.} 35 | \item{runs}{integer. Number of starts of the k-means algorithm.} 36 | \item{scaledata}{logical. If \code{TRUE}, the variables are centered 37 | and scaled to unit variance before execution.} 38 | \item{alpha}{numeric between 0 and 1, tuning constant for 39 | \code{\link{dudahart2}} (only used for 1-cluster test).} 40 | \item{critout}{logical. If \code{TRUE}, the criterion value is printed 41 | out for every number of clusters.} 42 | \item{plot}{logical. If \code{TRUE}, every clustering resulting from a 43 | run of the algorithm is plotted.} 44 | \item{...}{further arguments to be passed on to \code{\link{kmeans}}.} 45 | } 46 | 47 | \value{ 48 | The output of the optimal run of the \code{\link{kmeans}}-function 49 | with added components \code{bestk} and \code{crit}. 50 | A list with components 51 | \item{cluster}{A vector of integers indicating the cluster to which each 52 | point is allocated.} 53 | \item{centers}{A matrix of cluster centers.} 54 | \item{withinss}{The within-cluster sum of squares for each cluster.} 55 | \item{size}{The number of points in each cluster.} 56 | \item{bestk}{The optimal number of clusters.} 57 | \item{crit}{Vector with values of the \code{criterion} for all used numbers of 58 | clusters (0 if number not tried).} 59 | } 60 | \author{Christian Hennig 61 | \email{christian.hennig@unibo.it} 62 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 63 | } 64 | 65 | \references{ 66 | Calinski, T., and Harabasz, J. (1974) A Dendrite Method for Cluster 67 | Analysis, \emph{Communications in Statistics}, 3, 1-27. 68 | 69 | Duda, R. O. and Hart, P. E. (1973) \emph{Pattern Classification and 70 | Scene Analysis}. Wiley, New York. 71 | 72 | Hartigan, J. A. and Wong, M. A. (1979). A K-means clustering 73 | algorithm. \emph{Applied Statistics}, 28, 100-108. 74 | 75 | Kaufman, L. and Rousseeuw, P.J. (1990). "Finding Groups in Data: 76 | An Introduction to Cluster Analysis". Wiley, New York. 77 | } 78 | \seealso{ 79 | \code{\link{kmeans}}, \code{\link{pamk}}, 80 | \code{\link{calinhara}}, \code{\link{dudahart2}} 81 | } 82 | \examples{ 83 | options(digits=3) 84 | set.seed(20000) 85 | face <- rFace(50,dMoNo=2,dNoEy=0,p=2) 86 | pka <- kmeansruns(face,krange=1:5,critout=TRUE,runs=2,criterion="asw") 87 | pkc <- kmeansruns(face,krange=1:5,critout=TRUE,runs=2,criterion="ch") 88 | } 89 | \keyword{cluster}% at least one, from doc/KEYWORDS 90 | \keyword{multivariate} 91 | 92 | 93 | 94 | -------------------------------------------------------------------------------- /man/lcmixed.Rd: -------------------------------------------------------------------------------- 1 | \name{lcmixed} 2 | \alias{lcmixed} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{flexmix method for mixed Gaussian/multinomial mixtures} 5 | \description{ 6 | \code{lcmixed} is a method for the 7 | \code{\link[flexmix]{flexmix}}-function in package 8 | \code{flexmix}.
It provides the necessary information to run an 9 | EM-algorithm for maximum likelihood estimation for a latent class 10 | mixture (clustering) model where some variables are continuous 11 | and modelled within the mixture components by Gaussian distributions 12 | and some variables are categorical and modelled within components by 13 | independent multinomial distributions. \code{lcmixed} can be called 14 | within \code{flexmix}. The function \code{\link{flexmixedruns}} is a wrapper 15 | function that can be run to apply \code{lcmixed}. 16 | 17 | Note that at least one categorical variable is needed, but it is 18 | possible to use data without any continuous variables. 19 | 20 | There are further format restrictions to the data (see below in the 21 | documentation of \code{continuous} and \code{discrete}), which 22 | can be ignored when running \code{lcmixed} through 23 | \code{\link{flexmixedruns}}. 24 | } 25 | \usage{ 26 | lcmixed( formula = .~. , continuous, discrete, ppdim, 27 | diagonal = TRUE, pred.ordinal=FALSE, printlik=FALSE ) 28 | } 29 | %- maybe also `usage' for other objects documented here. 30 | \arguments{ 31 | \item{formula}{a formula to specify response and explanatory 32 | variables. For \code{lcmixed} this always has the form \code{x~1}, 33 | where \code{x} is a matrix or data frame of all variables 34 | involved, because regression on explanatory variables is not 35 | implemented.} 36 | \item{continuous}{number of continuous variables. Note that the 37 | continuous variables always need to be the first variables in the 38 | matrix or data frame.} 39 | \item{discrete}{number of categorical variables. Always the last 40 | variables in the matrix or data frame. Note that categorical 41 | variables must always be coded as integers 1,2,3, etc. without 42 | interruption.} 43 | \item{ppdim}{vector of integers specifying the number of 44 | categories existing in the data for each categorical variable.} 45 | \item{diagonal}{logical. If \code{TRUE}, Gaussian models are fitted 46 | restricted to diagonal covariance matrices. Otherwise, covariance 47 | matrices are unrestricted. \code{TRUE} is consistent with the 48 | "within class independence" assumption for the multinomial variables.} 49 | \item{pred.ordinal}{logical. If \code{FALSE}, the within-component 50 | predicted value for categorical variables is the probability mode, 51 | otherwise it is the mean of the standard (1,2,3,...) scores, which 52 | may be better for ordinal variables.} 53 | \item{printlik}{logical. If \code{TRUE}, the loglikelihood is printed 54 | out whenever computed.} 55 | } 56 | 57 | \details{ 58 | The data need to be organised case-wise, i.e., if there are 59 | categorical variables only, and 15 cases with values c(1,1,2) on the 60 | 3 variables, the data matrix needs 15 rows with values 1 1 2. 61 | 62 | General documentation on flexmix methods can be found in Chapter 4 of 63 | Friedrich Leisch's "FlexMix: A General Framework for Finite Mixture 64 | Models and Latent Class Regression in R", 65 | \url{https://CRAN.R-project.org/package=flexmix} 66 | } 67 | 68 | \value{ 69 | An object of class \code{FLXMC} (not documented; only used 70 | internally by \code{flexmix}). 71 | } 72 | 73 | \references{ 74 | Hennig, C. and Liao, T. (2013) How to find an appropriate clustering 75 | for mixed-type variables with application to socio-economic 76 | stratification, \emph{Journal of the Royal Statistical Society, Series 77 | C Applied Statistics}, 62, 309-369.
78 | 79 | } 80 | 81 | \author{Christian Hennig 82 | \email{christian.hennig@unibo.it} 83 | \url{https://www.unibo.it/sitoweb/christian.hennig/en}} 84 | 85 | 86 | \seealso{ 87 | \code{\link{flexmixedruns}}, \code{\link[flexmix]{flexmix}}, 88 | \code{\link[flexmix]{flexmix-class}}, 89 | \code{\link{discrete.recode}}, which recodes a dataset into the format 90 | required by \code{lcmixed} 91 | } 92 | 93 | \examples{ 94 | set.seed(112233) 95 | options(digits=3) 96 | require(MASS) 97 | require(flexmix) 98 | data(Cars93) 99 | Cars934 <- Cars93[,c(3,5,8,10)] 100 | cc <- 101 | discrete.recode(Cars934,xvarsorted=FALSE,continuous=c(2,3),discrete=c(1,4)) 102 | fcc <- flexmix(cc$data~1,k=2, 103 | model=lcmixed(continuous=2,discrete=2,ppdim=c(6,3),diagonal=TRUE)) 104 | summary(fcc) 105 | } 106 | \keyword{cluster}% __ONLY ONE__ keyword per line 107 | -------------------------------------------------------------------------------- /man/localshape.Rd: -------------------------------------------------------------------------------- 1 | \name{localshape} 2 | \alias{localshape} 3 | \title{Local shape matrix} 4 | \description{ 5 | This computes a matrix formalising 'local shape', i.e., aggregated 6 | standardised variance/covariance in a Mahalanobis neighbourhood of the data 7 | points. This can be used for finding clusters when used as one of the 8 | covariance matrices in 9 | Invariant Coordinate Selection (function \code{ics} in package 10 | \code{ICS}), see Hennig's 11 | discussion of Tyler et al. (2009) and the authors' rejoinder. 12 | } 13 | \usage{ 14 | localshape(xdata,proportion=0.1,mscatter="mcd",mcdalpha=0.8, 15 | covstandard="det") 16 | } 17 | \arguments{ 18 | \item{xdata}{objects times variables data matrix.} 19 | \item{proportion}{proportion of points to be considered as neighbourhood.} 20 | \item{mscatter}{"mcd" or "cov"; specifies whether the minimum covariance 21 | determinant or the 22 | classical covariance matrix is used for Mahalanobis distance 23 | computation.} 24 | \item{mcdalpha}{if \code{mscatter="mcd"}, this is the alpha parameter 25 | to be used by the MCD covariance matrix, i.e. one minus the 26 | asymptotic breakdown point, see \code{\link[robustbase]{covMcd}}.} 27 | \item{covstandard}{one of "trace", "det" or "none", determining by 28 | what constant the pointwise neighbourhood covariance matrices are 29 | standardised. "det" makes the local shape matrix affine equivariant, as noted in the 30 | discussion rejoinder of Tyler et al. (2009).} 31 | } 32 | 33 | \value{ 34 | The local shape matrix. 35 | } 36 | 37 | \references{ 38 | Tyler, D. E., Critchley, F., Duembgen, L., Oja, H. (2009) 39 | Invariant coordinate selection (with discussion). 40 | \emph{Journal of the Royal Statistical Society, Series B}, 71, 549-592. 41 | } 42 | \author{Christian Hennig 43 | \email{christian.hennig@unibo.it} 44 | \url{https://www.unibo.it/sitoweb/christian.hennig/en}} 45 | \examples{ 46 | options(digits=3) 47 | data(iris) 48 | localshape(iris[,-5],mscatter="cov") 49 | } 50 | \keyword{multivariate} 51 | 52 | -------------------------------------------------------------------------------- /man/mahalanodisc.Rd: -------------------------------------------------------------------------------- 1 | \name{mahalanodisc} 2 | \alias{mahalanodisc} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Mahalanobis for AWC} 5 | \description{ 6 | Vector of Mahalanobis distances or their root. For use in \code{awcoord} only.
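% A minimal sketch of the two \code{modus} settings (x as in the examples
% below): values for modus="md" are the square roots of those for
% modus="square", i.e.
% all.equal(mahalanodisc(x, c(0,0), cov(x), modus="md"),
%           sqrt(mahalanodisc(x, c(0,0), cov(x), modus="square")))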
7 | } 8 | \usage{ 9 | mahalanodisc(x2, mg, covg, modus="square") 10 | 11 | } 12 | %- maybe also `usage' for other objects documented here. 13 | \arguments{ 14 | \item{x2}{numerical data matrix.} 15 | \item{mg}{mean vector.} 16 | \item{covg}{covariance matrix.} 17 | \item{modus}{"md" (roots of Mahalanobis distances) or "square" 18 | (original squared form of Mahalanobis distances).} 19 | } 20 | \details{ 21 | The covariance matrix 22 | is inverted by use of 23 | \code{\link{solvecov}}, which can be expected to give 24 | reasonable results for singular within-class covariance matrices. 25 | } 26 | % \details{ 27 | % } 28 | \value{ 29 | vector of (rooted) Mahalanobis distances. 30 | } 31 | 32 | \author{Christian Hennig 33 | \email{christian.hennig@unibo.it} 34 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 35 | } 36 | 37 | \seealso{ 38 | \code{\link{awcoord}}, \code{\link{solvecov}} 39 | } 40 | 41 | \examples{ 42 | options(digits=3) 43 | x <- cbind(rnorm(50),rnorm(50)) 44 | mahalanodisc(x,c(0,0),cov(x)) 45 | mahalanodisc(x,c(0,0),matrix(0,ncol=2,nrow=2)) 46 | } 47 | \keyword{multivariate}% at least one, from doc/KEYWORDS 48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- /man/mahalanofix.Rd: -------------------------------------------------------------------------------- 1 | \name{mahalanofix} 2 | \alias{mahalanofix} 3 | \alias{mahalanofuz} 4 | %- Also NEED an `\alias' for EACH other topic documented here. 5 | \title{Mahalanobis distances from center of indexed points} 6 | \description{ 7 | Computes the vector of (classical or robust) 8 | Mahalanobis distances of all points of \code{x} 9 | to the center of the points indexed (or weighted) 10 | by \code{gv}. The latter also determine 11 | the covariance matrix. 12 | 13 | Thought for use within \code{\link{fixmahal}}. 14 | } 15 | \usage{ 16 | mahalanofix(x, n = nrow(as.matrix(x)), p = ncol(as.matrix(x)), gv = 17 | rep(1, times = n), cmax = 1e+10, method = "ml") 18 | 19 | mahalanofuz(x, n = nrow(as.matrix(x)), p = ncol(as.matrix(x)), 20 | gv = rep(1, times=n), cmax = 1e+10) 21 | } 22 | %- maybe also `usage' for other objects documented here. 23 | \arguments{ 24 | \item{x}{a numerical data matrix, rows are points, columns are variables.} 25 | \item{n}{positive integer. Number of points.} 26 | \item{p}{positive integer. Number of variables.} 27 | \item{gv}{for \code{mahalanofix} 28 | a logical or 0-1 vector of length \code{n}. For \code{mahalanofuz} a 29 | numerical vector with values between 0 and 1.} 30 | \item{cmax}{positive number. Used in \code{\link{solvecov}} if 31 | the covariance matrix is singular.} 32 | \item{method}{\code{"ml"}, \code{"classical"}, 33 | \code{"mcd"} or \code{"mve"}. Method to compute the covariance 34 | matrix estimator. See \code{\link[MASS]{cov.rob}}, \code{\link{fixmahal}}.} 35 | 36 | } 37 | \details{ 38 | \code{\link{solvecov}} is used to invert the covariance matrix. The methods 39 | \code{"mcd"} and \code{"mve"} in \code{mahalanofix} do not work properly 40 | with point constellations with singular covariance matrices! 41 | } 42 | \value{ 43 | A list of the following components: 44 | \item{md}{vector of Mahalanobis distances.} 45 | \item{mg}{mean of the points indexed by \code{gv}, weighted mean in 46 | \code{mahalanofuz}.} 47 | \item{covg}{covariance matrix of the points indexed by \code{gv}, 48 | weighted covariance matrix in \code{mahalanofuz}.} 49 | \item{covinv}{\code{covg} inverted by \code{\link{solvecov}}.} 50 | \item{coll}{logical.
If \code{TRUE}, \code{covg} was 51 | (numerically) singular.} 52 | } 53 | \author{Christian Hennig 54 | \email{christian.hennig@unibo.it} 55 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/}} 56 | 57 | \note{Methods \code{"mcd"} and \code{"mve"} require library \code{lqs}.} 58 | 59 | \seealso{\code{\link{fixmahal}}, \code{\link{solvecov}}, \code{\link[MASS]{cov.rob}}} 60 | 61 | \examples{ 62 | x <- c(1,2,3,4,5,6,7,8,9,10) 63 | y <- c(1,2,3,8,7,6,5,8,9,10) 64 | mahalanofix(cbind(x,y),gv=c(0,0,0,1,1,1,1,1,0,0)) 65 | mahalanofix(cbind(x,y),gv=c(0,0,0,1,1,1,1,0,0,0)) 66 | mahalanofix(cbind(x,y),gv=c(0,0,0,1,1,1,1,1,0,0),method="mcd") 67 | mahalanofuz(cbind(x,y),gv=c(0,0,0.5,0.5,1,1,1,0.5,0.5,0)) 68 | } 69 | \keyword{multivariate}% at least one, from doc/KEYWORDS 70 | 71 | 72 | 73 | 74 | 75 | -------------------------------------------------------------------------------- /man/mahalconf.Rd: -------------------------------------------------------------------------------- 1 | \name{mahalconf} 2 | \alias{mahalconf} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Mahalanobis fixed point clusters initial configuration} 5 | \description{ 6 | Generates an initial configuration of \code{startn} points from 7 | dataset \code{x} for the \code{\link{fixmahal}} 8 | fixed point iteration. 9 | 10 | Thought only for use within \code{\link{fixmahal}}. 11 | } 12 | \usage{ 13 | mahalconf(x, no, startn, covall, plot) 14 | } 15 | %- maybe also `usage' for other objects documented here. 16 | \arguments{ 17 | \item{x}{numerical matrix. Rows are points, columns are variables.} 18 | \item{no}{integer between 1 and \code{nrow(x)}. Number of the first 19 | point of the configuration.} 20 | \item{startn}{integer between 1 and \code{nrow(x)}.} 21 | \item{covall}{covariance matrix for the computation of the first 22 | Mahalanobis distances.} 23 | \item{plot}{a string. If equal to \code{"start"} or \code{"both"}, the 24 | first two variables and the first \code{ncol(x)+1} points are plotted.} 25 | } 26 | \details{ 27 | \code{mahalconf} first chooses the \eqn{p} (number of variables) 28 | nearest points to point no. \code{no} in terms of the Mahalanobis 29 | distance w.r.t. \code{covall}, so that there are \eqn{p+1} points. 30 | In every further step, the covariance 31 | matrix of the current configuration is computed and the nearest point 32 | in terms of the new Mahalanobis distance is 33 | added. \code{\link{solvecov}} is used to invert singular covariance 34 | matrices. 35 | } 36 | \value{ 37 | A logical vector of length \code{nrow(x)}. 38 | } 39 | 40 | \author{Christian Hennig 41 | \email{christian.hennig@unibo.it} 42 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/}} 43 | 44 | \seealso{\code{\link{fixmahal}}, \code{\link{solvecov}}} 45 | 46 | \examples{ 47 | set.seed(4634) 48 | face <- rFace(600,dMoNo=2,dNoEy=0,p=2) 49 | mahalconf(face,no=200,startn=20,covall=cov(face),plot="start") 50 | } 51 | \keyword{multivariate}% at least one, from doc/KEYWORDS 52 | \keyword{cluster} 53 | -------------------------------------------------------------------------------- /man/mergeparameters.Rd: -------------------------------------------------------------------------------- 1 | \name{mergeparameters} 2 | \alias{mergeparameters} 3 | %- Also NEED an `\alias' for EACH other topic documented here.
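% A minimal sketch (following the \value documentation below; iriss, probs,
% muarray, Sigmaarray and z as in the examples): after merging components
% j1=1 and j2=2, the entries in place j2 are obsolete and can be dropped to
% obtain the reduced mixture:
% mpi <- mergeparameters(iriss, 1, 2, probs, muarray, Sigmaarray, z)
% probs.new <- mpi$probs[-2]  # merged proportion sits in place j1=1
% z.new <- mpi$z[, -2]        # posteriors; merged component in column j1=1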
4 | \title{New parameters from merging two Gaussian mixture components} 5 | \description{ 6 | Re-computes pointwise posterior probabilities, mean and covariance 7 | matrix for a mixture component obtained by merging two mixture 8 | components in a Gaussian mixture. 9 | } 10 | \usage{ 11 | mergeparameters(xdata, j1, j2, probs, muarray,Sigmaarray, z) 12 | } 13 | %- maybe also `usage' for other objects documented here. 14 | \arguments{ 15 | \item{xdata}{data (something that can be coerced into a matrix).} 16 | \item{j1}{integer. Number of first mixture component to be merged.} 17 | \item{j2}{integer. Number of second mixture component to be merged.} 18 | \item{probs}{vector of component proportions (for all components; 19 | should sum up to one).} 20 | \item{muarray}{matrix of component means (rows).} 21 | \item{Sigmaarray}{array of component covariance matrices (third 22 | dimension refers to component number).} 23 | \item{z}{matrix of observation- (row-)wise posterior probabilities of 24 | belonging to the components (columns).} 25 | } 26 | 27 | \value{ 28 | List with components 29 | \item{probs}{see above; sum of probabilities for original components 30 | \code{j1} and \code{j2} is now \code{probs[j1]}. Note that generally, 31 | also for the further components, values for the merged component are 32 | in place \code{j1} and values in place \code{j2} are not changed. This 33 | means that in order to have only the information for the new mixture 34 | after merging, the entries in places \code{j2} need to be suppressed.} 35 | \item{muarray}{see above; weighted mean of means of component 36 | \code{j1} and \code{j2} is now in place \code{j1}.} 37 | \item{Sigmaarray}{see above; weighted covariance matrix handled as 38 | above.} 39 | \item{z}{see above; original entries for columns \code{j1} and 40 | \code{j2} are summed up and now in column \code{j1}.} 41 | } 42 | 43 | \references{ 44 | Hennig, C. (2010) Methods for merging Gaussian mixture components, 45 | \emph{Advances in Data Analysis and Classification}, 4, 3-34. 46 | } 47 | \author{Christian Hennig 48 | \email{christian.hennig@unibo.it} 49 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 50 | } 51 | \examples{ 52 | options(digits=3) 53 | set.seed(98765) 54 | require(mclust) 55 | iriss <- iris[sample(150,20),-5] 56 | irisBIC <- mclustBIC(iriss) 57 | siris <- summary(irisBIC,iriss) 58 | probs <- siris$parameters$pro 59 | muarray <- siris$parameters$mean 60 | Sigmaarray <- siris$parameters$variance$sigma 61 | z <- siris$z 62 | mpi <- mergeparameters(iriss,1,2,probs,muarray,Sigmaarray,z) 63 | mpi$probs 64 | mpi$muarray 65 | } 66 | \keyword{multivariate} 67 | \keyword{cluster} 68 | 69 | 70 | -------------------------------------------------------------------------------- /man/minsize.Rd: -------------------------------------------------------------------------------- 1 | \name{minsize} 2 | \alias{minsize} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Minimum size of regression fixed point cluster} 5 | \description{ 6 | Computes the minimum size of a fixed point cluster (FPC) which is 7 | found at least \code{mtf} times with approximated 8 | probability \code{prob} by 9 | \code{ir} fixed point iterations of \code{\link{fixreg}}. 10 | 11 | Thought for use within \code{\link{fixreg}}. 12 | } 13 | \usage{ 14 | minsize(n, p, ir, mtf, prob = 0.5) 15 | } 16 | %- maybe also `usage' for other objects documented here. 17 | \arguments{ 18 | \item{n}{positive integer. 
Total number of points.} 19 | \item{p}{positive integer. Number of independent variables.} 20 | \item{ir}{positive integer. Number of fixed point iterations.} 21 | \item{mtf}{positive integer.} 22 | \item{prob}{numerical between 0 and 1.} 23 | } 24 | \details{ 25 | The computation is based on the binomial distribution with probability 26 | given by \code{\link{clusexpect}} with \code{ir=1}. 27 | } 28 | \value{ 29 | An integer. 30 | } 31 | \references{ 32 | Hennig, C. (2002) Fixed point clusters for linear regression: 33 | computation and comparison, \emph{Journal of 34 | Classification} 19, 249-276. 35 | } 36 | 37 | \author{Christian Hennig 38 | \email{christian.hennig@unibo.it} 39 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/}} 40 | 41 | \seealso{\code{\link{fixreg}}, \code{\link{clusexpect}}, 42 | \code{\link{itnumber}}} 43 | 44 | \examples{ 45 | minsize(500,4,7000,2) 46 | } 47 | \keyword{univar}% at least one, from doc/KEYWORDS 48 | \keyword{cluster}% __ONLY ONE__ keyword per line 49 | -------------------------------------------------------------------------------- /man/mixdens.Rd: -------------------------------------------------------------------------------- 1 | \name{mixdens} 2 | \alias{mixdens} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Density of multivariate Gaussian mixture, mclust parameterisation} 5 | \description{ 6 | Computes density values for data from a mixture of multivariate Gaussian 7 | distributions with parameters based on the way models are specified 8 | and parameters are stored in package mclust. 9 | } 10 | \usage{ 11 | mixdens(modelName,data,parameters) 12 | } 13 | %- maybe also `usage' for other objects documented here. 14 | \arguments{ 15 | \item{modelName}{an mclust model name. 16 | See \code{\link[mclust]{mclustModelNames}}.} 17 | \item{data}{data matrix; density values are computed for every 18 | observation (row).} 19 | \item{parameters}{parameters of Gaussian mixture in the format used in 20 | the output of \code{\link[mclust]{summary.mclustBIC}}.} 21 | } 22 | 23 | \value{ 24 | Vector of density values for the observations. 25 | } 26 | 27 | \author{Christian Hennig 28 | \email{christian.hennig@unibo.it} 29 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 30 | } 31 | \examples{ 32 | set.seed(98765) 33 | require(mclust) 34 | iriss <- iris[sample(150,20),-5] 35 | irisBIC <- mclustBIC(iriss) 36 | siris <- summary(irisBIC,iriss) 37 | round(mixdens(siris$modelName,iriss,siris$parameters),digits=2) 38 | } 39 | \keyword{cluster}% at least one, from doc/KEYWORDS 40 | \keyword{multivariate} 41 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /man/mixpredictive.Rd: -------------------------------------------------------------------------------- 1 | \name{mixpredictive} 2 | \alias{mixpredictive} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Prediction strength of merged Gaussian mixture} 5 | \description{ 6 | Computes the prediction strength of clustering by 7 | merging Gaussian mixture components, see \code{\link{mergenormals}}. 8 | The predictive strength is 9 | defined according to Tibshirani and Walther (2005), carried out as 10 | described in Hennig (2010), see details. 11 | } 12 | \usage{ 13 | mixpredictive(xdata, Gcomp, Gmix, M=50, ...) 14 | } 15 | %- maybe also `usage' for other objects documented here. 16 | \arguments{ 17 | \item{xdata}{data (something that can be coerced into a matrix).} 18 | \item{Gcomp}{integer. 
Number of components of the underlying Gaussian mixture.} 19 | \item{Gmix}{integer. Number of clusters after merging Gaussian components.} 20 | \item{M}{integer. Number of times the dataset is divided into two 21 | halves.} 22 | \item{...}{further arguments that can potentially arrive in calls but 23 | are currently not used.} 24 | } 25 | 26 | \value{ 27 | List with components 28 | \item{predcorr}{vector of length \code{M} with relative frequencies of 29 | correct predictions (clusterwise minimum).} 30 | \item{mean.pred}{mean of \code{predcorr}.} 31 | } 32 | 33 | \details{ 34 | The prediction strength for a certain number of clusters \code{Gmix} under a 35 | random partition of the dataset into halves A and B is defined as 36 | follows. Both halves are clustered with \code{Gmix} 37 | clusters. Then the points of 38 | A are classified to the clusters of B. This is done by use of the 39 | maximum a posteriori rule for mixtures as in Hennig (2010), 40 | differently from Tibshirani and Walther (2005). A pair of points of A in 41 | the same A-cluster is defined to be correctly predicted if both points 42 | are classified into the same cluster on B. The same is done with the 43 | points of B relative to the clustering on A. The prediction strength 44 | for each of the clusterings is the minimum (taken over all clusters) 45 | relative frequency of correctly predicted pairs of points of that 46 | cluster. The final mean prediction strength statistic is the mean over 47 | all 2M clusterings. 48 | } 49 | 50 | \references{ 51 | Hennig, C. (2010) Methods for merging Gaussian mixture components, 52 | \emph{Advances in Data Analysis and Classification}, 4, 3-34. 53 | 54 | Tibshirani, R. and Walther, G. (2005) Cluster Validation by 55 | Prediction Strength, \emph{Journal of Computational and Graphical 56 | Statistics}, 14, 511-528. 57 | } 58 | 59 | \author{Christian Hennig 60 | \email{christian.hennig@unibo.it} 61 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 62 | } 63 | 64 | \seealso{ 65 | \code{\link{prediction.strength}} for Tibshirani and Walther's 66 | original method. 67 | \code{\link{mergenormals}} for the clustering method applied here. 68 | } 69 | 70 | \examples{ 71 | set.seed(98765) 72 | iriss <- iris[sample(150,20),-5] 73 | mp <- mixpredictive(iriss,2,2,M=2) 74 | } 75 | \keyword{cluster}% at least one, from doc/KEYWORDS 76 | \keyword{multivariate} 77 | 78 | -------------------------------------------------------------------------------- /man/mvdcoord.Rd: -------------------------------------------------------------------------------- 1 | \name{mvdcoord} 2 | \alias{mvdcoord} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Mean/variance differences discriminant coordinates} 5 | \description{ 6 | Discriminant projections as defined in Young, Marco and Odell (1987). 7 | The principle is to maximize the projection of a matrix consisting of 8 | the differences between the means of all classes and the first mean 9 | and the differences between the covariance matrices of all classes and 10 | the first covariance matrix. 11 | } 12 | \usage{ 13 | mvdcoord(xd, clvecd, clnum=1, sphere="mcd", ...) 14 | } 15 | %- maybe also `usage' for other objects documented here. 16 | \arguments{ 17 | \item{xd}{the data matrix; a numerical object which can be coerced 18 | to a matrix.} 19 | \item{clvecd}{integer vector of class numbers; length must equal 20 | \code{nrow(xd)}.} 21 | \item{clnum}{integer.
Number of the class relative to which all differences are 22 | computed.} 23 | \item{sphere}{a covariance matrix or one of 24 | "mve", "mcd", "classical", "none". The matrix used for sphering the 25 | data. "mcd" and "mve" are robust covariance matrices as implemented 26 | in \code{\link[MASS]{cov.rob}}. "classical" refers to the classical 27 | covariance matrix. "none" means no sphering and use of the raw 28 | data.} 29 | \item{...}{no effect} 30 | } 31 | % \details{ 32 | % } 33 | \value{ 34 | List with the following components 35 | \item{ev}{eigenvalues in descending order.} 36 | \item{units}{columns are coordinates of projection basis vectors. 37 | New points \code{x} can be projected onto the projection basis vectors 38 | by \code{x \%*\% units}.} 39 | \item{proj}{projections of \code{xd} onto \code{units}.} 40 | } 41 | \references{ 42 | Young, D. M., Marco, V. R. and Odell, P. L. (1987). Quadratic 43 | discrimination: some results on optimal low-dimensional 44 | representation, \emph{Journal of Statistical Planning and Inference}, 45 | 17, 307-319. 46 | } 47 | \author{Christian Hennig 48 | \email{christian.hennig@unibo.it} 49 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 50 | } 51 | 52 | \seealso{ 53 | \code{\link{plotcluster}} for straightforward discriminant plots. 54 | \code{\link{discrproj}} for alternatives. 55 | \code{\link{rFace}} for generation of the example data used below. 56 | } 57 | 58 | \examples{ 59 | set.seed(4634) 60 | face <- rFace(300,dMoNo=2,dNoEy=0,p=3) 61 | grface <- as.integer(attr(face,"grouping")) 62 | mcf <- mvdcoord(face,grface) 63 | plot(mcf$proj,col=grface) 64 | # ...done in one step by function plotcluster. 65 | } 66 | \keyword{multivariate}% at least one, from doc/KEYWORDS 67 | \keyword{classif}% __ONLY ONE__ keyword per line 68 | 69 | 70 | 71 | -------------------------------------------------------------------------------- /man/ncoord.Rd: -------------------------------------------------------------------------------- 1 | \name{ncoord} 2 | \alias{ncoord} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Neighborhood based discriminant coordinates} 5 | \description{ 6 | Neighborhood based discriminant coordinates as defined in Hastie and 7 | Tibshirani (1996) and a robustified version as defined in Hennig (2004). 8 | The principle is to maximize the projection of a between 9 | classes covariance matrix, which is defined by averaging the 10 | between classes covariance matrices in the neighborhoods of all points. 11 | } 12 | \usage{ 13 | ncoord(xd, clvecd, nn=50, weighted=FALSE, 14 | sphere="mcd", orderall=TRUE, countmode=1000, ...) 15 | } 16 | %- maybe also `usage' for other objects documented here. 17 | \arguments{ 18 | \item{xd}{the data matrix; a numerical object which can be coerced 19 | to a matrix.} 20 | \item{clvecd}{integer vector of class numbers; length must equal 21 | \code{nrow(xd)}.} 22 | \item{nn}{integer. Number of points which belong to the neighborhood 23 | of each point (including the point itself).} 24 | \item{weighted}{logical. \code{FALSE} corresponds to the original 25 | method of Hastie and Tibshirani (1996). If \code{TRUE}, 26 | the between classes 27 | covariance matrices B are weighted by w/trace B, where w is some 28 | weight depending on the sizes of the 29 | classes in the neighborhood. Division by trace B reduces the effect 30 | of outliers. \code{TRUE} corresponds to WNC as defined in Hennig 31 | (2004).} 32 | \item{sphere}{a covariance matrix or one of 33 | "mve", "mcd", "classical", "none".
The matrix used for sphering the 34 | data. "mcd" and "mve" are robust covariance matrices as implemented 35 | in \code{\link[MASS]{cov.rob}}. "classical" refers to the classical 36 | covariance matrix. "none" means no sphering and use of the raw 37 | data.} 38 | \item{orderall}{logical. By default, the neighborhoods are computed by 39 | ordering all points each time. If \code{FALSE}, the neighborhoods 40 | are computed by selecting \code{nn} times the nearest point from the 41 | remaining points, which may be faster sometimes.} 42 | \item{countmode}{optional positive integer. \code{ncoord} shows a message after 43 | every \code{countmode} runs of the algorithm.} 44 | \item{...}{no effect} 45 | } 46 | % \details{ 47 | % } 48 | \value{ 49 | List with the following components 50 | \item{ev}{eigenvalues in descending order.} 51 | \item{units}{columns are coordinates of projection basis vectors. 52 | New points \code{x} can be projected onto the projection basis vectors 53 | by \code{x \%*\% units}.} 54 | \item{proj}{projections of \code{xd} onto \code{units}.} 55 | } 56 | \references{ 57 | Hastie, T. and Tibshirani, R. (1996). Discriminant adaptive nearest 58 | neighbor classification. \emph{IEEE Transactions on Pattern Analysis 59 | and Machine Intelligence} 18, 607-616. 60 | 61 | Hennig, C. (2004) Asymmetric linear dimension reduction for classification. 62 | \emph{Journal of Computational and Graphical Statistics}, 13, 930-945. 63 | 64 | Hennig, C. (2005) A method for visual cluster validation. In: 65 | Weihs, C. and Gaul, W. (eds.): Classification - The Ubiquitous 66 | Challenge. Springer, Heidelberg 2005, 153-160. 67 | } 68 | \author{Christian Hennig 69 | \email{christian.hennig@unibo.it} 70 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 71 | } 72 | 73 | \seealso{ 74 | \code{\link{plotcluster}} for straightforward discriminant plots. 75 | \code{\link{discrproj}} for alternatives. 76 | \code{\link{rFace}} for generation of the example data used below. 77 | } 78 | 79 | \examples{ 80 | set.seed(4634) 81 | face <- rFace(600,dMoNo=2,dNoEy=0) 82 | grface <- as.integer(attr(face,"grouping")) 83 | ncf <- ncoord(face,grface) 84 | plot(ncf$proj,col=grface) 85 | ncf2 <- ncoord(face,grface,weighted=TRUE) 86 | plot(ncf2$proj,col=grface) 87 | # ...done in one step by function plotcluster. 88 | } 89 | \keyword{multivariate}% at least one, from doc/KEYWORDS 90 | \keyword{classif}% __ONLY ONE__ keyword per line 91 | 92 | 93 | 94 | -------------------------------------------------------------------------------- /man/neginc.Rd: -------------------------------------------------------------------------------- 1 | \name{neginc} 2 | \alias{neginc} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Neg-entropy normality index for cluster validation} 5 | \description{ 6 | Cluster validity index based on the neg-entropy distances of 7 | within-cluster distributions to normal distribution, see 8 | Lago-Fernandez and Corbacho (2010). 9 | } 10 | \usage{ 11 | neginc(x,clustering) 12 | } 13 | %- maybe also `usage' for other objects documented here. 14 | \arguments{ 15 | \item{x}{something that can be coerced into a numerical 16 | matrix. Euclidean dataset.} 17 | \item{clustering}{vector of integers with length \code{=nrow(x)}; 18 | indicating the cluster for each observation.} 19 | } 20 | 21 | \value{ 22 | Index value, see 23 | Lago-Fernandez and Corbacho (2010). The lower (i.e., the more 24 | negative) the better. 25 | } 26 | 27 | \references{ 28 | Lago-Fernandez, L. F. and Corbacho, F.
(2010) Normality-based 29 | validation for crisp clustering. \emph{Pattern Recognition} 43, 782-795. 30 | } 31 | 32 | \author{Christian Hennig 33 | \email{christian.hennig@unibo.it} 34 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 35 | } 36 | \examples{ 37 | options(digits=3) 38 | iriss <- as.matrix(iris[c(1:10,51:55,101:105),-5]) 39 | irisc <- as.numeric(iris[c(1:10,51:55,101:105),5]) 40 | neginc(iriss,irisc) 41 | } 42 | \keyword{cluster}% at least one, from doc/KEYWORDS 43 | 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /man/nselectboot.Rd: -------------------------------------------------------------------------------- 1 | \name{nselectboot} 2 | \alias{nselectboot} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Selection of the number of clusters via bootstrap} 5 | \description{ 6 | Selection of the number of clusters via bootstrap as explained in Fang 7 | and Wang (2012). Repeatedly, two bootstrap samples are drawn from the 8 | data, and the number of clusters is chosen by optimising an instability 9 | estimate computed from these pairs. 10 | 11 | In principle all clustering methods can be used that have a 12 | CBI-wrapper, see \code{\link{clusterboot}}, 13 | \code{\link{kmeansCBI}}. However, the currently implemented 14 | classification methods are not necessarily suitable for all of them, 15 | see argument \code{classification}. 16 | } 17 | \usage{ 18 | nselectboot(data,B=50,distances=inherits(data,"dist"), 19 | clustermethod=NULL, 20 | classification="averagedist",centroidname = NULL, 21 | krange=2:10, count=FALSE,nnk=1, 22 | largeisgood=FALSE,...) 23 | } 24 | 25 | %- maybe also `usage' for other objects documented here. 26 | \arguments{ 27 | \item{data}{something that can be coerced into a matrix. The data 28 | matrix - either an \code{n*p}-data matrix (or data frame) or an 29 | \code{n*n}-dissimilarity matrix (or \code{dist}-object).} 30 | \item{B}{integer. Number of resampling runs.} 31 | \item{distances}{logical. If \code{TRUE}, the data is interpreted as a 32 | dissimilarity matrix. If \code{data} is a \code{dist}-object, 33 | \code{distances=TRUE} automatically, otherwise 34 | \code{distances=FALSE} by default. This means that you have to set 35 | it to \code{TRUE} manually if \code{data} is a dissimilarity matrix.} 36 | \item{clustermethod}{an interface function (the function name, not a 37 | string containing the name, has to be provided!). This defines the 38 | clustering method. See the "Details"-section of \code{\link{clusterboot}} 39 | and \code{\link{kmeansCBI}} for the format. Clustering methods for 40 | \code{nselectboot} must have a \code{k}-argument for the number of 41 | clusters and must otherwise follow the specifications in 42 | \code{\link{clusterboot}}. Note that \code{nselectboot} won't work 43 | with CBI-functions that implicitly already estimate the number of 44 | clusters such as \code{\link{pamkCBI}}; use \code{\link{claraCBI}} 45 | if you want to run it for pam/clara clustering. 46 | } 47 | \item{classification}{string. 48 | This determines how non-clustered points are classified to given 49 | clusters. Options are explained in \code{\link{classifdist}} (if 50 | \code{distances=TRUE}) and \code{\link{classifnp}} (otherwise). 51 | Certain classification methods are connected to certain clustering 52 | methods.
\code{classification="averagedist"} is recommended for 53 | average linkage, \code{classification="centroid"} is recommended for 54 | k-means, clara and pam (with distances it will work with 55 | \code{\link{claraCBI}} only), \code{classification="knn"} with 56 | \code{nnk=1} is recommended for single linkage and 57 | \code{classification="qda"} is recommended for Gaussian mixtures 58 | with flexible covariance matrices. 59 | } 60 | \item{centroidname}{string. Indicates the name of the component of 61 | \code{CBIoutput$result} that contains the cluster centroids in case of 62 | \code{classification="centroid"}, where \code{CBIoutput} is the 63 | output object of \code{clustermethod}. If \code{clustermethod} is 64 | \code{kmeansCBI} or \code{claraCBI}, centroids are recognised 65 | automatically if \code{centroidname=NULL}. If 66 | \code{centroidname=NULL} and \code{distances=FALSE}, cluster means 67 | are computed as the cluster centroids.} 68 | \item{krange}{integer vector; numbers of clusters to be tried.} 69 | \item{count}{logical. If \code{TRUE}, numbers of clusters and 70 | bootstrap runs are printed.} 71 | \item{nnk}{number of nearest neighbours if 72 | \code{classification="knn"}, see \code{\link{classifdist}} (if 73 | \code{distances=TRUE}) and \code{\link{classifnp}} (otherwise).} 74 | \item{largeisgood}{logical. If \code{TRUE}, output component 75 | \code{stabk} is taken as one minus the original instability value 76 | so that larger values of \code{stabk} are better.} 77 | \item{...}{arguments to be passed on to the clustering method.} 78 | } 79 | 80 | \value{ 81 | \code{nselectboot} returns a list with components 82 | \code{kopt,stabk,stab}. 83 | \item{kopt}{optimal number of clusters.} 84 | \item{stabk}{mean instability values for numbers of clusters (or one 85 | minus this if \code{largeisgood=TRUE}).} 86 | \item{stab}{matrix of instability values for all bootstrap runs and 87 | numbers of clusters.} 88 | } 89 | \references{ 90 | Fang, Y. and Wang, J. (2012) Selection of the number of clusters via 91 | the bootstrap method. \emph{Computational Statistics and Data 92 | Analysis}, 56, 468-477. 93 | } 94 | \author{Christian Hennig 95 | \email{christian.hennig@unibo.it} 96 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 97 | } 98 | \seealso{ 99 | \code{\link{classifdist}}, \code{\link{classifnp}}, 100 | \code{\link{clusterboot}},\code{\link{kmeansCBI}} 101 | } 102 | \examples{ 103 | set.seed(20000) 104 | face <- rFace(50,dMoNo=2,dNoEy=0,p=2) 105 | nselectboot(dist(face),B=2,clustermethod=disthclustCBI, 106 | method="average",krange=5:7) 107 | nselectboot(dist(face),B=2,clustermethod=claraCBI, 108 | classification="centroid",krange=5:7) 109 | nselectboot(face,B=2,clustermethod=kmeansCBI, 110 | classification="centroid",krange=5:7) 111 | # Of course use larger B in a real application. 112 | } 113 | \keyword{cluster}% at least one, from doc/KEYWORDS 114 | \keyword{multivariate} 115 | 116 | 117 | 118 | -------------------------------------------------------------------------------- /man/pamk.Rd: -------------------------------------------------------------------------------- 1 | \name{pamk} 2 | \alias{pamk} 3 | 4 | %- Also NEED an `\alias' for EACH other topic documented here. 
5 | \title{Partitioning around medoids with estimation of number of clusters} 6 | \description{ 7 | This calls the function \code{\link[cluster]{pam}} or 8 | \code{\link[cluster]{clara}} to perform a 9 | partitioning around medoids clustering with the number of clusters 10 | estimated by optimum average silhouette width (see 11 | \code{\link[cluster]{pam.object}}) or Calinski-Harabasz 12 | index (\code{\link{calinhara}}). The Duda-Hart test 13 | (\code{\link{dudahart2}}) is applied to decide whether there should be 14 | more than one cluster (unless 1 is excluded as number of clusters or 15 | data are dissimilarities). 16 | } 17 | \usage{ 18 | pamk(data,krange=2:10,criterion="asw", usepam=TRUE, 19 | scaling=FALSE, alpha=0.001, diss=inherits(data, "dist"), 20 | critout=FALSE, ns=10, seed=NULL, ...) 21 | } 22 | \arguments{ 23 | \item{data}{a data matrix or data frame or something that can be 24 | coerced into a matrix, or dissimilarity matrix or 25 | object. See \code{\link[cluster]{pam}} for more information.} 26 | \item{krange}{integer vector. Numbers of clusters which are to be 27 | compared by the average silhouette width criterion. Note: average 28 | silhouette width and Calinski-Harabasz can't estimate number of 29 | clusters \code{nc=1}. If 1 is included, a Duda-Hart test is applied 30 | and 1 is estimated if this is not significant.} 31 | \item{criterion}{one of \code{"asw"}, \code{"multiasw"} or 32 | \code{"ch"}. Determines whether average silhouette width (as given 33 | out by \code{\link[cluster]{pam}}/\code{\link[cluster]{clara}}, or 34 | as computed by \code{\link{distcritmulti}} if \code{"multiasw"} is 35 | specified; recommended for large data sets with \code{usepam=FALSE}) 36 | or Calinski-Harabasz is applied. Note that the original 37 | Calinski-Harabasz index is not defined for dissimilarities; if 38 | dissimilarity data is run with \code{criterion="ch"}, the 39 | dissimilarity-based generalisation in Hennig and Liao (2013) is 40 | used.} 41 | \item{usepam}{logical. If \code{TRUE}, \code{\link[cluster]{pam}} is 42 | used, otherwise \code{\link[cluster]{clara}} (recommended for large 43 | datasets with 2,000 or more observations; dissimilarity matrices can 44 | not be used with \code{\link[cluster]{clara}}).} 45 | \item{scaling}{either a logical value or a numeric vector of length 46 | equal to the number of variables. If \code{scaling} is a numeric 47 | vector with length equal to the number of variables, then each 48 | variable is divided by the corresponding value from \code{scaling}. 49 | If \code{scaling} is \code{TRUE} then scaling is done by dividing 50 | the (centered) variables by their root-mean-square, and if 51 | \code{scaling} is \code{FALSE}, no scaling is done.} 52 | \item{alpha}{numeric between 0 and 1, tuning constant for 53 | \code{\link{dudahart2}} (only used for 1-cluster test).} 54 | \item{diss}{logical flag: if \code{TRUE} (default for \code{dist} or 55 | \code{dissimilarity}-objects), then \code{data} will be considered 56 | as a dissimilarity matrix (and the potential number of clusters 1 57 | will be ignored). If \code{FALSE}, then \code{data} will 58 | be considered as a matrix of observations by variables.} 59 | \item{critout}{logical. 
If \code{TRUE}, the criterion value is printed 60 | out for every number of clusters.} 61 | \item{ns}{passed on to \code{\link{distcritmulti}} if 62 | \code{criterion="multiasw"}.} 63 | \item{seed}{passed on to \code{\link{distcritmulti}} if 64 | \code{criterion="multiasw"}.} 65 | \item{...}{further arguments to be transferred to 66 | \code{\link[cluster]{pam}} or \code{\link[cluster]{clara}}.} 67 | } 68 | 69 | \note{ 70 | \code{\link[cluster]{clara}} and \code{\link[cluster]{pam}} 71 | can handle \code{NA}-entries (see their documentation) but 72 | \code{\link{dudahart2}} cannot. Therefore \code{NA} should not occur 73 | if 1 is in \code{krange}. 74 | } 75 | 76 | \value{ 77 | A list with components 78 | \item{pamobject}{The output of the optimal run of the 79 | \code{\link[cluster]{pam}}-function.} 80 | \item{nc}{the optimal number of clusters.} 81 | \item{crit}{vector of criterion values for numbers of 82 | clusters. \code{crit[1]} is the p-value of the Duda-Hart test 83 | if 1 is in \code{krange} and \code{diss=FALSE}.} 84 | } 85 | \author{Christian Hennig 86 | \email{christian.hennig@unibo.it} 87 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 88 | } 89 | \references{ 90 | Calinski, T. and Harabasz, J. (1974) A Dendrite Method for Cluster 91 | Analysis, \emph{Communications in Statistics}, 3, 1-27. 92 | 93 | Duda, R. O. and Hart, P. E. (1973) \emph{Pattern Classification and 94 | Scene Analysis}. Wiley, New York. 95 | 96 | Hennig, C. and Liao, T. (2013) How to find an appropriate clustering 97 | for mixed-type variables with application to socio-economic 98 | stratification, \emph{Journal of the Royal Statistical Society, Series 99 | C (Applied Statistics)}, 62, 309-369. 100 | 101 | Kaufman, L. and Rousseeuw, P.J. (1990). "Finding Groups in Data: 102 | An Introduction to Cluster Analysis". Wiley, New York. 103 | } 104 | \seealso{ 105 | \code{\link[cluster]{pam}}, \code{\link[cluster]{clara}}, 106 | \code{\link{distcritmulti}} 107 | } 108 | \examples{ 109 | options(digits=3) 110 | set.seed(20000) 111 | face <- rFace(50,dMoNo=2,dNoEy=0,p=2) 112 | pk1 <- pamk(face,krange=1:5,criterion="asw",critout=TRUE) 113 | pk2 <- pamk(face,krange=1:5,criterion="multiasw",ns=2,critout=TRUE) 114 | # "multiasw" is better for larger data sets, use larger ns then. 115 | pk3 <- pamk(face,krange=1:5,criterion="ch",critout=TRUE) 116 | } 117 | \keyword{cluster}% at least one, from doc/KEYWORDS 118 | \keyword{multivariate} 119 | 120 | 121 | 122 | -------------------------------------------------------------------------------- /man/piridge.Rd: -------------------------------------------------------------------------------- 1 | \name{piridge} 2 | \alias{piridge} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Ridgeline Pi-function} 5 | \description{ 6 | The Pi-function is given in (6) in Ray and Lindsay, 2005. Equating it 7 | to the mixture proportion yields locations of two-component Gaussian 8 | mixture density extrema. 9 | } 10 | \usage{ 11 | piridge(alpha, mu1, mu2, Sigma1, Sigma2, showplot=FALSE) 12 | } 13 | %- maybe also `usage' for other objects documented here. 14 | \arguments{ 15 | \item{alpha}{sequence of values between 0 and 1 for which the Pi-function 16 | is computed.} 17 | \item{mu1}{mean vector of component 1.} 18 | \item{mu2}{mean vector of component 2.} 19 | \item{Sigma1}{covariance matrix of component 1.} 20 | \item{Sigma2}{covariance matrix of component 2.} 21 | \item{showplot}{logical.
If \code{TRUE}, the Pi-function is plotted 22 | against \code{alpha}.} 23 | } 24 | 25 | \value{ 26 | Vector of values of the Pi-function for values of \code{alpha}. 27 | } 28 | 29 | \references{ 30 | Ray, S. and Lindsay, B. G. (2005) The Topography of Multivariate 31 | Normal Mixtures, \emph{Annals of Statistics}, 33, 2042-2065. 32 | } 33 | \author{Christian Hennig 34 | \email{christian.hennig@unibo.it} 35 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 36 | } 37 | \examples{ 38 | q <- piridge(seq(0,1,0.1),c(1,1),c(2,5),diag(2),diag(2)) 39 | } 40 | \keyword{cluster}% at least one, from doc/KEYWORDS 41 | \keyword{multivariate} 42 | 43 | 44 | 45 | -------------------------------------------------------------------------------- /man/piridge.zeroes.Rd: -------------------------------------------------------------------------------- 1 | \name{piridge.zeroes} 2 | \alias{piridge.zeroes} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Extrema of two-component Gaussian mixture} 5 | \description{ 6 | By use of the Pi-function in Ray and Lindsay, 2005, locations of 7 | two-component Gaussian mixture density extrema or saddlepoints are computed. 8 | } 9 | \usage{ 10 | piridge.zeroes(prop, mu1, mu2, Sigma1, Sigma2, alphamin=0, 11 | alphamax=1,by=0.001) 12 | } 13 | %- maybe also `usage' for other objects documented here. 14 | \arguments{ 15 | \item{prop}{proportion of mixture component 1.} 16 | \item{mu1}{mean vector of component 1.} 17 | \item{mu2}{mean vector of component 2.} 18 | \item{Sigma1}{covariance matrix of component 1.} 19 | \item{Sigma2}{covariance matrix of component 2.} 20 | \item{alphamin}{minimum alpha value.} 21 | \item{alphamax}{maximum alpha value.} 22 | \item{by}{interval between alpha-values where to look for extrema.} 23 | } 24 | 25 | \value{ 26 | list with components 27 | \item{number.zeroes}{number of zeroes of Pi-function, i.e., 28 | extrema or saddlepoints of density.} 29 | \item{estimated.roots}{estimated \code{alpha}-values at which extrema 30 | or saddlepoints occur.} 31 | } 32 | 33 | \references{ 34 | Ray, S. and Lindsay, B. G. (2005) The Topography of Multivariate 35 | Normal Mixtures, \emph{Annals of Statistics}, 33, 2042-2065. 36 | } 37 | \author{Christian Hennig 38 | \email{christian.hennig@unibo.it} 39 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 40 | } 41 | \examples{ 42 | q <- piridge.zeroes(0.2,c(1,1),c(2,5),diag(2),diag(2),by=0.1) 43 | } 44 | \keyword{cluster}% at least one, from doc/KEYWORDS 45 | \keyword{multivariate} 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /man/plotcluster.Rd: -------------------------------------------------------------------------------- 1 | \name{plotcluster} 2 | \alias{plotcluster} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Discriminant projection plot.} 5 | \description{ 6 | Plots to distinguish given classes by ten available projection 7 | methods. Includes classical discriminant 8 | coordinates, methods to project differences in 9 | mean and covariance structure, asymmetric methods (separation of a 10 | homogeneous class from a heterogeneous one), local neighborhood-based 11 | methods and methods based on robust covariance matrices. 12 | One-dimensional data is plotted against the cluster number. 
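% Editor's sketch (not part of the original documentation): the Note further
% below suggests re-running plotcluster with xlim/ylim to magnify the
% homogeneous class. With face and grface as in the \examples section, and
% purely illustrative (made-up) plot limits:
% plotcluster(face, grface, clnum=1, method="awc")
% plotcluster(face, grface, clnum=1, method="awc",
%             xlim=c(-2,2), ylim=c(-2,2))  # zoom read off the first plot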
13 | } 14 | \usage{ 15 | plotcluster(x, clvecd, clnum=NULL, 16 | method=ifelse(is.null(clnum),"dc","awc"), 17 | bw=FALSE, 18 | ignorepoints=FALSE, ignorenum=0, pointsbyclvecd=TRUE, 19 | xlab=NULL, ylab=NULL, 20 | pch=NULL, col=NULL, ...) 21 | } 22 | %- maybe also `usage' for other objects documented here. 23 | \arguments{ 24 | \item{x}{the data matrix; a numerical object which can be coerced 25 | to a matrix.} 26 | \item{clvecd}{vector of class numbers which can be coerced into 27 | integers; length must equal 28 | \code{nrow(x)}.} 29 | \item{method}{one of 30 | \describe{ 31 | \item{"dc"}{usual discriminant coordinates, see \code{\link{discrcoord}},} 32 | \item{"bc"}{Bhattacharyya coordinates, first coordinate showing 33 | mean differences, second showing covariance matrix differences, 34 | see \code{\link{batcoord}},} 35 | \item{"vbc"}{variance dominated Bhattacharyya coordinates, 36 | see \code{\link{batcoord}},} 37 | \item{"mvdc"}{added mean and variance differences optimizing 38 | coordinates, see \code{\link{mvdcoord}},} 39 | \item{"adc"}{asymmetric discriminant coordinates, see 40 | \code{\link{adcoord}},} 41 | \item{"awc"}{asymmetric discriminant coordinates with weighted 42 | observations, see \code{\link{awcoord}},} 43 | \item{"arc"}{asymmetric discriminant coordinates with weighted 44 | observations and robust MCD-covariance matrix, 45 | see \code{\link{awcoord}},} 46 | \item{"nc"}{neighborhood based coordinates, 47 | see \code{\link{ncoord}},} 48 | \item{"wnc"}{neighborhood based coordinates with weighted neighborhoods, 49 | see \code{\link{ncoord}},} 50 | \item{"anc"}{asymmetric neighborhood based coordinates, 51 | see \code{\link{ancoord}}.} 52 | } 53 | Note that "bc", "vbc", "adc", "awc", "arc" and "anc" assume that 54 | there are only two classes.} 55 | \item{clnum}{integer. Number of the class that the "asymmetric methods" 56 | (the methods assuming that there are only two classes, 57 | as indicated above) attempt to plot homogeneously. 58 | \code{clnum} is ignored for methods "dc" and "nc".} 59 | \item{bw}{logical. If \code{TRUE}, the classes are distinguished by 60 | symbols, and the default color is black/white. 61 | If \code{FALSE}, the classes are distinguished by 62 | colors, and the default symbol is \code{pch=1}.} 63 | \item{ignorepoints}{logical. If \code{TRUE}, points with label 64 | \code{ignorenum} in \code{clvecd} are ignored in the computation for 65 | \code{method} and are only projected afterwards onto the resulting 66 | units. If \code{pch=NULL}, the plot symbol for these points is "N".} 67 | \item{ignorenum}{one of the potential values of the components of 68 | \code{clvecd}. Only has an effect if \code{ignorepoints=TRUE}, see above.} 69 | \item{pointsbyclvecd}{logical. If \code{TRUE} and \code{pch=NULL} 70 | and/or \code{col=NULL}, some hopefully suitable 71 | plot symbols (numbers and letters) and colors are chosen to 72 | distinguish the values of \code{clvecd}, starting with "1"/"black" 73 | for the cluster with the smallest \code{clvecd}-code (note that 74 | colors for clusters with numbers larger than minimum number 75 | \code{+3} are drawn at random from all available colors). 76 | \code{FALSE} produces 77 | potentially less reasonable (but nonrandom) standard colors and symbols if 78 | \code{method} is "dc" or "nc", and will only distinguish whether 79 | \code{clvecd=clnum} or not for the other methods.} 80 | \item{xlab}{label for x-axis. If \code{NULL}, a default text is used.} 81 | \item{ylab}{label for y-axis.
If \code{NULL}, a default text is used.} 82 | \item{pch}{plotting symbol, see \code{\link{par}}. 83 | If \code{NULL}, the default is used.} 84 | \item{col}{plotting color, see \code{\link{par}}. 85 | If \code{NULL}, the default is used.} 86 | \item{...}{additional parameters passed to \code{plot} or the 87 | projection methods.} 88 | } 89 | % \details{ 90 | 91 | % } 92 | \note{ 93 | For some of the asymmetric methods, the area in the plot 94 | occupied by the "homogeneous class" (see \code{clnum} above) may be 95 | very small, and it may make sense to run \code{plotcluster} a second 96 | time specifying plot parameters \code{xlim} and \code{ylim} in a 97 | suitable way. It often makes sense to magnify the plot region 98 | containing the homogeneous class in this way 99 | so that its separation from the rest can be 100 | seen more clearly. 101 | } 102 | 103 | 104 | \references{ 105 | Hennig, C. (2004) Asymmetric linear dimension reduction for classification. 106 | \emph{Journal of Computational and Graphical Statistics} 13, 930-945. 107 | 108 | Hennig, C. (2005) A method for visual cluster validation. In: 109 | Weihs, C. and Gaul, W. (eds.): Classification - The Ubiquitous 110 | Challenge. Springer, Heidelberg 2005, 153-160. 111 | 112 | Seber, G. A. F. (1984). \emph{Multivariate Observations}. New York: Wiley. 113 | 114 | Fukunaga, K. (1990). \emph{Introduction to Statistical Pattern 115 | Recognition} (2nd ed.). Boston: Academic Press. 116 | } 117 | \author{Christian Hennig 118 | \email{christian.hennig@unibo.it} 119 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/}} 120 | 121 | \seealso{ 122 | \code{\link{discrcoord}}, \code{\link{batcoord}}, 123 | \code{\link{mvdcoord}}, \code{\link{adcoord}}, 124 | \code{\link{awcoord}}, \code{\link{ncoord}}, 125 | \code{\link{ancoord}}. 126 | 127 | \code{\link{discrproj}} is an interface to all these projection methods. 128 | 129 | \code{\link{rFace}} for generation of the example data used below. 130 | } 131 | 132 | \examples{ 133 | set.seed(4634) 134 | face <- rFace(300,dMoNo=2,dNoEy=0) 135 | grface <- as.integer(attr(face,"grouping")) 136 | plotcluster(face,grface) 137 | plotcluster(face,grface==1) 138 | plotcluster(face,grface, clnum=1, method="vbc") 139 | } 140 | \keyword{multivariate}% at least one, from doc/KEYWORDS 141 | \keyword{classif}% __ONLY ONE__ keyword per line 142 | 143 | 144 | 145 | -------------------------------------------------------------------------------- /man/rFace.Rd: -------------------------------------------------------------------------------- 1 | \name{rFace} 2 | \alias{rFace} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{"Face-shaped" clustered benchmark datasets} 5 | \description{ 6 | Generates "face-shaped" clustered benchmark datasets. 7 | This is based on a collaboration with Martin Maechler. 8 | } 9 | \usage{ 10 | rFace(n, p = 6, nrep.top = 2, smile.coef = 0.6, dMoNo = 1.2, dNoEy = 1) 11 | } 12 | %- maybe also `usage' for other objects documented here. 13 | \arguments{ 14 | \item{n}{integer greater than or equal to 10. Number of points.} 15 | \item{p}{integer greater than or equal to 2. Dimension.} 16 | \item{nrep.top}{integer. Number of repetitions of the hair-top point.} 17 | \item{smile.coef}{numeric. Coefficient for quadratic term used for 18 | generation of mouth-points. Positive values => smile.} 19 | \item{dMoNo}{number. Distance from mouth to nose.} 20 | \item{dNoEy}{number.
Minimum vertical distance from mouth to eyes.} 21 | } 22 | \details{ 23 | The function generates a nice benchmark example for cluster 24 | analysis. 25 | There are six "clusters" in this data, of which the first five are 26 | clearly homogeneous patterns, but with different distributional 27 | shapes and different qualities of separation. The clusters are 28 | distinguished only in the first two dimensions. The attribute 29 | \code{grouping} is a factor giving the cluster numbers, see below. 30 | The sixth group of 31 | points corresponds to some hairs, and is a collection of 32 | outliers rather than a cluster in itself. This group contains 33 | \code{nrep.top+2} points. Of the remaining points, 20\% belong to 34 | cluster 1, the chin (quadratic function plus noise). 35 | 10\% belong to cluster 2, the right eye (Gaussian). 30\% belong to 36 | cluster 3, the mouth (Gaussian/squared Gaussian). 37 | 20\% belong to cluster 4, the nose (Gaussian/gamma), and 38 | 20\% belong to cluster 5, the left eye (uniform). 39 | 40 | The distributions of the further 41 | variables are homogeneous over 42 | all points. The third dimension is exponentially distributed, the 43 | fourth dimension is Cauchy distributed, all further distributions are 44 | Gaussian. 45 | 46 | Please consider the source code for exact generation of the clusters. 47 | } 48 | \value{ 49 | An \code{n} times \code{p} numeric matrix with attributes 50 | \item{grouping}{a factor giving the cluster memberships of the points.} 51 | \item{indexlist}{a list of six vectors containing the indices of points 52 | belonging to the six groups.} 53 | } 54 | 55 | \author{ 56 | 57 | Christian Hennig 58 | \email{christian.hennig@unibo.it} 59 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/}} 60 | 61 | \examples{ 62 | set.seed(4634) 63 | face <- rFace(600,dMoNo=2,dNoEy=0) 64 | grface <- as.integer(attr(face,"grouping")) 65 | plot(face, col = grface) 66 | # pairs(face, col = grface, main ="rFace(600,dMoNo=2,dNoEy=0)") 67 | } 68 | \keyword{data}% at least one, from doc/KEYWORDS 69 | -------------------------------------------------------------------------------- /man/randcmatrix.Rd: -------------------------------------------------------------------------------- 1 | \name{randcmatrix} 2 | \alias{randcmatrix} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Random partition matrix} 5 | \description{ 6 | For use within \code{regmix}. Generates a random 7 | 0-1-matrix with \code{n} rows 8 | and \code{cln} columns so that every row contains exactly one 1 and 9 | every column contains at least \code{p+3} ones. 10 | } 11 | \usage{ 12 | randcmatrix(n,cln,p) 13 | } 14 | %- maybe also `usage' for other objects documented here. 15 | \arguments{ 16 | \item{n}{positive integer. Number of rows.} 17 | \item{cln}{positive integer. Number of columns.} 18 | \item{p}{positive integer. See above.} 19 | } 20 | \value{ 21 | An \code{n*cln}-matrix.
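% Editor's sketch (not from the original docs): a quick check of the two
% defining properties stated in the description, here with n=20, cln=3, p=1:
% cm <- randcmatrix(20, 3, 1)
% all(rowSums(cm) == 1)      # every row has exactly one 1
% all(colSums(cm) >= 1 + 3)  # every column has at least p+3 ones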
22 | } 23 | \author{Christian Hennig 24 | \email{christian.hennig@unibo.it} 25 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/}} 26 | \seealso{ 27 | \code{\link{regmix}} 28 | } 29 | \examples{ 30 | set.seed(111) 31 | randcmatrix(10,2,1) 32 | } 33 | \keyword{cluster}% at least one, from doc/KEYWORDS 34 | 35 | -------------------------------------------------------------------------------- /man/randconf.Rd: -------------------------------------------------------------------------------- 1 | \name{randconf} 2 | \alias{randconf} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Generate a sample indicator vector} 5 | \description{ 6 | Generates a logical vector of length \code{n} with \code{p TRUE}s. 7 | } 8 | \usage{ 9 | randconf(n, p) 10 | } 11 | %- maybe also `usage' for other objects documented here. 12 | \arguments{ 13 | \item{n}{positive integer.} 14 | \item{p}{positive integer.} 15 | } 16 | 17 | \value{ 18 | A logical vector. 19 | } 20 | 21 | \author{Christian Hennig 22 | \email{christian.hennig@unibo.it} 23 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/}} 24 | 25 | \seealso{\code{\link{sample}}} 26 | 27 | \examples{ 28 | randconf(10,3) 29 | } 30 | \keyword{distribution}% at least one, from doc/KEYWORDS 31 | 32 | -------------------------------------------------------------------------------- /man/ridgeline.Rd: -------------------------------------------------------------------------------- 1 | \name{ridgeline} 2 | \alias{ridgeline} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Ridgeline computation} 5 | \description{ 6 | Computes \eqn{(\alpha*\Sigma_1^{-1}+(1-\alpha)*\Sigma_2^{-1})^{-1}* 7 | (\alpha*\Sigma_1^{-1}*\mu_1+(1-\alpha)*\Sigma_2^{-1}*\mu_2)}{% 8 | (alpha*Sigma1^{-1}+(1-alpha)*Sigma2^{-1})^{-1}* 9 | (alpha*Sigma1^{-1}*mu1+(1-alpha)*Sigma2^{-1}*mu2)} 10 | as required for the 11 | computation of the ridgeline (Ray and Lindsay, 2005) to find 12 | all density extrema of a two-component Gaussian mixture with 13 | mean vectors \code{mu1} and \code{mu2} and covariance matrices 14 | \code{Sigma1}, \code{Sigma2}. 15 | \} hmm 16 | ridgeline(alpha, mu1, mu2, Sigma1, Sigma2) 17 | } 18 | %- maybe also `usage' for other objects documented here. 19 | \arguments{ 20 | \item{alpha}{numeric between 0 and 1.} 21 | \item{mu1}{mean vector of component 1.} 22 | \item{mu2}{mean vector of component 2.} 23 | \item{Sigma1}{covariance matrix of component 1.} 24 | \item{Sigma2}{covariance matrix of component 2.} 25 | } 26 | 27 | \value{ 28 | A vector. See above. 29 | } 30 | 31 | \references{ 32 | Ray, S. and Lindsay, B. G. (2005) The Topography of Multivariate 33 | Normal Mixtures, \emph{Annals of Statistics}, 33, 2042-2065. 34 | } 35 | \author{Christian Hennig 36 | \email{christian.hennig@unibo.it} 37 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 38 | } 39 | \examples{ 40 | ridgeline(0.5,c(1,1),c(2,5),diag(2),diag(2)) 41 | } 42 | \keyword{cluster}% at least one, from doc/KEYWORDS 43 | \keyword{multivariate} 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /man/ridgeline.diagnosis.Rd: -------------------------------------------------------------------------------- 1 | \name{ridgeline.diagnosis} 2 | \alias{ridgeline.diagnosis} 3 | %- Also NEED an `\alias' for EACH other topic documented here.
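% Editor's sketch (not from the original docs): tracing the ridgeline of the
% preceding help page over a grid of alpha-values; the endpoints of the
% resulting curve are the two component means:
% ra <- t(sapply(seq(0, 1, by=0.1), function(a)
%   ridgeline(a, c(1,1), c(2,5), diag(2), diag(2))))
% ra  # 11 points on the ridgeline, from mu2 (alpha=0) to mu1 (alpha=1)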
4 | \title{Ridgeline plots, ratios and unimodality} 5 | \description{ 6 | Computes ridgeline ratios and unimodality checks for pairs of components 7 | given the parameters of a Gaussian mixture. Produces ridgeline plots. 8 | } 9 | \usage{ 10 | ridgeline.diagnosis (propvector,muarray,Sigmaarray, 11 | k=length(propvector), 12 | ipairs="all", compute.ratio=TRUE,by=0.001, 13 | ratiocutoff=NULL,ridgelineplot="matrix") 14 | 15 | } 16 | %- maybe also `usage' for other objects documented here. 17 | \arguments{ 18 | \item{propvector}{vector of component proportions. Length must be 19 | number of components, and must sum up to 1.} 20 | \item{muarray}{matrix of component means (different components are in 21 | different columns).} 22 | \item{Sigmaarray}{three dimensional array with component covariance 23 | matrices (the third dimension refers to components).} 24 | \item{k}{integer. Number of components.} 25 | \item{ipairs}{\code{"all"} or list of vectors of two integers. If 26 | \code{ipairs="all"}, computations are carried out for all pairs of 27 | components. Otherwise, \code{ipairs} gives the pairs of components for 28 | which computations are carried out.} 29 | \item{compute.ratio}{logical. If \code{TRUE}, a matrix of ridgeline 30 | ratios is computed, see Hennig (2010a).} 31 | \item{by}{real between 0 and 1. Interval width for density computation 32 | along the ridgeline.} 33 | \item{ratiocutoff}{real between 0 and 1. If not \code{NULL}, the 34 | \code{connection.matrix} (see below) is computed by checking whether 35 | ridgeline ratios between components are below \code{ratiocutoff}.} 36 | \item{ridgelineplot}{one of \code{"none"}, \code{"matrix"}, 37 | \code{"pairwise"}. If \code{"matrix"}, a matrix of pairwise 38 | ridgeline plots (see Hennig 2010b) will be plotted. If 39 | \code{"pairwise"}, pairwise ridgeline plots are plotted (you may 40 | want to set \code{par(ask=TRUE)} to see them all). No plotting if 41 | \code{"none"}.} 42 | } 43 | 44 | \value{ 45 | A list with components 46 | \item{merged.clusters}{vector of integers, stating for every mixture 47 | component the number of the cluster of components that would be merged 48 | by merging connectivity components of the graph specified by 49 | \code{connection.matrix}.} 50 | \item{connection.matrix}{zero-one matrix, in which a one means that the 51 | mixture of the corresponding pair of components of the original 52 | mixture is either unimodal (if \code{ratiocutoff=NULL}) or that their 53 | ridgeline ratio is above \code{ratiocutoff}. If \code{ipairs!="all"}, 54 | ignored pairs always have 0 in this matrix, same for 55 | \code{ratio.matrix}.} 56 | \item{ratio.matrix}{matrix with entries between 0 and 1, giving the 57 | ridgeline ratio, which is the density minimum of the mixture of the 58 | corresponding pair of components along the ridgeline divided by the 59 | minimum of the two maxima closest to the beginning and the end of the 60 | ridgeline.} 61 | } 62 | 63 | \references{ 64 | Hennig, C. (2010a) Methods for merging Gaussian mixture components, 65 | \emph{Advances in Data Analysis and Classification}, 4, 3-34. 66 | 67 | Hennig, C. (2010b) Ridgeline plot and clusterwise stability as tools 68 | for merging Gaussian mixture components. To appear in 69 | \emph{Classification as a Tool for Research}, Proceedings of IFCS 70 | 2009. 71 | 72 | Ray, S. and Lindsay, B. G. (2005) The Topography of Multivariate 73 | Normal Mixtures, \emph{Annals of Statistics}, 33, 2042-2065.
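% Editor's sketch (not from the original docs) of the ridgeline ratio defined
% for ratio.matrix above, using dridgeline (documented elsewhere in this
% package) to evaluate the mixture density along the ridgeline:
% alpha <- seq(0, 1, by=0.001)
% dens <- dridgeline(alpha=alpha, prop=0.5, mu1=c(0,0), mu2=c(3,3),
%                    Sigma1=diag(2), Sigma2=diag(2))
% plot(alpha, dens, type="l")
% # Two maxima with a valley in between indicate bimodality; the ridgeline
% # ratio is the valley minimum divided by the smaller of the two maxima.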
74 | } 75 | \author{Christian Hennig 76 | \email{christian.hennig@unibo.it} 77 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 78 | } 79 | 80 | \seealso{ 81 | \code{\link{ridgeline}}, \code{\link{dridgeline}}, 82 | \code{\link{piridge}}, \code{\link{piridge.zeroes}} 83 | } 84 | 85 | \examples{ 86 | muarray <- cbind(c(0,0),c(0,0.1),c(10,10)) 87 | sigmaarray <- array(c(diag(2),diag(2),diag(2)),dim=c(2,2,3)) 88 | rd <- 89 | ridgeline.diagnosis(c(0.5,0.3,0.2),muarray,sigmaarray,ridgelineplot="matrix",by=0.1) 90 | # Much slower but more precise with default by=0.001. 91 | } 92 | \keyword{cluster}% at least one, from doc/KEYWORDS 93 | \keyword{multivariate} 94 | 95 | 96 | 97 | -------------------------------------------------------------------------------- /man/simmatrix.Rd: -------------------------------------------------------------------------------- 1 | \name{simmatrix} 2 | \alias{simmatrix} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Extracting intersections between clusters from fpc-object} 5 | \description{ 6 | Extracts the information about the size of the intersections 7 | between representative 8 | Fixed Point Clusters (FPCs) of stable groups from the output of 9 | the FPC-functions \code{\link{fixreg}} and \code{\link{fixmahal}}. 10 | } 11 | \usage{ 12 | simmatrix(fpcobj) 13 | } 14 | %- maybe also `usage' for other objects documented here. 15 | \arguments{ 16 | \item{fpcobj}{an object of class \code{rfpc} or \code{mfpc}.} 17 | } 18 | 19 | \value{ 20 | A non-negative real-valued vector giving the number of points in 21 | the intersections of the representative FPCs of stable groups. 22 | } 23 | 24 | \author{Christian Hennig 25 | \email{christian.hennig@unibo.it} 26 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 27 | } 28 | \note{The intersection between representative FPCs no. \code{i} and 29 | \code{j} is at position \code{\link{sseg}(i,j)}.} 30 | 31 | \seealso{ 32 | \code{\link{fixmahal}}, 33 | \code{\link{fixreg}}, 34 | \code{\link{sseg}} 35 | } 36 | 37 | \examples{ 38 | set.seed(190000) 39 | data(tonedata) 40 | # Note: If you do not use the installed package, replace this by 41 | # tonedata <- read.table("(path/)tonedata.txt", header=TRUE) 42 | attach(tonedata) 43 | tonefix <- fixreg(stretchratio,tuned,mtf=1,ir=20) 44 | simmatrix(tonefix)[sseg(2,3)] 45 | } 46 | \keyword{utilities}% at least one, from doc/KEYWORDS 47 | 48 | -------------------------------------------------------------------------------- /man/solvecov.Rd: -------------------------------------------------------------------------------- 1 | \name{solvecov} 2 | \alias{solvecov} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Inversion of (possibly singular) symmetric matrices} 5 | \description{ 6 | Tries to invert a matrix by \code{solve}. If this fails because of 7 | singularity, an 8 | eigenvector decomposition is computed, and eigenvalues below 9 | \code{1/cmax} are replaced by \code{1/cmax}, i.e., \code{cmax} will be 10 | the corresponding eigenvalue of the inverted matrix. 11 | } 12 | \usage{ 13 | solvecov(m, cmax = 1e+10) 14 | } 15 | %- maybe also `usage' for other objects documented here. 
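% Editor's sketch (not from the original docs) on a singular matrix, where
% solve() fails and the eigenvalue-based fallback described above is used:
% m <- matrix(1, nrow=2, ncol=2)  # symmetric, rank 1
% sc <- solvecov(m)
% sc$coll  # TRUE: solve() failed because of singularity
% sc$inv   # regularised inverse, eigenvalues capped at cmax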
16 | \arguments{ 17 | \item{m}{a numeric symmetric matrix.} 18 | \item{cmax}{a positive value, see above.} 19 | } 20 | 21 | \value{ 22 | A list with the following components: 23 | \item{inv}{the inverted matrix} 24 | \item{coll}{\code{TRUE} if \code{solve} failed because of singularity.} 25 | } 26 | \author{Christian Hennig 27 | \email{christian.hennig@unibo.it} 28 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/}} 29 | 30 | \seealso{\code{\link{solve}}, \code{\link{eigen}}} 31 | 32 | \examples{ 33 | x <- c(1,0,0,1,0,1,0,0,1) 34 | dim(x) <- c(3,3) 35 | solvecov(x) 36 | } 37 | \keyword{array}% at least one, from doc/KEYWORDS 38 | 39 | 40 | -------------------------------------------------------------------------------- /man/sseg.Rd: -------------------------------------------------------------------------------- 1 | \name{sseg} 2 | \alias{sseg} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Position in a similarity vector} 5 | \description{ 6 | \code{sseg(i,j)} gives the position of the similarity of objects 7 | \code{i} and \code{j} in the similarity vectors produced by 8 | \code{fixreg} and \code{fixmahal}. 9 | \code{sseg} should only be used as an auxiliary function in 10 | \code{fixreg} and \code{fixmahal}. 11 | } 12 | \usage{ 13 | sseg(i, j) 14 | } 15 | %- maybe also `usage' for other objects documented here. 16 | \arguments{ 17 | \item{i}{positive integer.} 18 | \item{j}{positive integer.} 19 | } 20 | \value{A positive integer. 21 | } 22 | \author{Christian Hennig 23 | \email{christian.hennig@unibo.it} 24 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/}} 25 | 26 | \examples{ 27 | sseg(3,4) 28 | } 29 | \keyword{utilities}% at least one, from doc/KEYWORDS 30 | -------------------------------------------------------------------------------- /man/stupidkaven.Rd: -------------------------------------------------------------------------------- 1 | \name{stupidkaven} 2 | \alias{stupidkaven} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Stupid average dissimilarity random clustering} 5 | \description{ 6 | Picks k random starting points from given dataset to initialise k 7 | clusters. Then, one by one, the point not yet assigned to any cluster 8 | with smallest average dissimilarity to the points of any already 9 | existing cluster is assigned to that 10 | cluster, until all points are assigned. This is a random version of 11 | average linkage clustering, see 12 | Akhanli and Hennig (2020). 13 | } 14 | \usage{ 15 | stupidkaven(d,k) 16 | } 17 | %- maybe also `usage' for other objects documented here. 18 | \arguments{ 19 | \item{d}{\code{dist}-object or dissimilarity matrix.} 20 | \item{k}{integer. Number of clusters.} 21 | } 22 | 23 | % \details{ 24 | % } 25 | \value{ 26 | The clustering vector (values 1 to \code{k}, length equal to the number 27 | of objects behind \code{d}). 28 | } 29 | \references{ 30 | Akhanli, S. and Hennig, C. (2020) Calibrating and aggregating cluster 31 | validity indexes for context-adapted comparison of clusterings.
32 | \emph{Statistics and Computing}, 30, 1523-1544, 33 | \url{https://link.springer.com/article/10.1007/s11222-020-09958-2}, \url{https://arxiv.org/abs/2002.01822} 34 | 35 | 36 | } 37 | \author{Christian Hennig 38 | \email{christian.hennig@unibo.it} 39 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 40 | } 41 | 42 | \seealso{ 43 | \code{\link{stupidkcentroids}}, \code{\link{stupidknn}}, \code{\link{stupidkfn}} 44 | } 45 | 46 | \examples{ 47 | set.seed(20000) 48 | options(digits=3) 49 | face <- rFace(200,dMoNo=2,dNoEy=0,p=2) 50 | stupidkaven(dist(face),3) 51 | } 52 | \keyword{multivariate}% at least one, from doc/KEYWORDS 53 | \keyword{cluster}% __ONLY ONE__ keyword per line 54 | 55 | 56 | 57 | -------------------------------------------------------------------------------- /man/stupidkcentroids.Rd: -------------------------------------------------------------------------------- 1 | \name{stupidkcentroids} 2 | \alias{stupidkcentroids} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Stupid k-centroids random clustering} 5 | \description{ 6 | Picks k random centroids from given dataset and assigns every point to 7 | the closest centroid. This is called stupid k-centroids in Hennig (2019). 8 | } 9 | \usage{ 10 | stupidkcentroids(xdata, k, distances = inherits(xdata, "dist")) 11 | } 12 | %- maybe also `usage' for other objects documented here. 13 | \arguments{ 14 | \item{xdata}{cases*variables data, \code{dist}-object or dissimilarity 15 | matrix, see \code{distances}.} 16 | \item{k}{integer. Number of clusters.} 17 | \item{distances}{logical. If \code{TRUE}, \code{xdata} is interpreted 18 | as distances.} 19 | } 20 | 21 | % \details{ 22 | % } 23 | \value{ 24 | A list with components 25 | \item{partition}{vector of integers 1 to \code{k}, of length equal to 26 | the number of objects, indicating to which cluster an object belongs.} 27 | \item{centroids}{vector of integers of length \code{k}, indicating the 28 | centroids of the clusters (observation number).} 29 | \item{distances}{as argument \code{distances}.} 30 | } 31 | \references{ 32 | 33 | Hennig, C. (2019) Cluster validation by measurement of clustering 34 | characteristics relevant to the user. In C. H. Skiadas (ed.) 35 | \emph{Data Analysis and Applications 1: Clustering and Regression, 36 | Modeling-estimating, Forecasting and Data Mining, Volume 2}, Wiley, 37 | New York 1-24, 38 | \url{https://arxiv.org/abs/1703.09282} 39 | 40 | Akhanli, S. and Hennig, C. (2020) Calibrating and aggregating cluster 41 | validity indexes for context-adapted comparison of clusterings.
42 | \emph{Statistics and Computing}, 30, 1523-1544, 43 | \url{https://link.springer.com/article/10.1007/s11222-020-09958-2}, \url{https://arxiv.org/abs/2002.01822} 44 | 45 | } 46 | \author{Christian Hennig 47 | \email{christian.hennig@unibo.it} 48 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 49 | } 50 | 51 | \seealso{ 52 | \code{\link{stupidknn}}, \code{\link{stupidkfn}}, \code{\link{stupidkaven}} 53 | } 54 | 55 | \examples{ 56 | set.seed(20000) 57 | options(digits=3) 58 | face <- rFace(200,dMoNo=2,dNoEy=0,p=2) 59 | stupidkcentroids(dist(face),3) 60 | } 61 | \keyword{multivariate}% at least one, from doc/KEYWORDS 62 | \keyword{cluster}% __ONLY ONE__ keyword per line 63 | 64 | 65 | 66 | -------------------------------------------------------------------------------- /man/stupidkfn.Rd: -------------------------------------------------------------------------------- 1 | \name{stupidkfn} 2 | \alias{stupidkfn} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Stupid farthest neighbour random clustering} 5 | \description{ 6 | Picks k random starting points from given dataset to initialise k 7 | clusters. Then, one by one, points not yet assigned to any cluster 8 | are assigned to the existing 9 | clusters, until all points are assigned. At each step, the point/cluster 10 | pair is chosen that minimises the distance of the point to the 11 | farthest point in the cluster, as in 12 | complete linkage clustering, see 13 | Akhanli and Hennig (2020). 14 | } 15 | \usage{ 16 | stupidkfn(d,k) 17 | } 18 | %- maybe also `usage' for other objects documented here. 19 | \arguments{ 20 | \item{d}{\code{dist}-object or dissimilarity matrix.} 21 | \item{k}{integer. Number of clusters.} 22 | } 23 | 24 | % \details{ 25 | % } 26 | \value{ 27 | The clustering vector (values 1 to \code{k}, length equal to the number 28 | of objects behind \code{d}). 29 | } 30 | \references{ 31 | Akhanli, S. and Hennig, C. (2020) Calibrating and aggregating cluster 32 | validity indexes for context-adapted comparison of clusterings. 33 | \emph{Statistics and Computing}, 30, 1523-1544, 34 | \url{https://link.springer.com/article/10.1007/s11222-020-09958-2}, \url{https://arxiv.org/abs/2002.01822} 35 | 36 | } 37 | \author{Christian Hennig 38 | \email{christian.hennig@unibo.it} 39 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 40 | } 41 | 42 | \seealso{ 43 | \code{\link{stupidkcentroids}}, \code{\link{stupidknn}}, \code{\link{stupidkaven}} 44 | } 45 | 46 | \examples{ 47 | set.seed(20000) 48 | options(digits=3) 49 | face <- rFace(200,dMoNo=2,dNoEy=0,p=2) 50 | stupidkfn(dist(face),3) 51 | } 52 | \keyword{multivariate}% at least one, from doc/KEYWORDS 53 | \keyword{cluster}% __ONLY ONE__ keyword per line 54 | 55 | 56 | 57 | -------------------------------------------------------------------------------- /man/stupidknn.Rd: -------------------------------------------------------------------------------- 1 | \name{stupidknn} 2 | \alias{stupidknn} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Stupid nearest neighbour random clustering} 5 | \description{ 6 | Picks k random starting points from given dataset to initialise k 7 | clusters. Then, one by one, the point not yet assigned to any cluster 8 | that is closest to an already assigned point is assigned to that 9 | cluster, until all points are assigned. This is called stupid nearest 10 | neighbour clustering in Hennig (2019).
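% Editor's note (an interpretation, not from the original docs): the stupid*
% clusterings serve as random baselines when calibrating validity indexes,
% e.g. a baseline average silhouette width for k=3:
% face <- rFace(100, dMoNo=2, dNoEy=0, p=2)
% d <- dist(face)
% mean(replicate(20, mean(cluster::silhouette(stupidknn(d, 3), d)[, 3])))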
11 | } 12 | \usage{ 13 | stupidknn(d,k) 14 | } 15 | %- maybe also `usage' for other objects documented here. 16 | \arguments{ 17 | \item{d}{\code{dist}-object or dissimilarity matrix.} 18 | \item{k}{integer. Number of clusters.} 19 | } 20 | 21 | % \details{ 22 | % } 23 | \value{ 24 | The clustering vector (values 1 to \code{k}, length number of objects 25 | behind \code{d}), 26 | } 27 | \references{ 28 | Hennig, C. (2019) Cluster validation by measurement of clustering 29 | characteristics relevant to the user. In C. H. Skiadas (ed.) 30 | \emph{Data Analysis and Applications 1: Clustering and Regression, 31 | Modeling-estimating, Forecasting and Data Mining, Volume 2}, Wiley, 32 | New York 1-24, 33 | \url{https://arxiv.org/abs/1703.09282} 34 | 35 | Akhanli, S. and Hennig, C. (2020) Calibrating and aggregating cluster 36 | validity indexes for context-adapted comparison of clusterings. 37 | \emph{Statistics and Computing}, 30, 1523-1544, 38 | \url{https://link.springer.com/article/10.1007/s11222-020-09958-2}, \url{https://arxiv.org/abs/2002.01822} 39 | 40 | } 41 | \author{Christian Hennig 42 | \email{christian.hennig@unibo.it} 43 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 44 | } 45 | 46 | \seealso{ 47 | \code{\link{stupidkcentroids}}, \code{\link{stupidkfn}}, \code{\link{stupidkaven}} 48 | } 49 | 50 | \examples{ 51 | set.seed(20000) 52 | options(digits=3) 53 | face <- rFace(200,dMoNo=2,dNoEy=0,p=2) 54 | stupidknn(dist(face),3) 55 | } 56 | \keyword{multivariate}% at least one, from doc/KEYWORDS 57 | \keyword{cluster}% __ONLY ONE__ keyword per line 58 | 59 | 60 | 61 | -------------------------------------------------------------------------------- /man/tdecomp.Rd: -------------------------------------------------------------------------------- 1 | \name{tdecomp} 2 | \alias{tdecomp} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Root of singularity-corrected eigenvalue decomposition} 5 | \description{ 6 | Computes transposed eigenvectors of matrix \code{m} times diagonal of 7 | square root of eigenvalues so that eigenvalues smaller than 1e-6 are 8 | set to 1e-6. 9 | } 10 | \usage{ 11 | tdecomp(m) 12 | } 13 | %- maybe also `usage' for other objects documented here. 14 | \arguments{ 15 | \item{m}{a symmetric matrix of minimum format 2*2.} 16 | } 17 | \details{ 18 | Thought for use in \code{discrcoord} only.} 19 | \value{ 20 | a matrix. 21 | } 22 | \author{Christian Hennig 23 | \email{christian.hennig@unibo.it} 24 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 25 | } 26 | \note{ 27 | Thought for use within \code{\link{discrcoord}} only. 28 | } 29 | 30 | \examples{ 31 | x <- rnorm(10) 32 | y <- rnorm(10) 33 | z <- cov(cbind(x,y)) 34 | round(tdecomp(z),digits=2) 35 | } 36 | \keyword{array}% at least one, from doc/KEYWORDS 37 | 38 | -------------------------------------------------------------------------------- /man/tonedata.Rd: -------------------------------------------------------------------------------- 1 | \name{tonedata} 2 | \alias{tonedata} 3 | \docType{data} 4 | \title{Tone perception data} 5 | \description{ 6 | The tone perception data stem 7 | from an experiment of Cohen (1980) and have been analyzed in de Veaux 8 | (1989). 9 | A pure fundamental tone was played to a 10 | trained musician. Electronically generated overtones were added, determined 11 | by a stretching ratio of \code{stretchratio}. \code{stretchratio=2.0} 12 | corresponds to the harmonic pattern 13 | usually heard in traditional definite pitched instruments. 
The musician was 14 | asked to tune an adjustable tone to the octave above the fundamental tone. 15 | \code{tuned} gives the ratio of the adjusted tone to the fundamental, 16 | i.e. \code{tuned=2.0} would be the correct tuning for all 17 | \code{stretchratio}-values. 18 | The data analyzed here belong to 150 trials 19 | with the same musician. In the original study, there were four further 20 | musicians. 21 | } 22 | \usage{data(tonedata)} 23 | \format{A data frame with 2 variables \code{stretchratio} and 24 | \code{tuned} and 150 cases.} 25 | \source{Cohen, E. A. (1980) \emph{Inharmonic tone 26 | perception}. Unpublished Ph.D. dissertation, Stanford University.} 27 | \references{ 28 | de Veaux, R. D. (1989) Mixtures of Linear Regressions, 29 | \emph{Computational Statistics and Data Analysis} 8, 227-245. 30 | } 31 | \keyword{datasets} 32 | -------------------------------------------------------------------------------- /man/unimodal.ind.Rd: -------------------------------------------------------------------------------- 1 | \name{unimodal.ind} 2 | \alias{unimodal.ind} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Is a fitted density unimodal or not?} 5 | \description{ 6 | Checks whether a series of fitted density values (such as given out as 7 | \code{y}-component of \code{\link{density}}) is unimodal. 8 | } 9 | \usage{ 10 | unimodal.ind(y) 11 | } 12 | %- maybe also `usage' for other objects documented here. 13 | \arguments{ 14 | \item{y}{numeric vector of fitted density values in order of 15 | increasing x-values such as given out as 16 | \code{y}-component of \code{\link{density}}.} 17 | } 18 | 19 | \value{ 20 | Logical. \code{TRUE} if unimodal. 21 | } 22 | 23 | \author{Christian Hennig 24 | \email{christian.hennig@unibo.it} 25 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 26 | } 27 | \examples{ 28 | unimodal.ind(c(1,3,3,4,2,1,0,0)) 29 | } 30 | \keyword{univar}% at least one, from doc/KEYWORDS 31 | % \keyword{multivariate} 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /man/weightplots.Rd: -------------------------------------------------------------------------------- 1 | \name{weightplots} 2 | \alias{weightplots} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Ordered posterior plots} 5 | \description{ 6 | Ordered posterior plots for Gaussian mixture components, see Hennig (2010). 7 | } 8 | \usage{ 9 | weightplots(z, clusternumbers="all", clustercol=2, 10 | allcol=grey(0.2+((1:ncol(z))-1)* 11 | 0.6/(ncol(z)-1)), 12 | lty=rep(1,ncol(z)),clusterlwd=3, 13 | legendposition="none", 14 | weightcutoff=0.01,ask=TRUE, ...) 15 | } 16 | %- maybe also `usage' for other objects documented here. 17 | \arguments{ 18 | \item{z}{matrix with rows corresponding to observations and columns 19 | corresponding to mixture components. Entries are probabilities that 20 | an observation has been generated by a mixture component. These will 21 | normally be estimated a posteriori probabilities, as generated as 22 | component \code{z} of the output object from 23 | \code{\link[mclust]{summary.mclustBIC}}.} 24 | \item{clusternumbers}{\code{"all"} or vector of integers.
Numbers of 25 | components for which plots are drawn.} 26 | \item{clustercol}{colour used for the main components for which a 27 | plot is drawn.} 28 | \item{allcol}{colours used for respective other components in plots in 29 | which they are not main components.} 30 | \item{lty}{line types for components.} 31 | \item{clusterlwd}{numeric. Line width for main component.} 32 | \item{legendposition}{\code{"none"} or vector with two coordinates in 33 | the plot, where a legend should be printed.} 34 | \item{weightcutoff}{numeric between 0 and 1. Observations are only taken 35 | into account for which the posterior probability for the main 36 | component is larger than this.} 37 | \item{ask}{logical. If \code{TRUE}, it sets \code{par(ask=TRUE)} in 38 | the beginning and \code{par(ask=FALSE)} after all plots have been shown.} 39 | \item{...}{further parameters to be passed on to \code{\link{legend}}.} 40 | } 41 | 42 | \value{ 43 | Invisible matrix of posterior probabilities \code{z} from 44 | \code{mclustsummary}. 45 | } 46 | 47 | \details{ 48 | Shows posterior probabilities for observations belonging to all 49 | mixture components on the y-axis, with points ordered by posterior 50 | probability for the main component. 51 | } 52 | 53 | \references{ 54 | Hennig, C. (2010) Methods for merging Gaussian mixture components, 55 | \emph{Advances in Data Analysis and Classification}, 4, 3-34. 56 | } 57 | \author{Christian Hennig 58 | \email{christian.hennig@unibo.it} 59 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 60 | } 61 | 62 | \examples{ 63 | require(mclust) 64 | require(MASS) 65 | data(crabs) 66 | dc <- crabs[,4:8] 67 | cm <- mclustBIC(crabs[,4:8],G=9,modelNames="EEE") 68 | scm <- summary(cm,crabs[,4:8]) 69 | weightplots(scm$z,clusternumbers=1:3,ask=FALSE) 70 | weightplots(scm$z,clusternumbers=1:3,allcol=1:9, ask=FALSE, 71 | legendposition=c(5,0.7)) 72 | # Remove ask=FALSE to have time to watch the plots. 73 | } 74 | \keyword{multivariate} 75 | \keyword{cluster} 76 | 77 | 78 | -------------------------------------------------------------------------------- /man/wfu.Rd: -------------------------------------------------------------------------------- 1 | \name{wfu} 2 | \alias{wfu} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Weight function (for Mahalanobis distances)} 5 | \description{ 6 | Function of the elements of \code{md}, which is 1 for arguments smaller 7 | than \code{ca}, 0 for arguments larger than \code{ca2} and linear 8 | (default: continuous) in between. 9 | 10 | Thought for use in \code{fixmahal}. 11 | } 12 | \usage{ 13 | wfu(md, ca, ca2, a1 = 1/(ca - ca2), a0 = -a1 * ca2) 14 | } 15 | %- maybe also `usage' for other objects documented here. 16 | \arguments{ 17 | \item{md}{vector of positive numericals.} 18 | \item{ca}{positive numerical.} 19 | \item{ca2}{positive numerical.} 20 | \item{a1}{numerical. Slope.} 21 | \item{a0}{numerical. Intercept.} 22 | } 23 | 24 | \value{ 25 | A vector of numericals between 0 and 1.
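% Editor's sketch (not from the original docs) visualising the piecewise
% linear shape described above:
% md <- seq(0, 10, by=0.1)
% plot(md, wfu(md, ca=5, ca2=8), type="l")
% # constant 1 up to ca=5, linear decrease, constant 0 beyond ca2=8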
26 | } 27 | 28 | \author{Christian Hennig 29 | \email{christian.hennig@unibo.it} 30 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/}} 31 | 32 | \seealso{\code{\link{fixmahal}}} 33 | 34 | \examples{ 35 | md <- seq(0,10,by=0.1) 36 | round(wfu(md,ca=5,ca2=8),digits=2) 37 | } 38 | \keyword{arith}% at least one, from doc/KEYWORDS 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | -------------------------------------------------------------------------------- /man/xtable.Rd: -------------------------------------------------------------------------------- 1 | \name{xtable} 2 | \alias{xtable} 3 | 4 | %- Also NEED an `\alias' for EACH other topic documented here. 5 | \title{Partition crosstable with empty clusters} 6 | \description{ 7 | This produces a crosstable between two integer vectors (partitions) of 8 | the same length with a given maximum vector entry \code{k} so that the 9 | size of the table is \code{k*k} with zeroes for missing entries 10 | between 1 and \code{k} (the command \code{\link{table}} does pretty 11 | much the same thing but will leave out missing entries). 12 | } 13 | \usage{ 14 | xtable(c1,c2,k) 15 | } 16 | \arguments{ 17 | \item{c1}{vector of integers.} 18 | \item{c2}{vector of integers of same length as \code{c1}.} 19 | \item{k}{integer. Must be larger or equal to maximum entry in 20 | \code{c1} and \code{c2}.} 21 | } 22 | 23 | \value{ 24 | A matrix of dimensions \code{c(k,k)}. Entry \code{[i,j]} gives the 25 | number of places in which \code{c1==i & c2==j}. 26 | } 27 | \author{Christian Hennig 28 | \email{christian.hennig@unibo.it} 29 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 30 | } 31 | 32 | \seealso{ 33 | \code{\link{table}} 34 | } 35 | \examples{ 36 | c1 <- 1:3 37 | c2 <- c(1,1,2) 38 | xtable(c1,c2,3) 39 | } 40 | \keyword{array}% at least one, from doc/KEYWORDS 41 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /man/zmisclassification.matrix.Rd: -------------------------------------------------------------------------------- 1 | \name{zmisclassification.matrix} 2 | \alias{zmisclassification.matrix} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Matrix of misclassification probabilities between mixture components} 5 | \description{ 6 | Matrix of misclassification probabilities in a mixture distribution 7 | between two mixture components from estimated posterior probabilities 8 | regardless of component parameters, see Hennig (2010). 9 | } 10 | \usage{ 11 | zmisclassification.matrix(z,pro=NULL,clustering=NULL, 12 | ipairs="all",symmetric=TRUE, 13 | stat="max") 14 | 15 | } 16 | %- maybe also `usage' for other objects documented here. 17 | \arguments{ 18 | \item{z}{matrix of posterior probabilities for observations (rows) to 19 | belong to mixture components (columns), so entries need to sum up to 20 | 1 for each row.} 21 | \item{pro}{vector of component proportions, need to sum up to 22 | 1. Computed from \code{z} as default.} 23 | \item{clustering}{vector of integers giving the estimated mixture 24 | components for every observation. Computed from \code{z} as 25 | default.} 26 | \item{ipairs}{\code{"all"} or list of vectors of two integers. If 27 | \code{ipairs="all"}, computations are carried out for all pairs of 28 | components. Otherwise, ipairs gives the pairs of components for 29 | which computations are carried out.} 30 | \item{symmetric}{logical. If \code{TRUE}, the matrix is symmetrised, 31 | see parameter \code{stat}.} 32 | \item{stat}{\code{"max"} or \code{"mean"}. 
The statistic by which the 33 | two misclassification probabilities are aggregated if 34 | \code{symmetric=TRUE}.} 35 | } 36 | 37 | \value{ 38 | A matrix with the (symmetrised, if required) misclassification 39 | probabilities between each pair of mixture components. If 40 | \code{symmetric=FALSE}, matrix entry \code{[i,j]} is the estimated 41 | probability that an observation generated by component 42 | \code{j} is classified to component \code{i} by maximum a posteriori rule. 43 | } 44 | 45 | \references{ 46 | Hennig, C. (2010) Methods for merging Gaussian mixture components, 47 | \emph{Advances in Data Analysis and Classification}, 4, 3-34. 48 | } 49 | \author{Christian Hennig 50 | \email{christian.hennig@unibo.it} 51 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 52 | } 53 | \seealso{ 54 | \code{\link{confusion}} 55 | } 56 | \examples{ 57 | set.seed(12345) 58 | m <- rpois(20,lambda=5) 59 | dim(m) <- c(5,4) 60 | m <- m/apply(m,1,sum) 61 | round(zmisclassification.matrix(m,symmetric=FALSE),digits=2) 62 | } 63 | \keyword{cluster}% at least one, from doc/KEYWORDS 64 | \keyword{multivariate} 65 | 66 | 67 | 68 | -------------------------------------------------------------------------------- /tests/fpctests_notallin.R: -------------------------------------------------------------------------------- 1 | # This tests a few things that are not run in the examples. 2 | 3 | library(fpc) 4 | library(MASS) 5 | library(diptest) 6 | library(mclust) 7 | options(digits=3) 8 | 9 | set.seed(4634) 10 | face <- rFace(300,dMoNo=2,dNoEy=0,p=3) 11 | grface <- as.integer(attr(face,"grouping")) 12 | # discrproj(face,grface, clnum=1, method="bc")$units 13 | discrproj(face,grface, clnum=1, method="anc")$units 14 | discrproj(face,grface, clnum=1, method="awc")$units 15 | 16 | 17 | pamk(face,krange=1:5,criterion="ch",usepam=FALSE,critout=TRUE) 18 | 19 | set.seed(20000) 20 | face50 <- rFace(50,dMoNo=2,dNoEy=0,p=2) 21 | pamk(dist(face50),krange=1:5,criterion="asw",critout=TRUE) 22 | 23 | x <- c(1,2,3,6,6,7,8,120) 24 | ff8 <- fixmahal(x) 25 | summary(ff8) 26 | # ...dataset a bit too small for the defaults... 
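# Editorial note (an assumption about the arguments): mnc (minimum cluster
# size) and startn (size of the starting configurations) are lowered so that
# fixmahal can run on this 8-point dataset.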
27 | ff9 <- fixmahal(x, mnc=3, startn=3) 28 | summary(ff9) 29 | 30 | set.seed(776655) 31 | v1 <- rnorm(100) 32 | v2 <- rnorm(100) 33 | d1 <- sample(1:5,100,replace=TRUE) 34 | d2 <- sample(1:4,100,replace=TRUE) 35 | ldata <- cbind(v1,v2,d1,d2) 36 | fr <- flexmixedruns(ldata, 37 | continuous=2,discrete=2,simruns=1,initial.cluster=c(rep(1,5),rep(2,45), 38 | rep(3,50)), 39 | control=list(minprior=0.1), 40 | n.cluster=3,allout=FALSE) 41 | print(fr$optsummary) 42 | 43 | dface <- dist(face50) 44 | 45 | 46 | hclusttreeCBI(face50,minlevel=2,method="complete",scaling=TRUE) 47 | 48 | disthclusttreeCBI(dface,minlevel=2,method="complete") 49 | 50 | noisemclustCBI(face50,G=1:5,emModelNames="VVV",nnk=2) 51 | 52 | distnoisemclustCBI(dface,G=5,emModelNames="EEE",nnk=2, 53 | mdsmethod="classical", 54 | mdsdim=2) 55 | 56 | mahalCBI(face50,clustercut=0.5) 57 | 58 | set.seed(20000) 59 | face100 <- rFace(100,dMoNo=2,dNoEy=0,p=2) 60 | cbf <- clusterboot(face100,B=2,clustermethod=speccCBI,showplots=TRUE,k=6,seed=50000) 61 | cbf$nc 62 | cbf$noisemethod 63 | cbf$bootmethod 64 | # suppressWarnings(if(require(tclust)) 65 | # print(clusterboot(face100,B=2,clustermethod=tclustCBI,showplots=TRUE,k=5,seed=50000,noisemethod=TRUE))) 66 | 67 | 68 | complete3 <- cutree(hclust(dface),3) 69 | 70 | cluster.stats(dface,complete3,G2=TRUE) 71 | 72 | set.seed(55667788) 73 | 74 | data(crabs) 75 | dc <- crabs[,4:8] 76 | cmo <- mclustBIC(crabs[,4:8],G=9,modelNames="EEE") 77 | # set.seed(12345) 78 | cm <- mclustBIC(crabs[,4:8],G=9,modelNames="EEE", 79 | initialization=list(noise=(1:200)[sample(200,50)])) 80 | 81 | 82 | scm <- summary(cm,crabs[,4:8]) 83 | scmo <- summary(cmo,crabs[,4:8]) 84 | 85 | set.seed(334455) 86 | summary(mergenormals(crabs[,4:8],scm,method="ridge.ratio",by=0.05)) 87 | summary(mergenormals(crabs[,4:8],scmo,method="ridge.uni",by=0.05)) 88 | # summary(mergenormals(crabs[,4:8],scm,method="diptantrum",by=0.05)) 89 | # summary(mergenormals(crabs[,4:8],scmo,method="dipuni",by=0.05)) 90 | # summary(mergenormals(crabs[,4:8],scm,method="predictive",M=2)) 91 | 92 | set.seed(20000) 93 | x1 <- rnorm(50) 94 | y <- rnorm(100) 95 | x2 <- rnorm(40,mean=20) 96 | x3 <- rnorm(10,mean=25,sd=100) 97 | x0 <- cbind(c(x1,x2,x3),y) 98 | 99 | prediction.strength(x0,M=10,Gmax=4, 100 | clustermethod=noisemclustCBI, 101 | classification="qda") 102 | 103 | prediction.strength(dist(x0),M=10,Gmax=4, 104 | clustermethod=claraCBI, 105 | classification="centroids") 106 | 107 | 108 | set.seed(20000) 109 | xdata <- c(rnorm(10,0,1),rnorm(10,8,1)) 110 | clustermethod=c("claraCBI","dbscanCBI") 111 | 112 | clustermethodpars <- list() 113 | clustermethodpars[[1]] <- clustermethodpars[[2]] <- list() 114 | clustermethodpars[[2]]$eps <- 2 115 | clustermethodpars[[2]]$MinPts <- 2 116 | cbs <- clusterbenchstats(xdata,G=3,clustermethod=clustermethod, 117 | distmethod=rep(TRUE,2),ncinput=c(TRUE,FALSE),scaling=FALSE, 118 | clustermethodpars=clustermethodpars,nnruns=2,kmruns=2,fnruns=1,avenruns=1,useallg=TRUE) 119 | 120 | print(cbs$sstat,aggregate=TRUE,weights=c(1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,1),include.othernc=cbs$cm$othernc) 121 | print(cbs$qstat,aggregate=TRUE,weights=c(1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,1),include.othernc=cbs$cm$othernc) 122 | 123 | 124 | --------------------------------------------------------------------------------