├── DESCRIPTION ├── MD5 ├── NAMESPACE ├── R ├── addclustermethods.R ├── clusterboot.R ├── clusterindexes.R ├── cquality20.R ├── dbscan.R ├── discrproj.R ├── discrproj2.R ├── fixreg.R ├── fpc.R ├── lcmixed.R ├── localshape.R ├── mergenormals.R ├── rFace.R └── regmix.R ├── data └── tonedata.txt.gz ├── man ├── adcoord.Rd ├── ancoord.Rd ├── awcoord.Rd ├── batcoord.Rd ├── bhattacharyya.dist.Rd ├── bhattacharyya.matrix.Rd ├── calinhara.Rd ├── can.Rd ├── cat2bin.Rd ├── cdbw.Rd ├── cgrestandard.Rd ├── classifdist.Rd ├── clucols.Rd ├── clujaccard.Rd ├── clusexpect.Rd ├── clustatsum.Rd ├── cluster.magazine.Rd ├── cluster.stats.Rd ├── cluster.varstats.Rd ├── clusterbenchstats.Rd ├── clusterboot.Rd ├── cmahal.Rd ├── concomp.Rd ├── confusion.Rd ├── cov.wml.Rd ├── cqcluster.stats.Rd ├── cvnn.Rd ├── cweight.Rd ├── dbscan.Rd ├── dipp.tantrum.Rd ├── diptest.multi.Rd ├── discrcoord.Rd ├── discrete.recode.Rd ├── discrproj.Rd ├── distancefactor.Rd ├── distcritmulti.Rd ├── distrsimilarity.Rd ├── dridgeline.Rd ├── dudahart2.Rd ├── extract.mixturepars.Rd ├── findrep.Rd ├── fixmahal.Rd ├── fixreg.Rd ├── flexmixedruns.Rd ├── fpc-package.Rd ├── fpclusters.Rd ├── itnumber.Rd ├── jittervar.Rd ├── kmeansCBI.Rd ├── kmeansruns.Rd ├── lcmixed.Rd ├── localshape.Rd ├── mahalanodisc.Rd ├── mahalanofix.Rd ├── mahalconf.Rd ├── mergenormals.Rd ├── mergeparameters.Rd ├── minsize.Rd ├── mixdens.Rd ├── mixpredictive.Rd ├── mvdcoord.Rd ├── ncoord.Rd ├── neginc.Rd ├── nselectboot.Rd ├── pamk.Rd ├── piridge.Rd ├── piridge.zeroes.Rd ├── plot.valstat.Rd ├── plotcluster.Rd ├── prediction.strength.Rd ├── rFace.Rd ├── randcmatrix.Rd ├── randconf.Rd ├── randomclustersim.Rd ├── regmix.Rd ├── ridgeline.Rd ├── ridgeline.diagnosis.Rd ├── simmatrix.Rd ├── solvecov.Rd ├── sseg.Rd ├── stupidkaven.Rd ├── stupidkcentroids.Rd ├── stupidkfn.Rd ├── stupidknn.Rd ├── tdecomp.Rd ├── tonedata.Rd ├── unimodal.ind.Rd ├── valstat.object.Rd ├── weightplots.Rd ├── wfu.Rd ├── xtable.Rd └── zmisclassification.matrix.Rd └── tests ├── Examples └── fpc-Ex.Rout.save ├── fpctests_notallin.R └── fpctests_notallin.Rout.save /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: fpc 2 | Title: Flexible Procedures for Clustering 3 | Version: 2.2-13 4 | Date: 2024-09-23 5 | Authors@R: person(given = "Christian", 6 | family = "Hennig", 7 | role = c("aut", "cre"), 8 | email = "christian.hennig@unibo.it") 9 | Depends: R (>= 2.0) 10 | Imports: MASS, cluster, mclust, flexmix, prabclus, class, diptest, 11 | robustbase, kernlab, grDevices, graphics, methods, stats, 12 | utils, parallel 13 | Suggests: tclust, pdfCluster, mvtnorm 14 | Description: Various methods for clustering and cluster validation. 15 | Fixed point clustering. Linear regression clustering. Clustering by 16 | merging Gaussian mixture components. Symmetric 17 | and asymmetric discriminant projections for visualisation of the 18 | separation of groupings. Cluster validation statistics 19 | for distance based clustering including corrected Rand index. 20 | Standardisation of cluster validation statistics by random clusterings and 21 | comparison between many clustering methods and numbers of clusters based on 22 | this. 23 | Cluster-wise cluster stability assessment. Methods for estimation of 24 | the number of clusters: Calinski-Harabasz, Tibshirani and Walther's 25 | prediction strength, Fang and Wang's bootstrap stability. 26 | Gaussian/multinomial mixture fitting for mixed 27 | continuous/categorical variables. 
Variable-wise statistics for cluster
28 | interpretation. DBSCAN clustering. Interface functions for many
29 | clustering methods implemented in R, including estimating the number of
30 | clusters with kmeans, pam and clara. Modality diagnosis for Gaussian
31 | mixtures. For an overview see package?fpc.
32 | License: GPL
33 | URL: https://www.unibo.it/sitoweb/christian.hennig/en/
34 | NeedsCompilation: no
35 | Packaged: 2024-09-23 23:23:04 UTC; chrish
36 | Author: Christian Hennig [aut, cre]
37 | Maintainer: Christian Hennig <christian.hennig@unibo.it>
38 | Repository: CRAN
39 | Date/Publication: 2024-09-24 05:50:02 UTC
40 |
--------------------------------------------------------------------------------
/MD5:
--------------------------------------------------------------------------------
1 | d8a47a3769189ca6cdf1348fce09fdb9 *DESCRIPTION
2 | 1125ef47578160408d5075ac0948abcd *NAMESPACE
3 | aec30efb5a63e14e8987a882e3585a7c *R/addclustermethods.R
4 | 8c8c3adab571a79097b5e0651e155390 *R/clusterboot.R
5 | 9df3134d1f6ad4f8f77a5943e0caec6d *R/clusterindexes.R
6 | 88465a515081c286290dc3bc7f800d73 *R/cquality20.R
7 | 742e9a99e68a078688ab046eba68e97b *R/dbscan.R
8 | b7c01634910fd6957ef94642398dc4d7 *R/discrproj.R
9 | d85447226e4ac2c17ce73fc907193176 *R/discrproj2.R
10 | ddd1d4523b35728dd81e26f593895b45 *R/fixreg.R
11 | 9e19a17d18fd66a939c4fa74a0234afd *R/fpc.R
12 | 05ccaaa9be1507b51a78e56418231ffc *R/lcmixed.R
13 | 9a31672e2d1b0eaa383861dd2b630b23 *R/localshape.R
14 | 20c83aae9aa85787f27ca65bca2c132d *R/mergenormals.R
15 | b2788e1bba14be4b1e1a1dc22b00b776 *R/rFace.R
16 | 382e8aa6e94a4dceaded3365064039d8 *R/regmix.R
17 | 3505e046680bd1f5bb7a8b2d20ef38c4 *data/tonedata.txt.gz
18 | a8ecd61d2070e101ba9ad5e8f42d01e6 *man/adcoord.Rd
19 | 3a4ec884478722c35b2cdd947dfe74b5 *man/ancoord.Rd
20 | 5ab5d71b1b9902e43b5837dfe03c9a4c *man/awcoord.Rd
21 | acb2a3f2ab2c8d0bcd78f333fe5f6322 *man/batcoord.Rd
22 | f9d350c902ba3fb50c1e4d664b89528c *man/bhattacharyya.dist.Rd
23 | afc86d08fa24b04bdf9a6bfed231f001 *man/bhattacharyya.matrix.Rd
24 | 0696e200baef6495722674ac041f4a4f *man/calinhara.Rd
25 | 2b5264ab2e49d9f1a6b69f6b371ba0d4 *man/can.Rd
26 | 168c38d6eaf6f7765492425a77dffb19 *man/cat2bin.Rd
27 | c1aac2369f3bd407a11f86f17232f919 *man/cdbw.Rd
28 | f8207303a74e577b004a128f3e8cb812 *man/cgrestandard.Rd
29 | 7276547ff5b24a46ac9db08b26ef5f22 *man/classifdist.Rd
30 | 388a1c27aeb2abf791c3694e5b6f0362 *man/clucols.Rd
31 | ab40c02193537ab2cec1a5c3cb848102 *man/clujaccard.Rd
32 | 117a797d11c1ab28b8a4453ac9ab49e5 *man/clusexpect.Rd
33 | e6afaf7f014e9aba03cd84d42c614ea3 *man/clustatsum.Rd
34 | 3cba7a50d7060a4587566801556ca9b3 *man/cluster.magazine.Rd
35 | 2e1d6c3f909a21259af7f08bf1a1f61e *man/cluster.stats.Rd
36 | 030b373fea2741e53ece7db3f4224032 *man/cluster.varstats.Rd
37 | 421c33589609de50ff288ac659eb87fe *man/clusterbenchstats.Rd
38 | e8fdf85cbdfc2de7056bacf599ca5c75 *man/clusterboot.Rd
39 | 9db8a890d7d579b062342b96ae870eed *man/cmahal.Rd
40 | dfc9b80c1b32810d9819f14bd18a2f4f *man/concomp.Rd
41 | 69c83485d86d098e5b1e382aa1aa0e03 *man/confusion.Rd
42 | 46511da6ae33e758ea4df921271a06e3 *man/cov.wml.Rd
43 | eac07f87a35f3afa23781188dfa7d19f *man/cqcluster.stats.Rd
44 | 3d82668239fad4518bcf284abea52948 *man/cvnn.Rd
45 | ad16fc22770ef7f7837edd1441ae9a19 *man/cweight.Rd
46 | 1de81baa2b792e07f951dbb68fe53571 *man/dbscan.Rd
47 | d13c67097f7c290f11bf4387aca5eb1a *man/dipp.tantrum.Rd
48 | 2a6593ed2ebd68ee4d4b82ad4c827ca1 *man/diptest.multi.Rd
49 | 927e7505630ff847155eb40fecc67b84 *man/discrcoord.Rd
50 | 5369ed120187a12fac527e05838f8385 *man/discrete.recode.Rd
51
| 1137fec01134e3337de519984530396c *man/discrproj.Rd 52 | 9bf4fba6be7109d5d969595a382e197e *man/distancefactor.Rd 53 | 7f1528fc62b218f63c6ea92b6b082488 *man/distcritmulti.Rd 54 | 356bb125dde6b29300be02bf2e3f0c16 *man/distrsimilarity.Rd 55 | 2fd8c12a19a526bf2838ea5e420df046 *man/dridgeline.Rd 56 | fbceefe2215ac7b210f3ae58f9242504 *man/dudahart2.Rd 57 | f26709267c6fdd746702b041504589cc *man/extract.mixturepars.Rd 58 | 6bf6188e60401384a81b74e7813ae724 *man/findrep.Rd 59 | 9af298038dffe9c45b66d2deb8fdc41f *man/fixmahal.Rd 60 | ef902926337553264cc8080cc2bfe18d *man/fixreg.Rd 61 | 8cc1d2d3d5ad5037de46ec7e578d37d5 *man/flexmixedruns.Rd 62 | cae437b4708a1d825f828d271bd50e99 *man/fpc-package.Rd 63 | 84bcae9460565f4244353ff7d349de38 *man/fpclusters.Rd 64 | 3406d81af07dca5bb922b4e7746d0bff *man/itnumber.Rd 65 | 2f0dd05ea31c9534529bb763076af795 *man/jittervar.Rd 66 | e027b94a960938fad460e8dfe4de81b4 *man/kmeansCBI.Rd 67 | aff11c26aeab6c65b65bf0cf17f35363 *man/kmeansruns.Rd 68 | f8ac0ee0d2969de37064f386b660f3b4 *man/lcmixed.Rd 69 | 1327aab55e8b9d988cb8702c360334e8 *man/localshape.Rd 70 | 1b4c349f88497821c18ba8f07eab9e0e *man/mahalanodisc.Rd 71 | 79dfef5657596ddd14fa54d259e7fc43 *man/mahalanofix.Rd 72 | 91bb05e5be571ca0a58aea51524ce634 *man/mahalconf.Rd 73 | 31b55db417104f978b01a3d66a5a837b *man/mergenormals.Rd 74 | 4a362ae7a8f5b62b3e6bcc4aa69d96d8 *man/mergeparameters.Rd 75 | eb4f876ed214aad657300e05732f87f9 *man/minsize.Rd 76 | 9d315a8d06ce60c5cd131dec61266728 *man/mixdens.Rd 77 | 9a57a7a77170d60660d9fe6b32d5bee2 *man/mixpredictive.Rd 78 | 2f12f3e1cab6b01d3dfbcbd62fb4e19d *man/mvdcoord.Rd 79 | 19bcb0510dc85e2e78ede077978169ce *man/ncoord.Rd 80 | 9cd75fca00ac100d2d5490c0b3efccc2 *man/neginc.Rd 81 | c1496afc6c30854f09ae355ade834bdd *man/nselectboot.Rd 82 | 2c5757fed7679a28e38574890abfbf37 *man/pamk.Rd 83 | 96a5321a0408ea3672912109356b330e *man/piridge.Rd 84 | e25a1ffe3fd58568f3fc22f778466898 *man/piridge.zeroes.Rd 85 | 9c2924a16367b879254f85c2328f49ea *man/plot.valstat.Rd 86 | a65a08d014a70310af397eeac7e6c515 *man/plotcluster.Rd 87 | c1446aacf9806e47031c2bb4f7cb3805 *man/prediction.strength.Rd 88 | 8eda1a69722326ae53f828c691677f6c *man/rFace.Rd 89 | 587846ed6be5faa2de83d952ec75e40d *man/randcmatrix.Rd 90 | fa4a2d8cf2ab7a0a95c27f487082892e *man/randconf.Rd 91 | add27cde61e11b21962206fa48710eff *man/randomclustersim.Rd 92 | 50d558a06762eccd46f93e36abfaee6f *man/regmix.Rd 93 | de39716505501f20388419b1b6dbf52f *man/ridgeline.Rd 94 | 68593a7a80098f9de2ffefbca6f048e7 *man/ridgeline.diagnosis.Rd 95 | eff9ad3ea5bbb8fd73203fdad64572b6 *man/simmatrix.Rd 96 | d6c7f469f0186b49621bcddd3c3fa2e7 *man/solvecov.Rd 97 | 9dd4c04057b4a3a3642b8950fd3cfc18 *man/sseg.Rd 98 | 82b4787f4a697756c94239f3eff00bf1 *man/stupidkaven.Rd 99 | ddb9d6f89110b180a574e6583b2838c1 *man/stupidkcentroids.Rd 100 | 3a83a0a91c82efda6e8be620ece68259 *man/stupidkfn.Rd 101 | 73018257b3c6d6710a4ebd2a25a5138e *man/stupidknn.Rd 102 | e5c96c7b34a1cfb10b0195b15a051741 *man/tdecomp.Rd 103 | e156cb0ced917fe86298bf3fbb316a25 *man/tonedata.Rd 104 | 1513bc6b3a73bbe9aae8718e5f825b0a *man/unimodal.ind.Rd 105 | c22b38806cbda0b6c46afd0702ffda27 *man/valstat.object.Rd 106 | 013d8afb7015e85ebb434ab27bc14265 *man/weightplots.Rd 107 | 19cb8bae42e9c79f99e7ec427a2c66ad *man/wfu.Rd 108 | c895dda06251f6f2c38b2f0669f66874 *man/xtable.Rd 109 | 98033be110c0e6d320a01868fa441d27 *man/zmisclassification.matrix.Rd 110 | 05c8bcca6070ae5a8f7bea3f66de5c63 *tests/Examples/fpc-Ex.Rout.save 111 | 19641842cec18ebe59fc649acc502274 *tests/fpctests_notallin.R 112 | 
7b58a228503baef69789bccaacf38d65 *tests/fpctests_notallin.Rout.save
113 |
--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | # Remove the previous line if you edit this file
2 | # This is the default, just nicked.
3 |
4 | # Export all names
5 | exportPattern(".")
6 |
7 | # Import all packages listed as Imports or Depends
8 | import(
9 | MASS,
10 | cluster,
11 | mclust,
12 | flexmix,
13 | prabclus,
14 | class,
15 | diptest,
16 | robustbase
17 | )
18 |
19 | importFrom("kernlab",specc)
20 |
21 | importFrom("grDevices", "colors", "colours", "grey", "xy.coords")
22 | importFrom("graphics", "abline", "hist", "legend", "pairs", "par",
23 | "points", "polygon", "title","axis","text")
24 | importFrom("methods", "new")
25 | importFrom("stats", "BIC", "addmargins", "as.dist", "cmdscale", "coef",
26 | "coefficients", "cor", "cov", "cov.wt", "cutree", "density",
27 | "dist", "dnorm", "fitted.values", "hclust", "kmeans", "lm",
28 | "lsfit", "mahalanobis", "median", "pchisq", "pnorm",
29 | "qbinom", "qchisq", "qnorm", "quantile", "rbinom", "resid",
30 | "residuals", "rexp", "rgamma", "rnorm", "rt", "runif", "sd",
31 | "weighted.mean","ecdf","pgamma")
32 | importFrom("utils", "data")
33 | importFrom("parallel", "mclapply", "detectCores")
34 |
35 | S3method(fpclusters, mfpc)
36 | S3method(fpclusters, rfpc)
37 | S3method(plot, clboot)
38 | S3method(plot, dbscan)
39 | S3method(plot, mfpc)
40 | S3method(plot, rfpc)
41 | S3method(plot, valstat)
42 | S3method(predict, dbscan)
43 | S3method(print, clboot)
44 | S3method(print, dbscan)
45 | S3method(print, mfpc)
46 | S3method(print, predstr)
47 | S3method(print, rfpc)
48 | S3method(print, summary.mergenorm)
49 | S3method(print, summary.mfpc)
50 | S3method(print, summary.rfpc)
51 | S3method(print, summary.cquality)
52 | S3method(print, varwisetables)
53 | S3method(print, clusterbenchstats)
54 | S3method(print, valstat)
55 | S3method(summary, mergenorm)
56 | S3method(summary, mfpc)
57 | S3method(summary, rfpc)
58 | S3method(summary, cquality)
59 | 60 | 61 | 62 |
--------------------------------------------------------------------------------
/R/dbscan.R:
--------------------------------------------------------------------------------
1 | dbscan <- function(
2 | data
3 | , eps
4 | , MinPts = 5
5 | , scale = FALSE
6 | , method = c("hybrid","raw","dist")
7 | # , no.check = FALSE
8 | , seeds = TRUE
9 | , showplot = FALSE
10 | , countmode = NULL #c(1,2,3,5,10,100,1000,5000,10000,50000)
11 | )
12 | {
13 | # if (!require(distpatch))
14 | distcomb <- function(x,data){
15 | data <- t(data)
16 | temp <- apply(x, 1, function(x){
17 | sqrt(colSums((data-x)^2))
18 | })
19 | if (is.null(dim(temp)))
20 | matrix(temp, nrow(x), ncol(data))
21 | else
22 | t(temp)
23 | }
24 | method <- match.arg(method)
25 | data <- as.matrix(data)
26 | n <- nrow(data)
27 | if (scale)
28 | data <- scale(data)
29 | classn <- cv <- integer(n)
30 | isseed <- logical(n)
31 | cn <- integer(1)
32 | for (i in 1:n){
33 | if (i %in% countmode)
34 | cat("Processing point ", i," of ",n, ".\n")
35 | unclass <- (1:n)[cv<1]
36 | if (cv[i]==0){
37 | if (method=="dist"){
38 | reachables <- unclass[data[i,unclass]<=eps]
39 | }else{
40 | reachables <- unclass[as.vector(distcomb(data[i,, drop=FALSE],data[unclass,, drop=FALSE]))<=eps]
41 | }
42 | if (length(reachables)+classn[i]<MinPts)
43 | cv[i] <- (-1)
44 | else{
45 | cn <- cn+1
46 | cv[i] <- cn
47 | isseed[i] <- TRUE
48 | reachables <- setdiff(reachables, i)
49 | unclass <- setdiff(unclass, i)
50 | classn[reachables] <- classn[reachables]+1
51 | while (length(reachables)){
52 | if (showplot)
53 | plot(data, col=1+cv, pch=1+isseed)
54 | cv[reachables] <- cn
55 | ap <- reachables
56 | reachables <- integer()
57 | if (method=="hybrid"){
58 | tempdist <- distcomb(data[ap,, drop=FALSE], data[unclass,, drop=FALSE])
59 | frozen.unclass <- unclass
60 | }
61 | for (i2 in seq(along=ap)){
62 | j <- ap[i2]
63 | if (showplot>1)
64 | plot(data, col=1+cv, pch=1+isseed)
65 | if (method=="dist"){
66 | jreachables <- unclass[data[j,unclass]<=eps]
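# (A note on the three neighbourhood queries in this if/else chain, based
# on the code itself: "dist" reads distances straight from a precomputed
# distance matrix; "hybrid" looks them up in tempdist, the block of
# distances computed once per batch of seed points ap and indexed via
# frozen.unclass; "raw" recomputes distances from the coordinates on the
# fly, trading speed for memory.)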
67 | }else if (method=="hybrid"){ 68 | jreachables <- unclass[tempdist[i2,match(unclass, frozen.unclass)]<=eps] 69 | }else{ 70 | jreachables <- unclass[as.vector(distcomb(data[j,, drop=FALSE], data[unclass,, drop=FALSE]))<=eps] 71 | } 72 | if (length(jreachables)+classn[j]>=MinPts){ 73 | isseed[j] <- TRUE 74 | cv[jreachables[cv[jreachables]<0]] <- cn 75 | reachables <- union(reachables, jreachables[cv[jreachables]==0]) # isseed for these new reachables tested at next while loop 76 | } 77 | # must be after querying classn, otherwise we count j itself twice 78 | classn[jreachables] <- classn[jreachables]+1 79 | unclass <- setdiff(unclass, j) 80 | } # for j 81 | } # while sum reachables>0 82 | } # else (sum reachables + ... >= MinPts) 83 | } # if cv==0 84 | if (!length(unclass)) 85 | break 86 | } # for i 87 | rm(classn) 88 | if (any(cv==(-1))){ 89 | cv[cv==(-1)] <- 0 90 | } 91 | if (showplot) 92 | plot(data, col=1+cv, pch=1+isseed) 93 | out <- list( 94 | cluster = cv 95 | , eps = eps 96 | , MinPts = MinPts 97 | ) 98 | if (seeds && cn>0){ 99 | out$isseed <- isseed 100 | } 101 | class(out) <- "dbscan" 102 | out 103 | } # dbscan 104 | 105 | 106 | print.dbscan <- function(x, ...){ 107 | cat("dbscan Pts=", length(x$cluster), " MinPts=", x$MinPts, " eps=", x$eps, "\n", sep="") 108 | if (is.null(x$isseed)) 109 | tab <- table(x$cluster) 110 | else{ 111 | tab <- table(c("seed", "border")[2-x$isseed], cluster=x$cluster) 112 | if (is.null(dim(tab))){ 113 | tab <- cbind(tab) 114 | colnames(tab) <- unique(x$cluster) 115 | } 116 | tab <- rbind(tab, total=colSums(tab)) 117 | } 118 | print(tab, ...) 119 | } 120 | 121 | plot.dbscan <- function(x, data, ...) 122 | { 123 | plot(data, col=1+x$cluster, pch=1+x$isseed, ...) 124 | } 125 | 126 | 127 | predict.dbscan <- function( 128 | object 129 | , data 130 | , newdata = NULL 131 | , predict.max = 1000 132 | # , no.check = FALSE 133 | , ... 
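# (With newdata, the code below assigns each new point the cluster of its
# nearest seed point, found via class::knn1, and demotes it to noise
# (cluster 0) if even that seed lies farther away than eps. predict.max
# caps the batch size so the distance computations stay memory-bounded.)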
134 | )
135 | {
136 | if (is.null(newdata)){
137 |
138 | return(object$cluster)
139 |
140 | }else{
141 |
142 | if (is.null(object$isseed))
143 | stop("no seeds to predict")
144 |
145 | dmax <- object$eps
146 | data <- data[object$isseed, , drop=FALSE]
147 | out <- object$cluster[object$isseed]
148 |
149 | # if (!require(distpatch))
150 | distpair <- function(x,data){
151 | sqrt(rowSums((x-data)^2))
152 | }
153 |
154 | # require(class)
155 | batchpredict <- function(newdata){
156 | w <- as.integer(knn1(data, newdata, 1:n.orig))
157 | newout <- out[w]
158 | if (!is.null(dmax)){
159 | d <- distpair(data[w,,drop=FALSE], newdata)
160 | newout[d>dmax] <- 0
161 | }
162 | return(newout)
163 | }
164 | n <- nrow(newdata)
165 | n.orig <- nrow(data)
166 | if (n>predict.max){
167 | i <- 1:n
168 | ret <- do.call("c", lapply(split(i, (i-1)%/%predict.max), function(i)batchpredict(newdata[i, , drop=FALSE])))
169 | }else{
170 | ret <- batchpredict(newdata)
171 | }
172 | return(ret)
173 | }
174 | }
175 | 176 | 177 |
178 | # if (FALSE){
179 | #
180 | # x <- t(t(sort(c(rnorm(20), 1:10))))
181 | # ds1 <- dbscan1(x, MinPts=5, eps=2, showplot=1)
182 | # ds <- dbscan(x, MinPts=5, eps=2, showplot=1)
183 | #
184 | # par(mfrow=c(2, 1))
185 | # plot(x, col=1+ds1$classification)
186 | # plot(ds, x)
187 | # ds1
188 | # ds
189 | # par(mfrow=c(1, 1))
190 | #
191 | # }
192 |
--------------------------------------------------------------------------------
/R/localshape.R:
--------------------------------------------------------------------------------
1 | localshape <- function(xdata,proportion=0.1,mscatter="mcd",mcdalpha=0.8,
2 | covstandard="det"){
3 | # if (mscatter=="mcd") require(robustbase)
4 | xdata <- as.matrix(xdata)
5 | scatter <- switch(mscatter,
6 | mcd=covMcd(xdata,alpha=mcdalpha)$cov,
7 | cov=cov(xdata))
8 | n <- nrow(xdata)
9 | p <- ncol(xdata)
10 | np <- round(proportion*n)
11 | mmatrix <- matrix(0,n,n)
12 | for (i in 1:n)
13 | mmatrix[i,] <- mahalanobis(xdata,xdata[i,],scatter)
14 | lcov <- matrix(0,p,p)
15 | for (i in 1:n){
16 | xc <- cov(xdata[order(mmatrix[i,])[1:np],])
17 | lcov <- lcov+switch(covstandard,
18 | trace=xc/sum(diag(xc)),
19 | det=xc/det(xc),
20 | none=xc)
21 | }
22 | lcov <- lcov/n
23 | lcov
24 | }
25 | 26 |
--------------------------------------------------------------------------------
/R/regmix.R:
--------------------------------------------------------------------------------
1 |
2 | # randcmatrix=random partition matrix for n observations to cln clusters
3 | randcmatrix <- function (n,cln,p){
4 | ct <- 0
5 | while(ct<p+3){
6 | m <- matrix(0,nrow=n,ncol=cln)
7 | for (i in 1:n)
8 | m[i,sample(cln,1)] <- 1
9 | ct <- min(colSums(m))
10 | } # regenerate until every cluster has at least p+3 members
11 | m
12 | }
13 |
14 | # regem: EM iteration for a mixture of cln linear regressions, started
15 | # from the initial partition/posterior matrix m as produced by
16 | # randcmatrix.
17 | # indep: matrix of independent variables, dep: dependent variable,
18 | # icrit: iteration stopping criterion (loglikelihood improvement),
19 | # minsig: minimum error variance (protects against degeneracy),
20 | # warnings: if TRUE, warn about too small or collinear clusters.
21 | regem <- function (indep, dep, m, cln,
22 | icrit=1.e-5, minsig=1.e-6, warnings=FALSE){
23 | n <- length(dep)
24 | p <- ncol(as.matrix(indep))
25 | eps <- rep(0,cln)
26 | rc <- matrix(0,p+1,cln)
27 | fv <- matrix(0,n,cln)
28 | rv <- rep(0,cln)
29 | loglik <- (-1.e9)
30 | change <- TRUE
31 | smallcluster <- FALSE
32 |
33 | # Alternate M- and E-steps until the loglikelihood stabilises or a
34 | # cluster degenerates.
35 | while (change && !smallcluster){
36 | oldlik <- loglik
37 |
38 | # M-step: cluster proportions eps and one weighted least squares
39 | # regression per cluster.
40 | for (i in 1:cln){
41 | eps[i] <- sum(m[,i])/n
42 |
43 | # A cluster is too small for a stable weighted regression if
44 | # fewer than p+2 points carry nonnegligible posterior weight.
45 | if (sum(m[,i]>0.01) < p+2){
46 | if (warnings) warning("Too small cluster")
47 | smallcluster <- TRUE
48 | } # if too small cluster
49 | else{
50 | reg <- lm(dep~indep, weights=m[,i])
51 | fv[,i] <- fitted.values(reg)
52 | rc[,i] <- coefficients(reg)
53 | # abline(rc[,i],col=i)
54 | for (j in 2:(p+1))
55 | if (is.na(rc[j,i])){
56 | smallcluster <- TRUE
57 | if (warnings) warning("Collinear regressors")
58 | } # if collinearity
59 | res <- residuals(reg)
60 | rv[i] <- weighted.mean(res^2,m[,i])
61 | if (rv[i]<minsig) rv[i] <- minsig
62 | } # else: weighted regression for cluster i
63 | } # for i (M-step)
64 |
65 | if (!smallcluster){
66 | # E-step: component densities, new posterior probabilities and
67 | # loglikelihood.
68 | ff <- matrix(0,n,cln)
69 | for (i in 1:cln)
70 | ff[,i] <- eps[i]*dnorm(dep,mean=fv[,i],sd=sqrt(rv[i]))
71 | fs <- ff %*% rep(1,cln)
72 | m <- ff/as.vector(fs)
73 | loglik <- sum(log(fs))
74 |
75 | # The iteration goes on while the loglikelihood still improves
76 | # by more than icrit. abs() guards against the rare small
77 | # decrease that the minsig truncation of the error variances
78 | # can cause; without it the loop might not terminate.
79 |
80 | change <- (abs(loglik-oldlik)> icrit)
81 | } # if no collinearity & clusters large enough
82 | } # while change
83 | g <- c()
84 | for (i in 1:n)
85 | g[i] <- which.max(m[i,])
86 | out <- list(coef=rc, vars=rv, z=m, g=g, eps=eps, loglik=loglik,
87 | warn=smallcluster)
88 | out
89 | } # regem
90 | 91 | 92 |
93 | # Regression mixture analysis (DeSarbo and Cron),
94 | # ir=iteration runs, nclust= cluster numbers vector, icrit=iteration stopping
95 | # criterion, minsig = minimum error variance
96 | regmix <-
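# (A note on the model selection below: the BIC used is
#   2*loglik - log(n)*((p+3)*cln-1),
# since each of the cln clusters contributes p+1 regression coefficients,
# one error variance and one mixture proportion, i.e. p+3 parameters, and
# one is subtracted because the proportions sum to 1. A minimal usage
# sketch, with made-up data purely for illustration:
#   x <- runif(100); y <- 2*x + rnorm(100, sd=0.1)
#   rm1 <- regmix(x, y, nclust=1:3)
#   rm1$clnopt  # number of clusters chosen by BIC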
function (indep, dep,
97 | ir=1, nclust=1:7, icrit=1.e-5, minsig=1.e-6,
98 | warnings=FALSE){
99 | n <- length(dep)
100 | p <- ncol(as.matrix(indep))
101 | clnopt <- min(nclust)
102 | czmax <- max(nclust)
103 | bic <- loglik <- (-1.e9)
104 | clbic <- rep((-1.e9), czmax)
105 | eps <- rep(0, czmax)
106 | rc <- rep(0,(p+1)*czmax)
107 | dim(rc) <- c(p+1,czmax)
108 | rv <- rep(0,czmax)
109 | z <- rep(0, n*czmax)
110 | dim(z) <- c(n,czmax)
111 | for (cln in nclust){
112 | for (i in 1:ir){
113 | cat("Iteration ",i," for ",cln," clusters.\n")
114 | emi <- regem(indep, dep, m=randcmatrix(n,cln,p), cln=cln,
115 | icrit=icrit, minsig=minsig, warnings=warnings)
116 | if (emi$warn)
117 | emi <- regem(indep, dep, m=randcmatrix(n,cln,p), cln=cln,
118 | icrit=icrit, minsig=minsig, warnings=warnings)
119 | if (!emi$warn){
120 | bicval <- 2*emi$loglik - log(n)*((p+3)*cln-1)
121 | if (bicval > clbic[cln])
122 | clbic[cln] <- bicval
123 | if (bicval > bic){
124 | clnopt <- cln
125 | bic <- bicval
126 | loglik <- emi$loglik
127 | eps[1:cln] <- emi$eps
128 | rc[,1:cln] <- emi$coef
129 | rv[1:cln] <- emi$var
130 | z[,1:cln] <- emi$z
131 | } # if bicval>bic
132 | } # if no warning
133 | } # for i
134 | } # for cln
135 | g <- c()
136 | for (i in 1:n)
137 | g[i] <- which.max(z[i,1:clnopt])
138 | out <- list(clnopt=clnopt, loglik=loglik, bic=clbic,
139 | coef=rc[,1:clnopt], var=rv[1:clnopt], eps=eps[1:clnopt],
140 | z=z[,1:clnopt], g=g)
141 | out
142 | # clnopt: optimal number of clusters, loglik: loglikelihood, bic: vector of
143 | # BIC values, coef: regression coefficients, var: error variances,
144 | # eps: cluster proportions, z: a posteriori probabilities, g: optimal
145 | # classification
146 | }
147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 |
--------------------------------------------------------------------------------
/data/tonedata.txt.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cran/fpc/ed319818bd2575441ff92d1905ebe77016e6c5e0/data/tonedata.txt.gz
--------------------------------------------------------------------------------
/man/adcoord.Rd:
--------------------------------------------------------------------------------
1 | \name{adcoord}
2 | \alias{adcoord}
3 | %- Also NEED an `\alias' for EACH other topic documented here.
4 | \title{Asymmetric discriminant coordinates}
5 | \description{
6 | Asymmetric discriminant coordinates as defined
7 | in Hennig (2004). Asymmetric discriminant projection means that there
8 | are two classes, one of which is treated as the homogeneous class
9 | (i.e., it should appear homogeneous and separated in the resulting projection)
10 | while the other may be heterogeneous.
11 | The principle is to maximize the ratio between the projection of a between
12 | classes separation matrix and the projection of the covariance matrix
13 | within the homogeneous class.
14 | }
15 | \usage{
16 | adcoord(xd, clvecd, clnum=1)
17 | }
18 | %- maybe also `usage' for other objects documented here.
19 | \arguments{
20 | \item{xd}{the data matrix; a numerical object which can be coerced
21 | to a matrix.}
22 | \item{clvecd}{integer vector of class numbers; length must equal
23 | \code{nrow(xd)}.}
24 | \item{clnum}{integer.
Number of the homogeneous class.}
25 | }
26 | \details{
27 | The square root of the homogeneous class's covariance matrix
28 | is inverted by use of
29 | \code{\link{tdecomp}}, which can be expected to give
30 | reasonable results for singular within-class covariance matrices.
31 | }
32 | % \details{
33 | % }
34 | \value{
35 | List with the following components
36 | \item{ev}{eigenvalues in descending order.}
37 | \item{units}{columns are coordinates of projection basis vectors.
38 | New points \code{x} can be projected onto the projection basis vectors
39 | by \code{x \%*\% units}}
40 | \item{proj}{projections of \code{xd} onto \code{units}.}
41 | }
42 | \references{
43 | Hennig, C. (2004) Asymmetric linear dimension reduction for classification.
44 | Journal of Computational and Graphical Statistics 13, 930-945.
45 |
46 | Hennig, C. (2005) A method for visual cluster validation. In:
47 | Weihs, C. and Gaul, W. (eds.): Classification - The Ubiquitous
48 | Challenge. Springer, Heidelberg 2005, 153-160.
49 |
50 | }
51 | \author{Christian Hennig
52 | \email{christian.hennig@unibo.it}
53 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/}
54 | }
55 |
56 | \seealso{
57 | \code{\link{plotcluster}} for straightforward discriminant plots.
58 | \code{\link{discrproj}} for alternatives.
59 | \code{\link{rFace}} for generation of the example data used below.
60 | }
61 |
62 | \examples{
63 | set.seed(4634)
64 | face <- rFace(600,dMoNo=2,dNoEy=0)
65 | grface <- as.integer(attr(face,"grouping"))
66 | adcf <- adcoord(face,grface==2)
67 | adcf2 <- adcoord(face,grface==4)
68 | plot(adcf$proj,col=1+(grface==2))
69 | plot(adcf2$proj,col=1+(grface==4))
70 | # ...done in one step by function plotcluster.
71 | }
72 | \keyword{multivariate}% at least one, from doc/KEYWORDS
73 | \keyword{classif}% __ONLY ONE__ keyword per line
74 | 75 | 76 | 77 |
--------------------------------------------------------------------------------
/man/ancoord.Rd:
--------------------------------------------------------------------------------
1 | \name{ancoord}
2 | \alias{ancoord}
3 | %- Also NEED an `\alias' for EACH other topic documented here.
4 | \title{Asymmetric neighborhood based discriminant coordinates}
5 | \description{
6 | Asymmetric neighborhood based discriminant coordinates as defined
7 | in Hennig (2004). Asymmetric discriminant projection means that there
8 | are two classes, one of which is treated as the homogeneous class
9 | (i.e., it should appear homogeneous and separated in the resulting projection)
10 | while the other may be heterogeneous.
11 | The principle is to maximize the ratio between the projection of a between
12 | classes covariance matrix, which is defined by averaging the
13 | between classes covariance matrices in the neighborhoods of the points
14 | of the homogeneous class, and the projection of the covariance matrix
15 | within the homogeneous class.
16 | }
17 | \usage{
18 | ancoord(xd, clvecd, clnum=1, nn=50, method="mcd", countmode=1000, ...)
19 | }
20 | %- maybe also `usage' for other objects documented here.
21 | \arguments{
22 | \item{xd}{the data matrix; a numerical object which can be coerced
23 | to a matrix.}
24 | \item{clvecd}{integer vector of class numbers; length must equal
25 | \code{nrow(xd)}.}
26 | \item{clnum}{integer. Number of the homogeneous class.}
27 | \item{nn}{integer. Number of points which belong to the neighborhood
28 | of each point (including the point itself).}
29 | \item{method}{one of
30 | "mve", "mcd" or "classical".
Covariance matrix used within the
31 | homogeneous class.
32 | "mcd" and "mve" are robust covariance matrices as implemented
33 | in \code{\link[MASS]{cov.rob}}. "classical" refers to the classical
34 | covariance matrix.}
35 | \item{countmode}{optional positive integer. \code{ancoord} shows a
36 | message after every \code{countmode} algorithm runs.}
37 | \item{...}{no effect}
38 | }
39 | \details{
40 | The square root of the homogeneous class's covariance matrix
41 | is inverted by use of
42 | \code{\link{tdecomp}}, which can be expected to give
43 | reasonable results for singular within-class covariance matrices.
44 | }
45 | % \details{
46 | % }
47 | \value{
48 | List with the following components
49 | \item{ev}{eigenvalues in descending order.}
50 | \item{units}{columns are coordinates of projection basis vectors.
51 | New points \code{x} can be projected onto the projection basis vectors
52 | by \code{x \%*\% units}}
53 | \item{proj}{projections of \code{xd} onto \code{units}.}
54 | }
55 | \references{
56 | Hennig, C. (2004) Asymmetric linear dimension reduction for classification.
57 | Journal of Computational and Graphical Statistics 13, 930-945.
58 |
59 | Hennig, C. (2005) A method for visual cluster validation. In:
60 | Weihs, C. and Gaul, W. (eds.): Classification - The Ubiquitous
61 | Challenge. Springer, Heidelberg 2005, 153-160.
62 |
63 | }
64 | \author{Christian Hennig
65 | \email{christian.hennig@unibo.it}
66 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/}
67 | }
68 |
69 | \seealso{
70 | \code{\link{plotcluster}} for straightforward discriminant plots.
71 | \code{\link{discrproj}} for alternatives.
72 | \code{\link{rFace}} for generation of the example data used below.
73 | }
74 |
75 | \examples{
76 | set.seed(4634)
77 | face <- rFace(600,dMoNo=2,dNoEy=0)
78 | grface <- as.integer(attr(face,"grouping"))
79 | ancf2 <- ancoord(face,grface==4)
80 | plot(ancf2$proj,col=1+(grface==4))
81 | # ...done in one step by function plotcluster.
82 | }
83 | \keyword{multivariate}% at least one, from doc/KEYWORDS
84 | \keyword{classif}% __ONLY ONE__ keyword per line
85 | 86 | 87 | 88 |
--------------------------------------------------------------------------------
/man/awcoord.Rd:
--------------------------------------------------------------------------------
1 | \name{awcoord}
2 | \alias{awcoord}
3 | %- Also NEED an `\alias' for EACH other topic documented here.
4 | \title{Asymmetric weighted discriminant coordinates}
5 | \description{
6 | Asymmetric weighted discriminant coordinates as defined
7 | in Hennig (2004). Asymmetric discriminant projection means that there
8 | are two classes, one of which is treated as the homogeneous class
9 | (i.e., it should appear homogeneous and separated in the resulting projection)
10 | while the other may be heterogeneous.
11 | The principle is to maximize the ratio between the projection of a between
12 | classes separation matrix and the projection of the covariance matrix
13 | within the homogeneous class. Points are weighted according to their
14 | (robust) Mahalanobis distance to the homogeneous class.
15 | }
16 | \usage{
17 | awcoord(xd, clvecd, clnum=1, mahal="square", method="classical",
18 | clweight=switch(method,classical=FALSE,TRUE),
19 | alpha=0.99, subsample=0, countmode=1000, ...)
20 | }
21 | %- maybe also `usage' for other objects documented here.
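% A sketch of the weighting scheme described in the arguments below (an
% illustration of my reading of the documentation, not package code): with
% squared Mahalanobis distances d2 to the homogeneous class, p variables
% and q <- qchisq(alpha, p), mahal="square" amounts to weights
% w <- pmin(1, q/d2), i.e. full weight inside the alpha-ellipsoid and
% decreasing weight outside, while mahal="md" amounts to
% w <- pmin(1, sqrt(q/d2)).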
22 | \arguments{
23 | \item{xd}{the data matrix; a numerical object which can be coerced
24 | to a matrix.}
25 | \item{clvecd}{integer vector of class numbers; length must equal
26 | \code{nrow(xd)}.}
27 | \item{clnum}{integer. Number of the homogeneous class.}
28 | \item{mahal}{"md" or "square". If "md", a point's weight is the
29 | square root of the \code{alpha}-quantile of the
30 | corresponding chi squared distribution
31 | divided by the square root of its Mahalanobis distance to the
32 | homogeneous class, capped at 1 (points closer to the
33 | homogeneous class than the quantile get full weight). If "square"
34 | (which is recommended), the squared Mahalanobis distance and the
35 | unrooted quantile are used instead.}
36 | \item{method}{one of
37 | "mve", "mcd" or "classical". Covariance matrix used within the
38 | homogeneous class and for the computation of the Mahalanobis distances.
39 | "mcd" and "mve" are robust covariance matrices as implemented
40 | in \code{\link[MASS]{cov.rob}}. "classical" refers to the classical
41 | covariance matrix.}
42 | \item{clweight}{logical. If \code{FALSE}, only the points of the
43 | heterogeneous class are weighted. This, together with
44 | \code{method="classical"}, computes AWC as defined in Hennig (2004). If
45 | \code{TRUE}, all points are weighted. This, together with
46 | \code{method="mcd"}, computes ARC as defined in Hennig (2004).}
47 | \item{alpha}{numeric between 0 and 1. The corresponding quantile of
48 | the chi squared distribution is used for the downweighting
49 | of points. Points with a smaller Mahalanobis distance to the
50 | homogeneous class get full weight.}
51 | \item{subsample}{integer. If 0, all points are used. Otherwise, only
52 | a random subsample of \code{subsample} points is used.}
53 | \item{countmode}{optional positive integer. \code{awcoord} shows a
54 | message after every \code{countmode} algorithm runs.}
55 | \item{...}{no effect}
56 | }
57 | \details{
58 | The square root of the homogeneous class's covariance matrix
59 | is inverted by use of
60 | \code{\link{tdecomp}}, which can be expected to give
61 | reasonable results for singular within-class covariance matrices.
62 | }
63 | % \details{
64 | % }
65 | \value{
66 | List with the following components
67 | \item{ev}{eigenvalues in descending order.}
68 | \item{units}{columns are coordinates of projection basis vectors.
69 | New points \code{x} can be projected onto the projection basis vectors
70 | by \code{x \%*\% units}}
71 | \item{proj}{projections of \code{xd} onto \code{units}.}
72 | }
73 | \references{
74 | Hennig, C. (2004) Asymmetric linear dimension reduction for classification.
75 | Journal of Computational and Graphical Statistics 13, 930-945.
76 |
77 | Hennig, C. (2005) A method for visual cluster validation. In:
78 | Weihs, C. and Gaul, W. (eds.): Classification - The Ubiquitous
79 | Challenge. Springer, Heidelberg 2005, 153-160.
80 | }
81 | \author{Christian Hennig
82 | \email{christian.hennig@unibo.it}
83 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/}
84 | }
85 |
86 | \seealso{
87 | \code{\link{plotcluster}} for straightforward discriminant plots.
88 | \code{\link{discrproj}} for alternatives.
89 | \code{\link{rFace}} for generation of the example data used below.
90 | } 91 | 92 | \examples{ 93 | set.seed(4634) 94 | face <- rFace(600,dMoNo=2,dNoEy=0) 95 | grface <- as.integer(attr(face,"grouping")) 96 | awcf <- awcoord(face,grface==1) 97 | # awcf2 <- ancoord(face,grface==1, method="mcd") 98 | plot(awcf$proj,col=1+(grface==1)) 99 | # plot(awcf2$proj,col=1+(grface==1)) 100 | # ...done in one step by function plotcluster. 101 | } 102 | \keyword{multivariate}% at least one, from doc/KEYWORDS 103 | \keyword{classif}% __ONLY ONE__ keyword per line 104 | 105 | 106 | 107 | -------------------------------------------------------------------------------- /man/batcoord.Rd: -------------------------------------------------------------------------------- 1 | \name{batcoord} 2 | \alias{batcoord} 3 | \alias{batvarcoord} 4 | %- Also NEED an `\alias' for EACH other topic documented here. 5 | \title{Bhattacharyya discriminant projection} 6 | \description{ 7 | Computes Bhattacharyya discriminant projection coordinates 8 | as described in Fukunaga (1990), p. 455 ff. 9 | } 10 | \usage{ 11 | batcoord(xd, clvecd, clnum=1, dom="mean") 12 | batvarcoord(xd, clvecd, clnum=1) 13 | } 14 | %- maybe also `usage' for other objects documented here. 15 | \arguments{ 16 | \item{xd}{the data matrix; a numerical object which can be coerced 17 | to a matrix.} 18 | \item{clvecd}{integer or logical vector of class numbers; length must equal 19 | \code{nrow(xd)}.} 20 | \item{clnum}{integer, one of the values of \code{clvecd}, if this is 21 | an integer vector. Bhattacharyya projections can only be computed if 22 | there are only two classes in the dataset. \code{clnum} is the number 23 | of one of the two classes. All the points indicated by other values 24 | of \code{clvecd} are interpreted as the second class.} 25 | \item{dom}{string. \code{dom="mean"} means that the discriminant 26 | coordinate for the group means is computed as the first projection 27 | direction by 28 | \code{\link{discrcoord}} (option \code{pool="equal"}; both classes 29 | have the same weight for computing the within-class covariance 30 | matrix). Then the data is projected into a subspace orthogonal 31 | (w.r.t. the within-class covariance) to the 32 | discriminant coordinate, and the projection coordinates to maximize 33 | the differences in variance are computed. \cr 34 | \code{dom="variance"} means that the projection coordinates 35 | maximizing the difference in variances are computed. Then they are 36 | ordered with respect to the Bhattacharyya distance, which takes also 37 | the mean differences into account. Both procedures are implemented 38 | as described in Fukunaga (1990).} 39 | } 40 | \details{ 41 | \code{batvarcoord} computes the optimal projection coordinates with 42 | respect to the difference in variances. \code{batcoord} combines the 43 | differences in mean and variance as explained for the argument \code{dom}. 44 | } 45 | \value{ 46 | \code{batcoord} returns a list with the components \code{ev, rev, 47 | units, proj}. \code{batvarcoord} returns a list with the components 48 | \code{ev, rev, units, proj, W, S1, S2}. 49 | \item{ev}{vector of eigenvalues. If \code{dom="mean"}, then first eigenvalue 50 | from \code{\link{discrcoord}}. Further eigenvalues are of 51 | \eqn{S_1^{-1}S_2}, where \eqn{S_i} is the covariance matrix of class 52 | i. For \code{batvarcoord} or 53 | if \code{dom="variance"}, all eigenvalues come from 54 | \eqn{S_1^{-1}S_2} and are ordered by \code{rev}.} 55 | \item{rev}{for \code{batcoord}: 56 | vector of projected Bhattacharyya distances (Fukunaga 57 | (1990), p. 99). 
These determine the quality of the projection coordinates.
58 | For \code{batvarcoord}: vector of amount of projected difference in
59 | variances.}
60 | \item{units}{columns are coordinates of projection basis vectors.
61 | New points \code{x} can be projected onto the projection basis vectors
62 | by \code{x \%*\% units}.}
63 | \item{proj}{projections of \code{xd} onto \code{units}.}
64 |
65 | \item{W}{matrix \eqn{S_1^{-1}S_2}.}
66 | \item{S1}{covariance matrix of the first class.}
67 | \item{S2}{covariance matrix of the second class.}
68 | }
69 | \references{
70 | Fukunaga, K. (1990). \emph{Introduction to Statistical Pattern
71 | Recognition} (2nd ed.). Boston: Academic Press.
72 | }
73 | \author{Christian Hennig
74 | \email{christian.hennig@unibo.it}
75 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/}}
76 |
77 | \seealso{
78 | \code{\link{plotcluster}} for straightforward discriminant plots.
79 |
80 | \code{\link{discrcoord}} for discriminant coordinates.
81 |
82 | \code{\link{rFace}} for generation of the example data used below.
83 | }
84 |
85 | \examples{
86 | set.seed(4634)
87 | face <- rFace(600,dMoNo=2,dNoEy=0)
88 | grface <- as.integer(attr(face,"grouping"))
89 | bcf2 <- batcoord(face,grface==2)
90 | plot(bcf2$proj,col=1+(grface==2))
91 | bcfv2 <- batcoord(face,grface==2,dom="variance")
92 | plot(bcfv2$proj,col=1+(grface==2))
93 | bcfvv2 <- batvarcoord(face,grface==2)
94 | plot(bcfvv2$proj,col=1+(grface==2))
95 | }
96 | \keyword{multivariate}% at least one, from doc/KEYWORDS
97 | \keyword{classif}% __ONLY ONE__ keyword per line
98 | 99 |
--------------------------------------------------------------------------------
/man/bhattacharyya.dist.Rd:
--------------------------------------------------------------------------------
1 | \name{bhattacharyya.dist}
2 | \alias{bhattacharyya.dist}
3 | %- Also NEED an `\alias' for EACH other topic documented here.
4 | \title{Bhattacharyya distance between Gaussian distributions}
5 | \description{
6 | Computes Bhattacharyya distance between two multivariate
7 | Gaussian distributions. See Fukunaga (1990).
8 | }
9 | \usage{
10 | bhattacharyya.dist(mu1, mu2, Sigma1, Sigma2)
11 | }
12 | %- maybe also `usage' for other objects documented here.
13 | \arguments{
14 | \item{mu1}{mean vector of component 1.}
15 | \item{mu2}{mean vector of component 2.}
16 | \item{Sigma1}{covariance matrix of component 1.}
17 | \item{Sigma2}{covariance matrix of component 2.}
18 | }
19 |
20 | \value{
21 | The Bhattacharyya distance between the two Gaussian distributions.
22 | }
23 |
24 | \references{
25 | Fukunaga, K. (1990) \emph{Introduction to Statistical Pattern
26 | Recognition}, 2nd edition, Academic
27 | Press, New York.
28 |
29 | Hennig, C. (2010) Methods for merging Gaussian mixture components,
30 | \emph{Advances in Data Analysis and Classification}, 4, 3-34.
31 | }
32 | \note{
33 | Thanks to David Pinto for improving this function.
34 | }
35 | \author{Christian Hennig
36 | \email{christian.hennig@unibo.it}
37 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/}
38 | }
39 | \examples{
40 | round(bhattacharyya.dist(c(1,1),c(2,5),diag(2),diag(2)),digits=2)
41 | }
42 | \keyword{multivariate}
43 | 44 | 45 | 46 |
--------------------------------------------------------------------------------
/man/bhattacharyya.matrix.Rd:
--------------------------------------------------------------------------------
1 | \name{bhattacharyya.matrix}
2 | \alias{bhattacharyya.matrix}
3 | %- Also NEED an `\alias' for EACH other topic documented here.
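% For reference, the standard definition cited above (Fukunaga 1990): for
% a pair (mu1,Sigma1), (mu2,Sigma2) with S = (Sigma1+Sigma2)/2, the
% Bhattacharyya distance between the two Gaussian distributions is
%   b = t(mu2-mu1) %*% solve(S) %*% (mu2-mu1)/8 +
%       log(det(S)/sqrt(det(Sigma1)*det(Sigma2)))/2,
% and exp(-b) is the upper bound for the misclassification probability
% that misclassification.bound=TRUE in bhattacharyya.matrix returns.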
4 | \title{Matrix of pairwise Bhattacharyya distances}
5 | \description{
6 | Computes Bhattacharyya distances for pairs of components
7 | given the parameters of a Gaussian mixture.
8 | }
9 | \usage{
10 | bhattacharyya.matrix(muarray,Sigmaarray,ipairs="all",
11 | misclassification.bound=TRUE)
12 | }
13 | %- maybe also `usage' for other objects documented here.
14 | \arguments{
15 | \item{muarray}{matrix of component means (different components are in
16 | different columns).}
17 | \item{Sigmaarray}{three dimensional array with component covariance
18 | matrices (the third dimension refers to components).}
19 | \item{ipairs}{\code{"all"} or list of vectors of two integers. If
20 | \code{ipairs="all"}, computations are carried out for all pairs of
21 | components. Otherwise, ipairs gives the pairs of components for
22 | which computations are carried out.}
23 | \item{misclassification.bound}{logical. If \code{TRUE}, upper bounds
24 | for misclassification probabilities \code{exp(-b)}
25 | are given out instead of the original Bhattacharyya distances \code{b}.}
26 | }
27 |
28 | \value{
29 | A matrix with Bhattacharyya distances (or derived misclassification
30 | bounds, see above) between pairs of Gaussian distributions with the
31 | provided parameters. If \code{ipairs!="all"}, the Bhattacharyya
32 | distance and the misclassification bound are given as \code{NA} for
33 | pairs not included in \code{ipairs}.
34 | }
35 |
36 | \references{
37 | Fukunaga, K. (1990) \emph{Introduction to Statistical Pattern
38 | Recognition}, 2nd edition, Academic
39 | Press, New York.
40 |
41 | Hennig, C. (2010) Methods for merging Gaussian mixture components,
42 | \emph{Advances in Data Analysis and Classification}, 4, 3-34.
43 | }
44 | \author{Christian Hennig
45 | \email{christian.hennig@unibo.it}
46 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/}
47 | }
48 |
49 | \seealso{
50 | \code{\link{bhattacharyya.dist}}
51 | }
52 |
53 | \examples{
54 | muarray <-cbind(c(0,0),c(0,0.1),c(10,10))
55 | sigmaarray <- array(c(diag(2),diag(2),diag(2)),dim=c(2,2,3))
56 | bhattacharyya.matrix(muarray,sigmaarray,ipairs=list(c(1,2),c(2,3)))
57 |
58 | }
59 | \keyword{cluster}% at least one, from doc/KEYWORDS
60 | \keyword{multivariate}
61 | 62 | 63 | 64 |
--------------------------------------------------------------------------------
/man/calinhara.Rd:
--------------------------------------------------------------------------------
1 | \name{calinhara}
2 | \alias{calinhara}
3 | %- Also NEED an `\alias' for EACH other topic documented here.
4 | \title{Calinski-Harabasz index}
5 | \description{
6 | Calinski-Harabasz index for estimating the number of clusters,
7 | based here on an observations/variables matrix. A distance based
8 | version is available through \code{cluster.stats}.
9 | }
10 | \usage{
11 | calinhara(x,clustering,cn=max(clustering))
12 | }
13 | %- maybe also `usage' for other objects documented here.
14 | \arguments{
15 | \item{x}{data matrix or data frame.}
16 | \item{clustering}{vector of integers. Clustering.}
17 | \item{cn}{integer. Number of clusters.}
18 | }
19 |
20 | \value{
21 | Calinski-Harabasz statistic, which is
22 | \code{(n-cn)*sum(diag(B))/((cn-1)*sum(diag(W)))}, where B is the
23 | between-cluster dispersion matrix of the cluster means
24 | and W is the pooled within-cluster covariance matrix.
25 | }
26 |
27 | \references{
28 | Calinski, T., and Harabasz, J. (1974) A Dendrite Method for Cluster
29 | Analysis, \emph{Communications in Statistics}, 3, 1-27.
30 | }
31 |
32 | \author{Christian Hennig
33 | \email{christian.hennig@unibo.it}
34 | \url{https://www.unibo.it/sitoweb/christian.hennig/en}}
35 |
36 | \seealso{\code{\link{cluster.stats}}}
37 |
38 | \examples{
39 | set.seed(98765)
40 | iriss <- iris[sample(150,20),-5]
41 | km <- kmeans(iriss,3)
42 | round(calinhara(iriss,km$cluster),digits=2)
43 | }
44 | 45 | 46 |
47 | \keyword{cluster}% __ONLY ONE__ keyword per line
--------------------------------------------------------------------------------
/man/can.Rd:
--------------------------------------------------------------------------------
1 | \name{can}
2 | \alias{can}
3 | %- Also NEED an `\alias' for EACH other topic documented here.
4 | \title{Generation of the tuning constant for regression fixed point clusters}
5 | \description{
6 | Generates tuning constants \code{ca}
7 | for \code{\link{fixreg}} depending on
8 | the number of points and variables of the dataset.
9 |
10 | Only intended for use in \code{\link{fixreg}}.
11 |
12 | }
13 | \usage{
14 | can(n, p)
15 | }
16 | %- maybe also `usage' for other objects documented here.
17 | \arguments{
18 | \item{n}{positive integer. Number of points.}
19 | \item{p}{positive integer. Number of independent variables.}
20 | }
21 | \details{
22 | The formula is
23 | \eqn{3+33/(n*2^{-(p-1)/2})^{1/3}+2900000/(n*2^{-(p-1)/2})^3}. For
24 | justification cf. Hennig (2002).
25 | }
26 | \value{
27 | A number.
28 | }
29 | \references{
30 | Hennig, C. (2002) Fixed point clusters for linear regression:
31 | computation and comparison, \emph{Journal of
32 | Classification} 19, 249-276.
33 | }
34 |
35 | \author{Christian Hennig
36 | \email{christian.hennig@unibo.it}
37 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/}}
38 |
39 | \seealso{\code{\link{fixreg}}}
40 |
41 | \examples{
42 | can(429,3)
43 | }
44 | \keyword{arith}% at least one, from doc/KEYWORDS
45 | 46 |
--------------------------------------------------------------------------------
/man/cat2bin.Rd:
--------------------------------------------------------------------------------
1 | \name{cat2bin}
2 | \alias{cat2bin}
3 | %- Also NEED an `\alias' for EACH other topic documented here.
4 | \title{Recode nominal variables to binary variables}
5 | \description{
6 | Recodes a dataset with nominal variables so that the nominal
7 | variables are replaced by binary variables for the categories.
8 | }
9 | \usage{
10 | cat2bin(x,categorical=NULL)
11 | }
12 | %- maybe also `usage' for other objects documented here.
13 | \arguments{
14 | \item{x}{data matrix or data frame. The data need to be organised
15 | case-wise, i.e., if there are only categorical variables and 15
16 | cases that all take the values c(1,1,2) on the 3 variables, the data
17 | matrix needs 15 rows with values 1 1 2. (Categorical variables could
18 | take numbers or strings or anything that can be coerced to factor levels as values.)}
19 | \item{categorical}{vector of numbers of variables to be recoded.}
20 | }
21 |
22 | \value{
23 | A list with components
24 | \item{data}{data matrix with variables specified in \code{categorical}
25 | replaced by 0-1 variables, one for each category.}
26 | \item{variableinfo}{list of lists.
One list for every variable in the 27 | original dataset, with four components each, namely \code{type} 28 | (\code{"categorical"} or \code{"not recoded"}), \code{levels} 29 | (levels of nominal recoded variables in order of binary variable in 30 | output dataset), \code{ncat} (number of categories for recoded 31 | variables), \code{varnum} (number of variables in output dataset 32 | belonging to this original variable).} 33 | } 34 | 35 | \author{Christian Hennig 36 | \email{christian.hennig@unibo.it} 37 | \url{https://www.unibo.it/sitoweb/christian.hennig/en}} 38 | 39 | \seealso{\code{\link{discrete.recode}}} 40 | 41 | \examples{ 42 | set.seed(776655) 43 | v1 <- rnorm(20) 44 | v2 <- rnorm(20) 45 | d1 <- sample(1:5,20,replace=TRUE) 46 | d2 <- sample(1:4,20,replace=TRUE) 47 | ldata <-cbind(v1,v2,d1,d2) 48 | lc <- cat2bin(ldata,categorical=3:4) 49 | } 50 | 51 | 52 | 53 | \keyword{manip}% __ONLY ONE__ keyword per line 54 | -------------------------------------------------------------------------------- /man/cdbw.Rd: -------------------------------------------------------------------------------- 1 | \name{cdbw} 2 | \alias{cdbw} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{CDbw-index for cluster validation} 5 | \description{ 6 | CDbw-index for cluster validation, as defined in Halkidi and 7 | Vazirgiannis (2008), Halkidi et al. (2015). 8 | } 9 | \usage{ 10 | cdbw(x,clustering,r=10,s=seq(0.1,0.8,by=0.1), 11 | clusterstdev=TRUE,trace=FALSE) 12 | 13 | } 14 | %- maybe also `usage' for other objects documented here. 15 | \arguments{ 16 | \item{x}{something that can be coerced into a numerical 17 | matrix. Euclidean dataset.} 18 | \item{clustering}{vector of integers with length \code{=nrow(x)}; 19 | indicating the cluster for each observation.} 20 | \item{r}{integer. Number of cluster border representatives.} 21 | \item{s}{numerical vector of shrinking factors (between 0 and 1).} 22 | \item{clusterstdev}{logical. If \code{TRUE}, the neighborhood radius 23 | for intra-cluster density is the within-cluster estimated squared 24 | distance from the mean of the cluster; otherwise it is the average of 25 | these over all clusters.} 26 | \item{trace}{logical. If \code{TRUE}, results are printed for the 27 | steps to compute the index.} 28 | } 29 | 30 | \value{ 31 | List with components (see Halkidi and Vazirgiannis (2008), Halkidi et 32 | al. (2015) for details) 33 | \item{cdbw}{value of CDbw index (the higher the better).} 34 | \item{cohesion}{cohesion.} 35 | \item{compactness}{compactness.} 36 | \item{sep}{separation.} 37 | } 38 | 39 | 40 | \references{ 41 | Halkidi, M. and Vazirgiannis, M. (2008) A density-based cluster 42 | validity approach using multi-representatives. \emph{Pattern 43 | Recognition Letters} 29, 773-786. 44 | 45 | Halkidi, M., Vazirgiannis, M. and Hennig, C. (2015) Method-independent 46 | indices for cluster validation. In C. Hennig, M. Meila, F. Murtagh, 47 | R. Rocci (eds.) \emph{Handbook of Cluster Analysis}, CRC 48 | Press/Taylor \code{&} Francis, Boca Raton. 
49 | } 50 | 51 | \author{Christian Hennig 52 | \email{christian.hennig@unibo.it} 53 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 54 | } 55 | \examples{ 56 | options(digits=3) 57 | iriss <- as.matrix(iris[c(1:5,51:55,101:105),-5]) 58 | irisc <- as.numeric(iris[c(1:5,51:55,101:105),5]) 59 | cdbw(iriss,irisc) 60 | } 61 | \keyword{cluster}% at least one, from doc/KEYWORDS 62 | 63 | 64 | 65 | 66 | -------------------------------------------------------------------------------- /man/cgrestandard.Rd: -------------------------------------------------------------------------------- 1 | \name{cgrestandard} 2 | \alias{cgrestandard} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Standardise cluster validation statistics by random clustering results} 5 | \description{ 6 | Standardises cluster validity statistics as produced by 7 | \code{\link{clustatsum}} relative to results that were achieved by 8 | random clusterings on the same data by 9 | \code{\link{randomclustersim}}. The aim is to make differences between 10 | values comparable between indexes, see Hennig (2019), Akhanli and 11 | Hennig (2020). 12 | 13 | This is mainly for use within \code{\link{clusterbenchstats}}. 14 | } 15 | \usage{ 16 | cgrestandard(clusum,clusim,G,percentage=FALSE, 17 | useallmethods=FALSE, 18 | useallg=FALSE, othernc=list()) 19 | } 20 | %- maybe also `usage' for other objects documented here. 21 | \arguments{ 22 | \item{clusum}{object of class "valstat", see \code{\link{clusterbenchstats}}.} 23 | \item{clusim}{list; output object of \code{\link{randomclustersim}}, 24 | see there.} 25 | \item{G}{vector of integers. Numbers of clusters to consider.} 26 | \item{percentage}{logical. If \code{FALSE}, standardisation is done to 27 | mean zero and standard deviation 1 using the random clusterings. If 28 | \code{TRUE}, the output is the percentage of simulated values below 29 | the result (more precisely, this number plus one divided by the 30 | total plus one).} 31 | \item{useallmethods}{logical. If \code{FALSE}, only random clustering 32 | results from \code{clusim} are used for standardisation. If 33 | \code{TRUE}, also clustering results from other methods as given in 34 | \code{clusum} are used.} 35 | \item{useallg}{logical. If \code{TRUE}, standardisation uses results 36 | from all numbers of clusters in \code{G}. If \code{FALSE}, 37 | standardisation of results for a specific number of cluster only 38 | uses results from that number of clusters.} 39 | \item{othernc}{list of integer vectors of length 2. This allows the 40 | incorporation of methods that bring forth other numbers of clusters 41 | than those in \code{G}, for example because a method may have 42 | automatically estimated a number of clusters. The first number is 43 | the number of the clustering method (the order is determined by 44 | argument \code{clustermethod} in 45 | \code{\link{clusterbenchstats}}), the second number is the 46 | number of clusters. Results specified here are only standardised in 47 | \code{useallg=TRUE}.} 48 | } 49 | 50 | \details{ 51 | \code{cgrestandard} will add a statistic named \code{dmode} to the 52 | input set of validation statistics, which is defined as 53 | \code{0.75*dindex+0.25*highdgap}, aggregating these two closely 54 | related statistics, see \code{\link{clustatsum}}. 55 | } 56 | 57 | \value{ 58 | List of class \code{"valstat"}, see 59 | \code{\link{valstat.object}}, with standardised results as 60 | explained above. 61 | } 62 | \references{ 63 | Hennig, C. 
(2019) Cluster validation by measurement of clustering
64 | characteristics relevant to the user. In C. H. Skiadas (ed.)
65 | \emph{Data Analysis and Applications 1: Clustering and Regression,
66 | Modeling-estimating, Forecasting and Data Mining, Volume 2}, Wiley,
67 | New York, 1-24,
68 | \url{https://arxiv.org/abs/1703.09282}
69 |
70 | Akhanli, S. and Hennig, C. (2020) Calibrating and aggregating cluster
71 | validity indexes for context-adapted comparison of clusterings.
72 | \emph{Statistics and Computing}, 30, 1523-1544,
73 | \url{https://link.springer.com/article/10.1007/s11222-020-09958-2}, \url{https://arxiv.org/abs/2002.01822}
74 | 75 | 76 |
77 | }
78 | \author{Christian Hennig
79 | \email{christian.hennig@unibo.it}
80 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/}
81 | }
82 |
83 | \seealso{
84 | \code{\link{valstat.object}}, \code{\link{clusterbenchstats}}, \code{\link{stupidkcentroids}}, \code{\link{stupidknn}}, \code{\link{stupidkfn}}, \code{\link{stupidkaven}}, \code{\link{clustatsum}}
85 | }
86 |
87 | \examples{
88 |
89 | set.seed(20000)
90 | options(digits=3)
91 | face <- rFace(10,dMoNo=2,dNoEy=0,p=2)
92 | dif <- dist(face)
93 | clusum <- list()
94 | clusum[[2]] <- list()
95 | cl12 <- kmeansCBI(face,2)
96 | cl13 <- kmeansCBI(face,3)
97 | cl22 <- claraCBI(face,2)
98 | cl23 <- claraCBI(face,3)
99 | ccl12 <- clustatsum(dif,cl12$partition)
100 | ccl13 <- clustatsum(dif,cl13$partition)
101 | ccl22 <- clustatsum(dif,cl22$partition)
102 | ccl23 <- clustatsum(dif,cl23$partition)
103 | clusum[[1]] <- list()
104 | clusum[[1]][[2]] <- ccl12
105 | clusum[[1]][[3]] <- ccl13
106 | clusum[[2]][[2]] <- ccl22
107 | clusum[[2]][[3]] <- ccl23
108 | clusum$maxG <- 3
109 | clusum$minG <- 2
110 | clusum$method <- c("kmeansCBI","claraCBI")
111 | clusum$name <- c("kmeansCBI","claraCBI")
112 | clusim <- randomclustersim(dist(face),G=2:3,nnruns=1,kmruns=1,
113 | fnruns=1,avenruns=1,monitor=FALSE)
114 | cgr <- cgrestandard(clusum,clusim,2:3)
115 | cgr2 <- cgrestandard(clusum,clusim,2:3,useallg=TRUE)
116 | cgr3 <- cgrestandard(clusum,clusim,2:3,percentage=TRUE)
117 | print(str(cgr))
118 | print(str(cgr2))
119 | print(cgr3[[1]][[2]])
120 | }
121 | \keyword{multivariate}% at least one, from doc/KEYWORDS
122 | \keyword{cluster}% __ONLY ONE__ keyword per line
123 | 124 | 125 |
--------------------------------------------------------------------------------
/man/classifdist.Rd:
--------------------------------------------------------------------------------
1 | \name{classifdist}
2 | \alias{classifdist}
3 | \alias{classifnp}
4 | %- Also NEED an `\alias' for EACH other topic documented here.
5 | \title{Classification of unclustered points}
6 | \description{
7 | Various methods for classification of unclustered points from
8 | clustered points for use within functions \code{nselectboot}
9 | and \code{prediction.strength}.
10 | }
11 | \usage{
12 | classifdist(cdist,clustering,
13 | method="averagedist",
14 | centroids=NULL,nnk=1)
15 |
16 | classifnp(data,clustering,
17 | method="centroid",cdist=NULL,
18 | centroids=NULL,nnk=1)
19 | }
20 |
21 | %- maybe also `usage' for other objects documented here.
22 | \arguments{
23 | \item{cdist}{dissimilarity matrix or \code{dist}-object. Necessary for
24 | \code{classifdist} but optional for \code{classifnp} and there only
25 | used if \code{method="averagedist"} (if not provided, \code{dist} is
26 | applied to \code{data}).}
27 | \item{data}{something that can be coerced into an
28 | \code{n*p} data matrix.}
29 | \item{clustering}{integer vector.
Gives the cluster number (between 1
30 | and k for k clusters) for clustered points and should be -1 for
31 | points to be classified.}
32 | \item{method}{one of \code{"averagedist", "centroid", "qda",
33 | "knn"}. See details.}
34 | \item{centroids}{for \code{classifnp} a k times p matrix of cluster
35 | centroids. For \code{classifdist} a vector of numbers of centroid
36 | objects as provided by \code{\link[cluster]{pam}}. Only used if
37 | \code{method="centroid"}; in that case mandatory for
38 | \code{classifdist} but optional for \code{classifnp}, where cluster mean
39 | vectors are computed if \code{centroids=NULL}.}
40 | \item{nnk}{number of nearest neighbours if \code{method="knn"}.}
41 | }
42 | 
43 | \details{
44 | \code{classifdist} is for data given as dissimilarity matrix,
45 | \code{classifnp} is for data given as n times p data matrix.
46 | The following methods are supported:
47 | \describe{
48 | \item{"centroid"}{assigns observations to the cluster with closest
49 | cluster centroid as specified in argument \code{centroids} (this
50 | is associated with k-means and pam/clara-clustering).}
51 | \item{"qda"}{only in \code{classifnp}. Classifies by quadratic
52 | discriminant analysis (this is associated with Gaussian clusters
53 | with flexible covariance matrices), calling
54 | \code{\link[MASS]{qda}} with default settings. If
55 | \code{\link[MASS]{qda}} gives an error (usually because a class
56 | was too small), \code{\link[MASS]{lda}} is used.}
57 | \item{"lda"}{only in \code{classifnp}. Classifies by linear
58 | discriminant analysis (this is associated with Gaussian clusters
59 | with equal covariance matrices), calling
60 | \code{\link[MASS]{lda}} with default settings.}
61 | \item{"averagedist"}{assigns to the cluster to which an observation
62 | has the minimum average dissimilarity to all points in the cluster
63 | (this is associated with average linkage clustering).}
64 | \item{"knn"}{classifies by \code{nnk} nearest neighbours (for
65 | \code{nnk=1}, this is associated with single linkage clustering).
66 | Calls \code{\link[class]{knn}} in \code{classifnp}.}
67 | \item{"fn"}{classifies by the minimum distance to the farthest
68 | neighbour (this is associated with complete linkage clustering).}
69 | }
70 | }
71 | 
72 | \value{
73 | An integer vector giving cluster numbers for all observations; those
74 | for the observations already clustered in the input are the same as in
75 | the input.
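For instance, if \code{clustering} marks five observations with
\code{-1}, exactly those five entries of the output are replaced by
predicted cluster numbers, and all remaining entries are returned
unchanged.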
76 | } 77 | 78 | \author{Christian Hennig 79 | \email{christian.hennig@unibo.it} 80 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 81 | } 82 | \seealso{ 83 | \code{\link{prediction.strength}}, \code{\link{nselectboot}} 84 | } 85 | \examples{ 86 | set.seed(20000) 87 | x1 <- rnorm(50) 88 | y <- rnorm(100) 89 | x2 <- rnorm(40,mean=20) 90 | x3 <- rnorm(10,mean=25,sd=100) 91 | x <-cbind(c(x1,x2,x3),y) 92 | truec <- c(rep(1,50),rep(2,40),rep(3,10)) 93 | topredict <- c(1,2,51,52,91) 94 | clumin <- truec 95 | clumin[topredict] <- -1 96 | 97 | classifnp(x,clumin, method="averagedist") 98 | classifnp(x,clumin, method="qda") 99 | classifdist(dist(x),clumin, centroids=c(3,53,93),method="centroid") 100 | classifdist(dist(x),clumin,method="knn") 101 | 102 | } 103 | \keyword{cluster}% at least one, from doc/KEYWORDS 104 | \keyword{multivariate} 105 | 106 | 107 | 108 | -------------------------------------------------------------------------------- /man/clucols.Rd: -------------------------------------------------------------------------------- 1 | \name{clucols} 2 | \alias{clucols} 3 | \alias{clugrey} 4 | \alias{clusym} 5 | %- Also NEED an `\alias' for EACH other topic documented here. 6 | \title{Sets of colours and symbols for cluster plotting} 7 | \description{ 8 | \code{clucols} gives out a vector of different random colours. 9 | \code{clugrey} gives out a vector of equidistant grey scales. 10 | \code{clusym} is a vector of different symbols starting from "1", 11 | "2",... 12 | } 13 | \usage{ 14 | clucols(i, seed=NULL) 15 | clugrey(i,max=0.9) 16 | clusym 17 | } 18 | %- maybe also `usage' for other objects documented here. 19 | \arguments{ 20 | \item{i}{integer. Length of output vector (number of clusters).} 21 | \item{seed}{integer. Random seed.} 22 | \item{max}{between 0 and 1. Maximum grey scale value, see 23 | \code{\link{grey}} (close to 1 is bright).} 24 | } 25 | 26 | \value{ 27 | \code{clucols} gives out a vector of different random colours. 28 | \code{clugrey} gives out a vector of equidistant grey scales. 29 | \code{clusym} is a vector of different characters starting from "1", 30 | "2",... 31 | } 32 | 33 | \author{Christian Hennig 34 | \email{christian.hennig@unibo.it} 35 | \url{https://www.unibo.it/sitoweb/christian.hennig/en}} 36 | 37 | \examples{ 38 | set.seed(112233) 39 | require(MASS) 40 | require(flexmix) 41 | data(Cars93) 42 | Cars934 <- Cars93[,c(3,5,8,10)] 43 | cc <- 44 | discrete.recode(Cars934,xvarsorted=FALSE,continuous=c(2,3),discrete=c(1,4)) 45 | fcc <- flexmix(cc$data~1,k=3, 46 | model=lcmixed(continuous=2,discrete=2,ppdim=c(6,3),diagonal=TRUE)) 47 | plot(Cars934[,c(2,3)],col=clucols(3)[fcc@cluster],pch=clusym[fcc@cluster]) 48 | } 49 | 50 | \keyword{cluster}% __ONLY ONE__ keyword per line 51 | -------------------------------------------------------------------------------- /man/clujaccard.Rd: -------------------------------------------------------------------------------- 1 | \name{clujaccard} 2 | \alias{clujaccard} 3 | 4 | %- Also NEED an `\alias' for EACH other topic documented here. 5 | \title{Jaccard similarity between logical vectors} 6 | \description{ 7 | Jaccard similarity between logical or 0-1 vectors: 8 | \code{sum(c1 & c2)/sum(c1 | c2)}. 9 | } 10 | \usage{ 11 | clujaccard(c1,c2,zerobyzero=NA) 12 | } 13 | \arguments{ 14 | \item{c1}{logical or 0-1-vector.} 15 | \item{c2}{logical or 0-1-vector (same length).} 16 | \item{zerobyzero}{result if \code{sum(c1 | c2)=0}.} 17 | } 18 | 19 | \value{ 20 | Numeric between 0 and 1. 
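As a small worked example of the formula in the description,
\code{clujaccard(c(TRUE,TRUE,FALSE),c(TRUE,FALSE,FALSE))} gives
\code{0.5}: both vectors are \code{TRUE} in one position, while at
least one of them is \code{TRUE} in two positions.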
21 | }
22 | \author{Christian Hennig
23 | \email{christian.hennig@unibo.it}
24 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/}
25 | }
26 | \examples{
27 | c1 <- rep(TRUE,10)
28 | c2 <- c(FALSE,rep(TRUE,9))
29 | clujaccard(c1,c2)
30 | }
31 | \keyword{cluster}% at least one, from doc/KEYWORDS
32 | 
33 | 
34 | 
35 | 
--------------------------------------------------------------------------------
/man/clusexpect.Rd:
--------------------------------------------------------------------------------
1 | \name{clusexpect}
2 | \alias{clusexpect}
3 | %- Also NEED an `\alias' for EACH other topic documented here.
4 | \title{Expected value of the number of times a fixed point
5 | cluster is found}
6 | \description{
7 | A rough approximation of the expectation of the number of times a well
8 | separated fixed point
9 | cluster (FPC) of size \code{cn} is found in \code{ir} fixed point
10 | iterations of \code{\link{fixreg}}.
11 | }
12 | \usage{
13 | clusexpect(n, p, cn, ir)
14 | }
15 | %- maybe also `usage' for other objects documented here.
16 | \arguments{
17 | \item{n}{positive integer. Total number of points.}
18 | \item{p}{positive integer. Number of independent variables.}
19 | \item{cn}{positive integer smaller or equal to \code{n}.
20 | Size of the FPC.}
21 | \item{ir}{positive integer. Number of fixed point iterations.}
22 | }
23 | \details{
24 | The approximation is based on the assumption that a well separated FPC
25 | is found iff all \code{p+2} points of the initial configuration come
26 | from the FPC. The value is \code{ir} times the probability for
27 | this. For a discussion of this assumption cf. Hennig (2002).
28 | }
29 | \value{
30 | A number.
31 | }
32 | 
33 | \references{
34 | Hennig, C. (2002) Fixed point clusters for linear regression:
35 | computation and comparison, \emph{Journal of
36 | Classification} 19, 249-276.
37 | }
38 | 
39 | \author{Christian Hennig
40 | \email{christian.hennig@unibo.it}
41 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/}}
42 | 
43 | \seealso{\code{\link{fixreg}}}
44 | 
45 | \examples{
46 | round(clusexpect(500,4,150,2000),digits=2)
47 | }
48 | \keyword{univar}% at least one, from doc/KEYWORDS
49 | \keyword{cluster}
50 | 
--------------------------------------------------------------------------------
/man/cluster.magazine.Rd:
--------------------------------------------------------------------------------
1 | \name{cluster.magazine}
2 | \alias{cluster.magazine}
3 | %- Also NEED an `\alias' for EACH other topic documented here.
4 | \title{Run many clustering methods on many numbers of clusters}
5 | \description{
6 | Runs a user-specified set of clustering methods (CBI-functions, see
7 | \code{\link{kmeansCBI}}) with several numbers of clusters on a dataset
8 | with unified output.
9 | }
10 | \usage{
11 | cluster.magazine(data,G,diss = inherits(data, "dist"),
12 | scaling=TRUE, clustermethod,
13 | distmethod=rep(TRUE,length(clustermethod)),
14 | ncinput=rep(TRUE,length(clustermethod)),
15 | clustermethodpars,
16 | trace=TRUE)
17 | 
18 | }
19 | %- maybe also `usage' for other objects documented here.
20 | \arguments{
21 | \item{data}{data matrix or \code{dist}-object.}
22 | \item{G}{vector of integers. Numbers of clusters to consider.}
23 | \item{diss}{logical. If \code{TRUE}, the data matrix is assumed to be
24 | a distance/dissimilarity matrix, otherwise it's observations times
25 | variables.}
26 | \item{scaling}{either a logical or a numeric vector of length equal to
27 | the number of columns of \code{data}. If \code{FALSE}, data won't be
If \code{FALSE}, data won't be 28 | scaled, otherwise \code{scaling} is passed on to \code{\link{scale}} as 29 | argument\code{scale}.} 30 | \item{clustermethod}{vector of strings specifying names of 31 | CBI-functions (see \code{\link{kmeansCBI}}). These are the 32 | clustering methods to be applied.} 33 | \item{distmethod}{vector of logicals, of the same length as 34 | \code{clustermethod}. \code{TRUE} means that the clustering method 35 | operates on distances. If \code{diss=TRUE}, all entries have to be 36 | \code{TRUE}. Otherwise, if an entry is true, the corresponding 37 | method will be applied on \code{dist(data)}.} 38 | \item{ncinput}{vector of logicals, of the same length as 39 | \code{clustermethod}. \code{TRUE} indicates that the corresponding 40 | clustering method requires the number of clusters as input and will 41 | not estimate the number of clusters itself.} 42 | \item{clustermethodpars}{list of the same length as 43 | \code{clustermethod}. Specifies parameters for all involved 44 | clustering methods. Its jth entry is passed to clustermethod number 45 | k. Can be an empty entry in case all defaults are used for a 46 | clustering method. The number of clusters does not need to be 47 | specified here.} 48 | \item{trace}{logical. If \code{TRUE}, some runtime information is 49 | printed.} 50 | } 51 | 52 | % \details{ 53 | % } 54 | \value{ 55 | List of lists comprising 56 | \item{output}{Two-dimensional list. The first list index i is the number 57 | of the clustering method (ordering as specified in 58 | \code{clustermethod}), the second list index j is the number of 59 | clusters. This stores the full output of clustermethod i run on 60 | number of clusters j.} 61 | \item{clustering}{Two-dimensional list. The first list index i is the number 62 | of the clustering method (ordering as specified in 63 | \code{clustermethod}), the second list index j is the number of 64 | clusters. This stores the clustering integer vector (i.e., the 65 | \code{partition}-component of the CBI-function, see 66 | \code{\link{kmeansCBI}}) of clustermethod i run on 67 | number of clusters j.} 68 | \item{noise}{Two-dimensional list. The first list index i is the number 69 | of the clustering method (ordering as specified in 70 | \code{clustermethod}), the second list index j is the number of 71 | clusters. List entries are single logicals. If \code{TRUE}, the 72 | clustering method estimated some noise, i.e., points not belonging 73 | to any cluster, which in the clustering vector are indicated by the 74 | highest number (number of clusters plus one in case that the number 75 | of clusters was fixed).} 76 | \item{othernc}{list of integer vectors of length 2. The first number is 77 | the number of the clustering method (the order is determined by 78 | argument \code{clustermethod}), the second number is the 79 | number of clusters for those methods that estimate the number of 80 | clusters themselves and estimate a number that is smaller than 81 | \code{min(G)} or larger than \code{max(G)}.} 82 | } 83 | \references{ 84 | Hennig, C. (2017) Cluster validation by measurement of clustering 85 | characteristics relevant to the user. In C. H. Skiadas (ed.) 
86 | \emph{Proceedings of ASMDA 2017}, 501-520, 87 | \url{https://arxiv.org/abs/1703.09282} 88 | 89 | 90 | } 91 | \author{Christian Hennig 92 | \email{christian.hennig@unibo.it} 93 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 94 | } 95 | 96 | \seealso{ 97 | \code{\link{clusterbenchstats}}, \code{\link{kmeansCBI}} 98 | } 99 | 100 | \examples{ 101 | 102 | set.seed(20000) 103 | options(digits=3) 104 | face <- rFace(10,dMoNo=2,dNoEy=0,p=2) 105 | clustermethod=c("kmeansCBI","hclustCBI","hclustCBI") 106 | # A clustering method can be used more than once, with different 107 | # parameters 108 | clustermethodpars <- list() 109 | clustermethodpars[[2]] <- clustermethodpars[[3]] <- list() 110 | clustermethodpars[[2]]$method <- "complete" 111 | clustermethodpars[[3]]$method <- "average" 112 | cmf <- cluster.magazine(face,G=2:3,clustermethod=clustermethod, 113 | distmethod=rep(FALSE,3),clustermethodpars=clustermethodpars) 114 | print(str(cmf)) 115 | 116 | } 117 | \keyword{multivariate}% at least one, from doc/KEYWORDS 118 | \keyword{cluster}% __ONLY ONE__ keyword per line 119 | 120 | 121 | 122 | -------------------------------------------------------------------------------- /man/cluster.varstats.Rd: -------------------------------------------------------------------------------- 1 | \name{cluster.varstats} 2 | \alias{cluster.varstats} 3 | \alias{print.varwisetables} 4 | %- Also NEED an `\alias' for EACH other topic documented here. 5 | \title{Variablewise statistics for clusters} 6 | \description{ 7 | This function gives some helpful variable-wise information for cluster 8 | interpretation, given a clustering and a data set. The output object 9 | contains some tables. For categorical variables, tables compare 10 | clusterwise distributions with overall distributions. Continuous 11 | variables are categorised for this. 12 | 13 | If desired, tables, histograms, some standard statistics of 14 | continuous variables and validation plots as available through 15 | \code{\link{discrproj}} (Hennig 2004) are given out on the fly. 16 | } 17 | \usage{ 18 | cluster.varstats(clustering,vardata,contdata=vardata, 19 | clusterwise=TRUE, 20 | tablevar=NULL,catvar=NULL, 21 | quantvar=NULL, catvarcats=10, 22 | proportions=FALSE, 23 | projmethod="none",minsize=ncol(contdata)+2, 24 | ask=TRUE,rangefactor=1) 25 | 26 | \method{print}{varwisetables}(x,digits=3,...) 27 | } 28 | %- maybe also `usage' for other objects documented here. 29 | \arguments{ 30 | \item{clustering}{vector of integers. Clustering (needs to be in 31 | standard coding, 1,2,...).} 32 | \item{vardata}{data matrix or data frame of which variables are 33 | summarised.} 34 | \item{contdata}{variable matrix or data frame, normally all or some 35 | variables from \code{vardata}, on which cluster visualisation by 36 | projection methods is performed unless \code{projmethod="none"}. It 37 | should make sense to interpret these variables in a quantitative 38 | (interval-scaled) way.} 39 | \item{clusterwise}{logical. If \code{FALSE}, only the output tables 40 | are computed but no more detail and graphs are given on the fly.} 41 | \item{tablevar}{vector of integers. Numbers of variables treated as 42 | categorical (i.e., no histograms and statistics, just tables) if 43 | \code{clusterwise=TRUE}. Note 44 | that an error will be produced by factor type variables unless they 45 | are declared as categorical here.} 46 | \item{catvar}{vector of integers. Numbers of variables to be 47 | categorised by proportional quantiles for table computation. 
48 | Recommended for all continuous variables.} 49 | \item{quantvar}{vector of integers. Variables for which means, 50 | standard deviations and quantiles should be given out if 51 | \code{clusterwise=TRUE}.} 52 | \item{catvarcats}{integer. Number of categories used for 53 | categorisation of variables specified in \code{quantvar}.} 54 | \item{proportions}{logical. If \code{TRUE}, output tables contain 55 | proportions, otherwise numbers of observations.} 56 | \item{projmethod}{one of \code{"none"}, \code{"dc"}, \code{"bc"}, 57 | \code{"vbc"}, \code{"mvdc"}, \code{"adc"}, \code{"awc"} (recommended 58 | if not \code{"none"}), \code{"arc"}, \code{"nc"}, \code{"wnc"}, 59 | \code{"anc"}. Cluster validation projection method introduced in 60 | Hennig (2004), passed on as \code{method} argument in 61 | \code{\link{discrproj}}.} 62 | \item{minsize}{integer. Projection is not carried out for clusters 63 | with fewer points than this. (If this is chosen smaller, it may lead 64 | to errors with some projection methods.)} 65 | \item{ask}{logical. If \code{TRUE}, \code{par(ask=TRUE)} is set in the 66 | beginning to prompt the user before plots and \code{par(ask=FALSE)} 67 | in the end.} 68 | \item{rangefactor}{numeric. Factor by which to multiply the range for 69 | projection plot ranges.} 70 | \item{x}{an object of class \code{"varwisetables"}, output object of 71 | \code{cluster.varstats}.} 72 | \item{digits}{integer. Number of digits after the decimal point to 73 | print out.} 74 | \item{...}{not used.} 75 | } 76 | 77 | \value{ 78 | An object of class \code{"varwisetables"}, which is a 79 | list with a table for each variable, giving (categorised) marginal 80 | distributions by cluster. 81 | } 82 | 83 | \references{ 84 | Hennig, C. (2004) Asymmetric linear dimension reduction for classification. 85 | Journal of Computational and Graphical Statistics 13, 930-945 . 86 | } 87 | 88 | \author{Christian Hennig 89 | \email{christian.hennig@unibo.it} 90 | \url{https://www.unibo.it/sitoweb/christian.hennig/en}} 91 | 92 | \examples{ 93 | set.seed(112233) 94 | options(digits=3) 95 | require(MASS) 96 | require(flexmix) 97 | data(Cars93) 98 | Cars934 <- Cars93[,c(3,5,8,10)] 99 | cc <- 100 | discrete.recode(Cars934,xvarsorted=FALSE,continuous=c(2,3),discrete=c(1,4)) 101 | fcc <- flexmix(cc$data~1,k=2, 102 | model=lcmixed(continuous=2,discrete=2,ppdim=c(6,3),diagonal=TRUE)) 103 | cv <- 104 | cluster.varstats(fcc@cluster,Cars934, contdata=Cars934[,c(2,3)], 105 | tablevar=c(1,4),catvar=c(2,3),quantvar=c(2,3),projmethod="awc", 106 | ask=FALSE) 107 | print(cv) 108 | } 109 | 110 | \keyword{cluster}% __ONLY ONE__ keyword per line 111 | -------------------------------------------------------------------------------- /man/cmahal.Rd: -------------------------------------------------------------------------------- 1 | \name{cmahal} 2 | \alias{cmahal} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Generation of tuning constant for Mahalanobis fixed point clusters.} 5 | \description{ 6 | Generates tuning constants \code{ca} 7 | for \code{\link{fixmahal}} dependent on 8 | the number of points and variables of the current fixed point cluster 9 | (FPC). 10 | 11 | This is experimental and only thought for use in \code{\link{fixmahal}}. 12 | } 13 | \usage{ 14 | cmahal(n, p, nmin, cmin, nc1, c1 = cmin, q = 1) 15 | } 16 | %- maybe also `usage' for other objects documented here. 17 | \arguments{ 18 | \item{n}{positive integer. Number of points.} 19 | \item{p}{positive integer. 
Number of variables.}
20 | \item{nmin}{integer larger than 1. Smallest number of points for which
21 | \code{ca} is computed. For smaller FPC sizes, \code{ca} is set to
22 | the value for \code{nmin}.}
23 | \item{cmin}{positive number. Minimum value for \code{ca}.}
24 | \item{nc1}{positive integer. Number of points at which \code{ca=c1}.}
25 | \item{c1}{positive numeric. Tuning constant for \code{cmahal}.
26 | Value for \code{ca} for FPC size equal to \code{nc1}.}
27 | \item{q}{numeric between 0 and 1. 1 for steepest possible descent of
28 | \code{ca} as function of the FPC size. Should presumably always be 1.}
29 | }
30 | \details{
31 | Some experiments suggest that the tuning constant \code{ca} should
32 | decrease with increasing FPC size and increase with increasing
33 | \code{p} in \code{\link{fixmahal}}. This is to prevent too small
34 | meaningless FPCs while maintaining the significant larger
35 | ones. \code{cmahal} with \code{q=1} computes \code{ca} in such a way
36 | that as long as \code{ca>cmin}, the decrease in \code{n} is as steep
37 | as possible in order to maintain the validity of the convergence
38 | theorem in Hennig and Christlieb (2002).
39 | }
40 | \value{
41 | A numeric vector of length \code{n}, giving the values for \code{ca}
42 | for all FPC sizes smaller or equal to \code{n}.
43 | }
44 | \references{
45 | Hennig, C. and Christlieb, N. (2002) Validating visual clusters in
46 | large datasets: Fixed point clusters of spectral features,
47 | \emph{Computational Statistics and Data Analysis} 40, 723-739.
48 | }
49 | \author{Christian Hennig
50 | \email{christian.hennig@unibo.it}
51 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/}}
52 | 
53 | \seealso{\code{\link{fixmahal}}}
54 | 
55 | \examples{
56 | plot(1:100,cmahal(100,3,nmin=5,cmin=qchisq(0.99,3),nc1=90),
57 | xlab="FPC size", ylab="cmahal")
58 | }
59 | \keyword{cluster}% at least one, from doc/KEYWORDS
60 | 
--------------------------------------------------------------------------------
/man/concomp.Rd:
--------------------------------------------------------------------------------
1 | \name{con.comp}
2 | \alias{con.comp}
3 | %- Also NEED an `\alias' for EACH other topic documented here.
4 | \title{Connectivity components of an undirected graph}
5 | \description{
6 | Computes the connectivity components of an undirected graph from a
7 | matrix giving the edges.
8 | }
9 | \usage{
10 | con.comp(comat)
11 | }
12 | %- maybe also `usage' for other objects documented here.
13 | \arguments{
14 | \item{comat}{a symmetric logical or 0-1 matrix, where \code{comat[i,j]=TRUE}
15 | means that there is an edge between vertices \code{i} and
16 | \code{j}. The diagonal is ignored.}
17 | }
18 | \details{
19 | The "depth-first search" algorithm of Cormen, Leiserson and Rivest
20 | (1990, p. 477) is used.
21 | }
22 | \value{
23 | An integer vector, giving the number of the connectivity component for
24 | each vertex.
25 | }
26 | \references{
27 | Cormen, T. H., Leiserson, C. E. and Rivest, R. L. (1990), \emph{Introduction
28 | to Algorithms}, Cambridge: MIT Press.
29 | }
30 | 
31 | \author{Christian Hennig
32 | \email{christian.hennig@unibo.it}
33 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/}
34 | }
35 | 
36 | \seealso{
37 | \code{\link{hclust}}, \code{\link{cutree}} for cut single linkage
38 | trees (often equivalent).
39 | } 40 | 41 | \examples{ 42 | set.seed(1000) 43 | x <- rnorm(20) 44 | m <- matrix(0,nrow=20,ncol=20) 45 | for(i in 1:20) 46 | for(j in 1:20) 47 | m[i,j] <- abs(x[i]-x[j]) 48 | d <- m<0.2 49 | cc <- con.comp(d) 50 | max(cc) # number of connectivity components 51 | plot(x,cc) 52 | # The same should be produced by 53 | # cutree(hclust(as.dist(m),method="single"),h=0.2). 54 | } 55 | \keyword{array}% at least one, from doc/KEYWORDS 56 | \keyword{cluster}% __ONLY ONE__ keyword per line 57 | 58 | 59 | -------------------------------------------------------------------------------- /man/confusion.Rd: -------------------------------------------------------------------------------- 1 | \name{confusion} 2 | \alias{confusion} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Misclassification probabilities in mixtures} 5 | \description{ 6 | Estimates a misclassification probability in a mixture distribution 7 | between two mixture components from estimated posterior probabilities 8 | regardless of component parameters, see Hennig (2010). 9 | } 10 | \usage{ 11 | confusion(z,pro,i,j,adjustprobs=FALSE) 12 | } 13 | %- maybe also `usage' for other objects documented here. 14 | \arguments{ 15 | \item{z}{matrix of posterior probabilities for observations (rows) to 16 | belong to mixture components (columns), so entries need to sum up to 17 | 1 for each row.} 18 | \item{pro}{vector of component proportions, need to sum up to 1.} 19 | \item{i}{integer. Component number.} 20 | \item{j}{integer. Component number.} 21 | \item{adjustprobs}{logical. If \code{TRUE}, probabilities are 22 | initially standardised so that those for components \code{i} and 23 | \code{j} add up to one (i.e., if they were the only components).} 24 | } 25 | 26 | \value{ 27 | Estimated probability that an observation generated by component 28 | \code{j} is classified to component \code{i} by maximum a posteriori rule. 29 | } 30 | 31 | \references{ 32 | Hennig, C. (2010) Methods for merging Gaussian mixture components, 33 | \emph{Advances in Data Analysis and Classification}, 4, 3-34. 34 | } 35 | \author{Christian Hennig 36 | \email{christian.hennig@unibo.it} 37 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 38 | } 39 | \examples{ 40 | set.seed(12345) 41 | m <- rpois(20,lambda=5) 42 | dim(m) <- c(5,4) 43 | pro <- apply(m,2,sum) 44 | pro <- pro/sum(pro) 45 | m <- m/apply(m,1,sum) 46 | round(confusion(m,pro,1,2),digits=2) 47 | } 48 | \keyword{cluster}% at least one, from doc/KEYWORDS 49 | \keyword{multivariate} 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /man/cov.wml.Rd: -------------------------------------------------------------------------------- 1 | \name{cov.wml} 2 | \alias{cov.wml} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Weighted Covariance Matrices (Maximum Likelihood)} 5 | \description{ 6 | Returns a list containing estimates of the weighted covariance 7 | matrix and the mean of the data, and optionally of the (weighted) 8 | correlation matrix. The 9 | covariance matrix is divided by the sum of the weights, 10 | corresponding to \code{n} and the ML-estimator in the case of equal 11 | weights, as opposed to \code{n-1} for \code{\link{cov.wt}}. 12 | } 13 | \usage{ 14 | cov.wml(x, wt = rep(1/nrow(x), nrow(x)), cor = FALSE, center = TRUE) 15 | } 16 | %- maybe also `usage' for other objects documented here. 17 | \arguments{ 18 | \item{x}{a matrix or data frame. 
As usual, rows are observations and
19 | columns are variables.}
20 | \item{wt}{a non-negative and non-zero vector of weights for each
21 | observation. Its length must equal the number of rows of
22 | \code{x}.}
23 | \item{cor}{A logical indicating whether the estimated weighted
24 | correlation matrix will be returned as well.}
25 | \item{center}{Either a logical or a numeric vector specifying the centers
26 | to be used when computing covariances. If \code{TRUE}, the
27 | (weighted) mean of each variable is used, if \code{FALSE}, zero is
28 | used. If \code{center} is numeric, its length must equal the
29 | number of columns of \code{x}.}
30 | }
31 | \value{
32 | A list containing the following named components:
33 | \item{cov}{the estimated (weighted) covariance matrix.}
34 | \item{center}{an estimate for the center (mean) of the data.}
35 | \item{n.obs}{the number of observations (rows) in \code{x}.}
36 | \item{wt}{the weights used in the estimation. Only returned if given
37 | as an argument.}
38 | \item{cor}{the estimated correlation matrix. Only returned if `cor' is
39 | `TRUE'.}
40 | }
41 | \author{Christian Hennig
42 | \email{christian.hennig@unibo.it}
43 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/}}
44 | 
45 | \seealso{\code{\link{cov.wt}}, \code{\link{cov}}, \code{\link{var}}}
46 | 
47 | \examples{
48 | x <- c(1,2,3,4,5,6,7,8,9,10)
49 | y <- c(1,2,3,8,7,6,5,8,9,10)
50 | cov.wml(cbind(x,y),wt=c(0,0,0,1,1,1,1,1,0,0))
51 | cov.wt(cbind(x,y),wt=c(0,0,0,1,1,1,1,1,0,0))
52 | }
53 | \keyword{multivariate}% at least one, from doc/KEYWORDS
54 | 
--------------------------------------------------------------------------------
/man/cvnn.Rd:
--------------------------------------------------------------------------------
1 | \name{cvnn}
2 | \alias{cvnn}
3 | %- Also NEED an `\alias' for EACH other topic documented here.
4 | \title{Cluster validation based on nearest neighbours}
5 | \description{
6 | Cluster validity index based on nearest neighbours as defined in Liu
7 | et al. (2013) with a correction explained in Halkidi et al. (2015).
8 | }
9 | \usage{
10 | cvnn(d=NULL,clusterings,k=5)
11 | }
12 | %- maybe also `usage' for other objects documented here.
13 | \arguments{
14 | \item{d}{dissimilarity matrix or \code{dist}-object.}
15 | \item{clusterings}{list of vectors of integers with length \code{=nrow(d)};
16 | indicating the cluster for each observation for several clusterings
17 | (list elements) to be compared.}
18 | \item{k}{integer. Number of nearest neighbours.}
19 | }
20 | 
21 | \value{
22 | List with components (see Liu et al. (2013), Halkidi et al. (2015) for
23 | details)
24 | \item{cvnnindex}{vector of index values for the various clusterings,
25 | see Liu et al. (2013), the lower the better.}
26 | \item{sep}{vector of separation values.}
27 | \item{comp}{vector of compactness values.}
28 | }
29 | 
30 | \references{
31 | Halkidi, M., Vazirgiannis, M. and Hennig, C. (2015) Method-independent
32 | indices for cluster validation. In C. Hennig, M. Meila, F. Murtagh,
33 | R. Rocci (eds.) \emph{Handbook of Cluster Analysis}, CRC
34 | Press/Taylor \code{&} Francis, Boca Raton.
35 | 
36 | Liu, Y., Li, Z., Xiong, H., Gao, X., Wu, J. and Wu, S. (2013)
37 | Understanding and enhancement of internal clustering validation
38 | measures. \emph{IEEE Transactions on Cybernetics} 43, 982-994.
39 | 40 | } 41 | 42 | \author{Christian Hennig 43 | \email{christian.hennig@unibo.it} 44 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 45 | } 46 | \examples{ 47 | options(digits=3) 48 | iriss <- as.matrix(iris[c(1:10,51:55,101:105),-5]) 49 | irisc <- as.numeric(iris[c(1:10,51:55,101:105),5]) 50 | print(cvnn(dist(iriss),list(irisc,rep(1:4,5)))) 51 | } 52 | \keyword{cluster}% at least one, from doc/KEYWORDS 53 | 54 | 55 | 56 | 57 | -------------------------------------------------------------------------------- /man/cweight.Rd: -------------------------------------------------------------------------------- 1 | \name{cweight} 2 | \alias{cweight} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Weight function for AWC} 5 | \description{ 6 | For use in \code{awcoord} only. 7 | } 8 | \usage{ 9 | cweight(x, ca) 10 | 11 | } 12 | %- maybe also `usage' for other objects documented here. 13 | \arguments{ 14 | \item{x}{numerical.} 15 | \item{ca}{numerical.} 16 | } 17 | % \details{ 18 | % } 19 | \value{ 20 | \code{ca/x} if smaller than 1, else 1. 21 | } 22 | 23 | \author{Christian Hennig 24 | \email{christian.hennig@unibo.it} 25 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 26 | } 27 | 28 | \seealso{ 29 | \code{\link{awcoord}} 30 | } 31 | 32 | \examples{ 33 | cweight(4,1) 34 | } 35 | \keyword{arith}% at least one, from doc/KEYWORDS 36 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /man/dbscan.Rd: -------------------------------------------------------------------------------- 1 | \name{dbscan} 2 | \alias{dbscan} 3 | \alias{print.dbscan} 4 | \alias{plot.dbscan} 5 | \alias{predict.dbscan} 6 | \title{DBSCAN density reachability and connectivity clustering} 7 | \description{ 8 | Generates a density based clustering of arbitrary shape as introduced 9 | in Ester et al. (1996). 10 | } 11 | \usage{ 12 | dbscan(data, eps, MinPts = 5, scale = FALSE, method = c("hybrid", "raw", 13 | "dist"), seeds = TRUE, showplot = FALSE, countmode = NULL) 14 | \method{print}{dbscan}(x, ...) 15 | \method{plot}{dbscan}(x, data, ...) 16 | \method{predict}{dbscan}(object, data, newdata = NULL, 17 | predict.max=1000, ...) 18 | } 19 | \arguments{ 20 | \item{data}{data matrix, data.frame, dissimilarity matrix or 21 | \code{dist}-object. Specify \code{method="dist"} if the data should 22 | be interpreted as dissimilarity matrix or object. Otherwise 23 | Euclidean distances will be used.} 24 | \item{eps}{ Reachability distance, see Ester et al. (1996). } 25 | \item{MinPts}{ Reachability minimum no. of points, see Ester et al. (1996). } 26 | \item{scale}{ scale the data if \code{TRUE}. } 27 | \item{method}{ "dist" treats data as distance matrix (relatively fast 28 | but memory expensive), "raw" treats data as raw data and avoids 29 | calculating a distance matrix (saves memory but may be slow), 30 | "hybrid" expects also raw data, but calculates partial distance 31 | matrices (very fast with moderate memory requirements).} 32 | \item{seeds}{FALSE to not include the \code{isseed}-vector in the 33 | \code{dbscan}-object.} 34 | \item{showplot}{ 0 = no plot, 1 = plot per iteration, 2 = plot per 35 | subiteration. } 36 | \item{countmode}{ NULL or vector of point numbers at which to report 37 | progress. } 38 | \item{x}{object of class \code{dbscan}.} 39 | \item{object}{object of class \code{dbscan}.} 40 | \item{newdata}{ matrix or data.frame with raw data to predict. } 41 | \item{predict.max}{ max. batch size for predictions. 
}
42 | \item{...}{Further arguments transferred to plot methods.}
43 | }
44 | \details{
45 | Clusters require a minimum number of points (MinPts) within a maximum distance
46 | (eps) around one of their members (the seed).
47 | Any point within eps around any point which satisfies the seed condition
48 | is a cluster member (recursively).
49 | Some points may not belong to any cluster (noise).
50 | 
51 | We have clustered a 100,000 x 2 dataset in 40 minutes on a Pentium M 1600
52 | MHz.
53 | 
54 | \code{print.dbscan} shows a statistic of the number of points
55 | belonging to the clusters that are seeds and border points.
56 | 
57 | \code{plot.dbscan} distinguishes between seed and border points by
58 | plot symbol.
59 | 
60 | }
61 | \value{
62 | \code{predict.dbscan} gives out a vector of predicted clusters for the
63 | points in \code{newdata}.
64 | 
65 | \code{dbscan} gives out
66 | an object of class 'dbscan', which is a list with components
67 | \item{cluster}{integer vector coding cluster membership with noise
68 | observations (singletons) coded as 0 }
69 | \item{isseed}{logical vector indicating whether a point is a seed (not
70 | border, not noise)}
71 | \item{eps}{parameter eps}
72 | \item{MinPts}{parameter MinPts}
73 | }
74 | \references{ Martin Ester, Hans-Peter Kriegel, Joerg Sander, Xiaowei Xu
75 | (1996). A Density-Based Algorithm for Discovering Clusters in Large Spatial
76 | Databases with Noise. Institute for Computer Science, University of Munich.
77 | Proceedings of 2nd International Conference on Knowledge Discovery and Data
78 | Mining (KDD-96). }
79 | \author{Jens Oehlschlaegel, based on a draft by Christian Hennig.}
80 | \note{This is a simplified version of the original algorithm (no K-D-trees
81 | used), thus we have \eqn{O(n^2)} instead of \eqn{O(n*log(n))} runtime.}
82 | \examples{
83 | set.seed(665544)
84 | n <- 600
85 | x <- cbind(runif(10, 0, 10)+rnorm(n, sd=0.2), runif(10, 0, 10)+rnorm(n,
86 | sd=0.2))
87 | par(bg="grey40")
88 | ds <- dbscan(x, 0.2)
89 | # run with showplot=1 to see how dbscan works.
90 | ds
91 | plot(ds, x)
92 | 
93 | x2 <- matrix(0,nrow=4,ncol=2)
94 | x2[1,] <- c(5,2)
95 | x2[2,] <- c(8,3)
96 | x2[3,] <- c(4,4)
97 | x2[4,] <- c(9,9)
98 | predict(ds, x, x2)
99 | 
100 | n <- 600
101 | x <- cbind((1:3)+rnorm(n, sd=0.2), (1:3)+rnorm(n, sd=0.2))
102 | 
103 | # Not run, but results from my machine are 0.105 - 0.068 - 0.255:
104 | # system.time(ds <- dbscan(x, 0.3, countmode=NULL, method="raw"))[3]
105 | # system.time(dsb <- dbscan(x, 0.3, countmode=NULL, method="hybrid"))[3]
106 | # system.time(dsc <- dbscan(dist(x), 0.3, countmode=NULL,
107 | # method="dist"))[3]
108 | }
109 | \keyword{multivariate}
110 | \keyword{cluster}
111 | 
112 | 
--------------------------------------------------------------------------------
/man/dipp.tantrum.Rd:
--------------------------------------------------------------------------------
1 | \name{dipp.tantrum}
2 | \alias{dipp.tantrum}
3 | %- Also NEED an `\alias' for EACH other topic documented here.
4 | \title{Simulates p-value for dip test}
5 | \description{
6 | Simulates p-value for dip test (see \code{\link[diptest]{dip}})
7 | in the way suggested by Tantrum, Murua and Stuetzle (2003) from the
8 | closest unimodal distribution determined by kernel density estimation
9 | with bandwidth chosen so that the density just becomes unimodal. This is
10 | less conservative (and in fact sometimes anti-conservative) than the
11 | values from \code{\link[diptest]{dip.test}}.
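A typical call (as in the example below) first computes the dip
statistic with \code{\link[diptest]{dip}} and then passes it on, e.g.
\code{dipp.tantrum(x,dip(x))}.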
12 | }
13 | \usage{
14 | dipp.tantrum(xdata,d,M=100)
15 | }
16 | %- maybe also `usage' for other objects documented here.
17 | \arguments{
18 | \item{xdata}{numeric vector. One-dimensional dataset.}
19 | \item{d}{numeric. Value of dip statistic.}
20 | \item{M}{integer. Number of artificial datasets generated in order to
21 | estimate the p-value.}
22 | }
23 | 
24 | \value{
25 | List with components
26 | \item{p.value}{approximated p-value.}
27 | \item{bw}{borderline unimodality bandwidth in \code{\link{density}}
28 | with default settings.}
29 | \item{dv}{vector of dip statistic values from simulated artificial data.}
30 | }
31 | 
32 | \references{
33 | J. A. Hartigan and P. M. Hartigan (1985) The Dip Test of
34 | Unimodality, \emph{Annals of Statistics}, 13, 70-84.
35 | 
36 | Tantrum, J., Murua, A. and Stuetzle, W. (2003) Assessment and
37 | Pruning of Hierarchical Model Based Clustering, \emph{Proceedings of the
38 | ninth ACM SIGKDD international conference on Knowledge discovery and
39 | data mining}, Washington, D.C., 197-205.
40 | }
41 | \author{Christian Hennig
42 | \email{christian.hennig@unibo.it}
43 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/}
44 | }
45 | \examples{
46 | require(diptest) # diptest is an fpc dependency, so it is available
47 | x <- runif(100)
48 | d <- dip(x)
49 | dt <- dipp.tantrum(x,d,M=10)
50 | }
51 | \keyword{cluster}% at least one, from doc/KEYWORDS
52 | % \keyword{multivariate}
53 | 
54 | 
55 | 
--------------------------------------------------------------------------------
/man/diptest.multi.Rd:
--------------------------------------------------------------------------------
1 | \name{diptest.multi}
2 | \alias{diptest.multi}
3 | %- Also NEED an `\alias' for EACH other topic documented here.
4 | \title{Diptest for discriminant coordinate projection}
5 | \description{
6 | Diptest (Hartigan and Hartigan, 1985, see \code{\link[diptest]{dip}})
7 | for data projected in discriminant coordinate separating optimally two
8 | class means (see \code{discrcoord}) as suggested by Tantrum, Murua and
9 | Stuetzle (2003).
10 | }
11 | \usage{
12 | diptest.multi(xdata,class,pvalue="uniform",M=100)
13 | }
14 | %- maybe also `usage' for other objects documented here.
15 | \arguments{
16 | \item{xdata}{matrix. Potentially multidimensional dataset.}
17 | \item{class}{vector of integers giving class numbers for observations.}
18 | \item{pvalue}{\code{"uniform"} or \code{"tantrum"}. Defines whether
19 | the p-value is computed from a uniform null model as suggested in
20 | Hartigan and Hartigan (1985, using \code{\link[diptest]{dip.test}}) or as
21 | suggested in Tantrum et al. (2003, using \code{dipp.tantrum}).}
22 | \item{M}{integer. Number of artificial datasets generated in order to
23 | estimate the p-value if \code{pvalue="tantrum"}.}
24 | }
25 | 
26 | \value{
27 | The resulting p-value.
28 | }
29 | 
30 | \references{
31 | J. A. Hartigan and P. M. Hartigan (1985) The Dip Test of
32 | Unimodality, \emph{Annals of Statistics}, 13, 70-84.
33 | 
34 | Tantrum, J., Murua, A. and Stuetzle, W. (2003) Assessment and
35 | Pruning of Hierarchical Model Based Clustering, \emph{Proceedings of the
36 | ninth ACM SIGKDD international conference on Knowledge discovery and
37 | data mining}, Washington, D.C., 197-205.
38 | } 39 | \author{Christian Hennig 40 | \email{christian.hennig@unibo.it} 41 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 42 | } 43 | \examples{ 44 | require(diptest) 45 | x <- cbind(runif(100),runif(100)) 46 | partition <- 1+(x[,1]<0.5) 47 | d1 <- diptest.multi(x,partition) 48 | d2 <- diptest.multi(x,partition,pvalue="tantrum",M=10) 49 | } 50 | \keyword{cluster}% at least one, from doc/KEYWORDS 51 | \keyword{multivariate} 52 | 53 | 54 | 55 | -------------------------------------------------------------------------------- /man/discrcoord.Rd: -------------------------------------------------------------------------------- 1 | \name{discrcoord} 2 | \alias{discrcoord} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Discriminant coordinates/canonical variates} 5 | \description{ 6 | Computes discriminant coordinates, sometimes referred to as "canonical 7 | variates" as described in Seber (1984). 8 | } 9 | \usage{ 10 | discrcoord(xd, clvecd, pool = "n", ...) 11 | } 12 | %- maybe also `usage' for other objects documented here. 13 | \arguments{ 14 | \item{xd}{the data matrix; a numerical object which can be coerced 15 | to a matrix.} 16 | \item{clvecd}{integer vector of class numbers; length must equal 17 | \code{nrow(xd)}.} 18 | \item{pool}{string. Determines how the within classes 19 | covariance is pooled. "n" means that the class covariances are 20 | weighted corresponding to the number of points in each class 21 | (default). "equal" means that all classes get equal weight.} 22 | \item{...}{no effect} 23 | } 24 | \details{ 25 | The matrix T (see Seber (1984), p. 270) is inverted by use of 26 | \code{\link{tdecomp}}, which can be expected to give 27 | reasonable results for singular within-class covariance matrices. 28 | } 29 | \value{ 30 | List with the following components 31 | \item{ev}{eigenvalues in descending order.} 32 | \item{units}{columns are coordinates of projection basis vectors. 33 | New points \code{x} can be projected onto the projection basis vectors 34 | by \code{x \%*\% units}} 35 | \item{proj}{projections of \code{xd} onto \code{units}.} 36 | } 37 | \references{ 38 | Seber, G. A. F. (1984). \emph{Multivariate Observations}. New York: Wiley. 39 | } 40 | \author{Christian Hennig 41 | \email{christian.hennig@unibo.it} 42 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 43 | } 44 | 45 | \seealso{ 46 | \code{\link{plotcluster}} for straight forward discriminant plots. 47 | 48 | \code{\link{batcoord}} for discriminating projections for two classes, 49 | so that also the differences in variance are shown (\code{discrcoord} is 50 | based only on differences in mean). 51 | 52 | \code{\link{rFace}} for generation of the example data used below. 53 | } 54 | 55 | \examples{ 56 | set.seed(4634) 57 | face <- rFace(600,dMoNo=2,dNoEy=0) 58 | grface <- as.integer(attr(face,"grouping")) 59 | dcf <- discrcoord(face,grface) 60 | plot(dcf$proj,col=grface) 61 | # ...done in one step by function plotcluster. 62 | } 63 | \keyword{multivariate}% at least one, from doc/KEYWORDS 64 | \keyword{classif}% __ONLY ONE__ keyword per line 65 | 66 | 67 | 68 | -------------------------------------------------------------------------------- /man/discrete.recode.Rd: -------------------------------------------------------------------------------- 1 | \name{discrete.recode} 2 | \alias{discrete.recode} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 
4 | \title{Recodes mixed variables dataset} 5 | \description{ 6 | Recodes a dataset with mixed continuous and categorical variables so 7 | that the continuous variables come first and the categorical variables 8 | have standard coding 1, 2, 3,... (in lexicographical ordering of 9 | values coerced to strings). 10 | } 11 | \usage{ 12 | discrete.recode(x,xvarsorted=TRUE,continuous=0,discrete) 13 | } 14 | %- maybe also `usage' for other objects documented here. 15 | \arguments{ 16 | \item{x}{data matrix or data frame (not a tibble). 17 | The data need to be organised 18 | case-wise, i.e., if there are categorical variables only, and 15 19 | cases with values c(1,1,2) on the 3 variables, the data matrix needs 20 | 15 rows with values 1 1 2. (Categorical variables could take numbers 21 | or strings or anything that can be coerced to factor levels as values.)} 22 | \item{xvarsorted}{logical. If \code{TRUE}, the continuous variables 23 | are assumed to be the first ones, and the categorical variables to 24 | be behind them.} 25 | \item{continuous}{vector of integers giving positions of the 26 | continuous variables. If \code{xvarsorted=TRUE}, a single integer, 27 | number of continuous variables.} 28 | \item{discrete}{vector of integers giving positions of the 29 | categorical variables (the variables need to be coded in such a way that 30 | \code{\link{data.matrix}} converts them to something numeric). If 31 | \code{xvarsorted=TRUE}, a single integer, number of categorical variables.} 32 | } 33 | 34 | \value{ 35 | A list with components 36 | \item{data}{data matrix with continuous variables first and 37 | categorical variables in standard coding behind them.} 38 | \item{ppdim}{vector of categorical variable-wise numbers of 39 | categories.} 40 | \item{discretelevels}{list of levels of the categorical variables 41 | belonging to what is treated by \code{flexmixedruns} as category 42 | 1, 2, 3 etc.} 43 | \item{continuous}{number of continuous variables.} 44 | \item{discrete}{number of categorical variables.} 45 | } 46 | 47 | \author{Christian Hennig 48 | \email{christian.hennig@unibo.it} 49 | \url{https://www.unibo.it/sitoweb/christian.hennig/en}} 50 | 51 | \seealso{\code{\link{lcmixed}}} 52 | 53 | \examples{ 54 | set.seed(776655) 55 | v1 <- rnorm(20) 56 | v2 <- rnorm(20) 57 | d1 <- sample(c(2,4,6,8),20,replace=TRUE) 58 | d2 <- sample(1:4,20,replace=TRUE) 59 | ldata <- cbind(v1,d1,v2,d2) 60 | lc <- 61 | discrete.recode(ldata,xvarsorted=FALSE,continuous=c(1,3),discrete=c(2,4)) 62 | require(MASS) 63 | data(Cars93) 64 | Cars934 <- Cars93[,c(3,5,8,10)] 65 | cc <- discrete.recode(Cars934,xvarsorted=FALSE,continuous=c(2,3),discrete=c(1,4)) 66 | } 67 | 68 | \keyword{manip}% __ONLY ONE__ keyword per line 69 | -------------------------------------------------------------------------------- /man/discrproj.Rd: -------------------------------------------------------------------------------- 1 | \name{discrproj} 2 | \alias{discrproj} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Linear dimension reduction for classification} 5 | \description{ 6 | An interface for ten methods of linear dimension reduction in order 7 | to separate the groups optimally in the projected data. Includes 8 | classical discriminant coordinates, methods to project differences in 9 | mean and covariance structure, asymmetric methods (separation of a 10 | homogeneous class from a heterogeneous one), local neighborhood-based 11 | methods and methods based on robust covariance matrices. 
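All methods return their output in a common format (a list with at
least the components \code{ev}, \code{units} and \code{proj}, see
Value below), so that, e.g., \code{discrproj(x,clvecd,method="dc")$proj}
gives the projected data regardless of which \code{method} is chosen.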
12 | }
13 | \usage{
14 | discrproj(x, clvecd, method="dc", clnum=NULL, ignorepoints=FALSE,
15 | ignorenum=0, ...)
16 | }
17 | %- maybe also `usage' for other objects documented here.
18 | \arguments{
19 | \item{x}{the data matrix; a numerical object which can be coerced
20 | to a matrix.}
21 | \item{clvecd}{vector of class numbers which can be coerced into
22 | integers; length must equal
23 | \code{nrow(x)}.}
24 | \item{method}{one of
25 | \describe{
26 | \item{"dc"}{usual discriminant coordinates, see \code{\link{discrcoord}},}
27 | \item{"bc"}{Bhattacharyya coordinates, first coordinate showing
28 | mean differences, second showing covariance matrix differences,
29 | see \code{\link{batcoord}},}
30 | \item{"vbc"}{variance dominated Bhattacharyya coordinates,
31 | see \code{\link{batcoord}},}
32 | \item{"mvdc"}{added mean and variance differences optimizing
33 | coordinates, see \code{\link{mvdcoord}},}
34 | \item{"adc"}{asymmetric discriminant coordinates, see
35 | \code{\link{adcoord}},}
36 | \item{"awc"}{asymmetric discriminant coordinates with weighted
37 | observations, see \code{\link{awcoord}},}
38 | \item{"arc"}{asymmetric discriminant coordinates with weighted
39 | observations and robust MCD-covariance matrix,
40 | see \code{\link{awcoord}},}
41 | \item{"nc"}{neighborhood based coordinates,
42 | see \code{\link{ncoord}},}
43 | \item{"wnc"}{neighborhood based coordinates with weighted neighborhoods,
44 | see \code{\link{ncoord}},}
45 | \item{"anc"}{asymmetric neighborhood based coordinates,
46 | see \code{\link{ancoord}}.}
47 | }
48 | Note that "bc", "vbc", "adc", "awc", "arc" and "anc" assume that
49 | there are only two classes.}
50 | \item{clnum}{integer. Number of the class which is attempted to plot
51 | homogeneously by "asymmetric methods", which are the methods
52 | assuming that there are only two classes, as indicated above.}
53 | \item{ignorepoints}{logical. If \code{TRUE}, points with label
54 | \code{ignorenum} in \code{clvecd} are ignored in the computation for
55 | \code{method} and are only projected afterwards onto the resulting
56 | units. If \code{pch=NULL}, the plot symbol for these points is "N".}
57 | \item{ignorenum}{one of the potential values of the components of
58 | \code{clvecd}. Only has effect if \code{ignorepoints=TRUE}, see above.}
59 | \item{...}{additional parameters passed to the
60 | projection methods.}
61 | }
62 | % \details{
63 | % }
64 | 
65 | \value{
66 | \code{discrproj} returns the output of the chosen projection method,
67 | which is a list with at least the components \code{ev, units, proj}.
68 | For detailed information see the help pages of the projection methods.
69 | \item{ev}{eigenvalues in descending order, usually indicating portion
70 | of information in the corresponding direction.}
71 | \item{units}{columns are coordinates of projection basis vectors.
72 | New points \code{x} can be projected onto the projection basis vectors
73 | by \code{x \%*\% units}}
74 | \item{proj}{projections of \code{x} onto \code{units}.}
75 | }
76 | 
77 | \references{
78 | Hennig, C. (2004) Asymmetric linear dimension reduction for classification.
79 | Journal of Computational and Graphical Statistics 13, 930-945.
80 | 
81 | Hennig, C. (2005) A method for visual cluster validation. In:
82 | Weihs, C. and Gaul, W. (eds.): Classification - The Ubiquitous
83 | Challenge. Springer, Heidelberg 2005, 153-160.
84 | 
85 | Seber, G. A. F. (1984). \emph{Multivariate Observations}. New York: Wiley.
86 | 
87 | Fukunaga, K. (1990).
\emph{Introduction to Statistical Pattern
88 | Recognition} (2nd ed.). Boston: Academic Press.
89 | }
90 | \author{Christian Hennig
91 | \email{christian.hennig@unibo.it}
92 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/}}
93 | 
94 | \seealso{
95 | \code{\link{discrcoord}}, \code{\link{batcoord}},
96 | \code{\link{mvdcoord}}, \code{\link{adcoord}},
97 | \code{\link{awcoord}}, \code{\link{ncoord}},
98 | \code{\link{ancoord}}.
99 | 
100 | \code{\link{rFace}} for generation of the example data used below.
101 | }
102 | 
103 | \examples{
104 | set.seed(4634)
105 | face <- rFace(300,dMoNo=2,dNoEy=0,p=3)
106 | grface <- as.integer(attr(face,"grouping"))
107 | 
108 | # The abs in the following is there to unify the output,
109 | # because eigenvectors are defined only up to their sign.
110 | # Statistically it doesn't make sense to compute absolute values.
111 | round(abs(discrproj(face,grface, method="nc")$units),digits=2)
112 | round(abs(discrproj(face,grface, method="wnc")$units),digits=2)
113 | round(abs(discrproj(face,grface, clnum=1, method="arc")$units),digits=2)
114 | }
115 | \keyword{multivariate}% at least one, from doc/KEYWORDS
116 | \keyword{classif}% __ONLY ONE__ keyword per line
117 | 
118 | 
119 | 
120 | 
--------------------------------------------------------------------------------
/man/distancefactor.Rd:
--------------------------------------------------------------------------------
1 | \name{distancefactor}
2 | \alias{distancefactor}
3 | %- Also NEED an `\alias' for EACH other topic documented here.
4 | \title{Factor for dissimilarity of mixed type data}
5 | \description{
6 | Computes a factor that can be used to standardise ordinal categorical
7 | variables and binary dummy variables coding categories of nominal scaled
8 | variables for Euclidean
9 | dissimilarity computation in mixed type data. See Hennig and Liao (2013).
10 | }
11 | \usage{
12 | distancefactor(cat,n=NULL, catsizes=NULL,type="categorical",
13 | normfactor=2,qfactor=ifelse(type=="categorical",1/2,
14 | 1/(1+1/(cat-1))))
15 | 
16 | }
17 | %- maybe also `usage' for other objects documented here.
18 | \arguments{
19 | \item{cat}{integer. Number of categories of the variable to be standardised.
20 | Note that for \code{type="categorical"} the number of categories of
21 | the original variable is required, although the
22 | \code{distancefactor} is used to standardise dummy
23 | variables for the categories.}
24 | \item{n}{integer. Number of data points.}
25 | \item{catsizes}{vector of integers giving numbers of observations per
26 | category. One of \code{n} and \code{catsizes} must be supplied. If
27 | \code{catsizes=NULL}, \code{rep(round(n/cat),cat)} is used (this may
28 | be appropriate as well if numbers of observations of categories are
29 | unequal, if the researcher decides that the dissimilarity measure
30 | should not be influenced by empirical category sizes).}
31 | \item{type}{\code{"categorical"} if the factor is used for dummy
32 | variables belonging to a nominal variable, \code{"ordinal"} if the
33 | factor is used for an ordinal variable in standard Likert coding.}
34 | \item{normfactor}{numeric. Factor on which standardisation is based.
35 | As a default, this is \code{E(X_1-X_2)^2=2} for independent unit
36 | variance variables.}
37 | \item{qfactor}{numeric.
Factor q in Hennig and Liao (2013) to
38 | adjust for clumping effects due to discreteness.}
39 | }
40 | 
41 | \value{
42 | A factor by which to multiply the variable in order to make it
43 | comparable to a unit variance continuous variable when aggregated in
44 | Euclidean fashion for dissimilarity computation, so that expected
45 | effective difference between two realisations of the variable equals
46 | \code{qfactor*normfactor}.
47 | }
48 | 
49 | 
50 | \references{
51 | Hennig, C. and Liao, T. (2013) How to find an appropriate clustering
52 | for mixed-type variables with application to socio-economic
53 | stratification, \emph{Journal of the Royal Statistical Society, Series
54 | C Applied Statistics}, 62, 309-369.
55 | 
56 | 
57 | }
58 | 
59 | \author{Christian Hennig
60 | \email{christian.hennig@unibo.it}
61 | \url{https://www.unibo.it/sitoweb/christian.hennig/en}}
62 | 
63 | \seealso{\code{\link{lcmixed}}, \code{\link[cluster]{pam}}}
64 | 
65 | \examples{
66 | set.seed(776655)
67 | d1 <- sample(1:5,20,replace=TRUE)
68 | d2 <- sample(1:4,20,replace=TRUE)
69 | ldata <- cbind(d1,d2)
70 | lc <- cat2bin(ldata,categorical=1)$data
71 | lc[,1:5] <- lc[,1:5]*distancefactor(5,20,type="categorical")
72 | lc[,6] <- lc[,6]*distancefactor(4,20,type="ordinal")
73 | }
74 | 
75 | \keyword{cluster}% __ONLY ONE__ keyword per line
--------------------------------------------------------------------------------
/man/distcritmulti.Rd:
--------------------------------------------------------------------------------
1 | \name{distcritmulti}
2 | \alias{distcritmulti}
3 | %- Also NEED an `\alias' for EACH other topic documented here.
4 | \title{Distance based validity criteria for large data sets}
5 | \description{
6 | Approximates average silhouette width or the Pearson version of
7 | Hubert's gamma criterion by splitting the
8 | dataset into pieces and averaging the subset-wise values, see Hennig
9 | and Liao (2013).
10 | }
11 | \usage{
12 | distcritmulti(x,clustering,part=NULL,ns=10,criterion="asw",
13 | fun="dist",metric="euclidean",
14 | count=FALSE,seed=NULL,...)
15 | }
16 | %- maybe also `usage' for other objects documented here.
17 | \arguments{
18 | \item{x}{cases times variables data matrix.}
19 | \item{clustering}{vector of integers indicating the clustering.}
20 | \item{part}{vector of integer subset sizes; sum should be smaller or
21 | equal to the number of cases of \code{x}. If \code{NULL}, subset sizes are
22 | chosen approximately equal.}
23 | \item{ns}{integer. Number of subsets, only used if \code{part==NULL}.}
24 | \item{criterion}{\code{"asw"} or \code{"pearsongamma"}, specifies
25 | whether the average silhouette width or the Pearson version of
26 | Hubert's gamma is computed.}
27 | \item{fun}{\code{"dist"} or \code{"daisy"}, specifies
28 | which function is used for computing dissimilarities.}
29 | \item{metric}{passed on to \code{\link{dist}} (as argument
30 | \code{method}) or \code{\link[cluster]{daisy}} to determine which
31 | dissimilarity is used.}
32 | \item{count}{logical. If \code{TRUE}, the subset number just processed
33 | is printed.}
34 | \item{seed}{integer, random seed. (If \code{NULL}, result depends on
35 | random numbers.)}
36 | \item{...}{further arguments to be passed on to \code{\link{dist}} or
37 | \code{\link[cluster]{daisy}}.}
38 | }
39 | 
40 | \value{
41 | A list with components \code{crit.overall,crit.sub,crit.sd,subsets}.
42 | \item{crit.overall}{value of criterion.} 43 | \item{crit.sub}{vector of subset-wise criterion values.} 44 | \item{crit.sd}{standard deviation of \code{crit.sub}, can be used to 45 | assess stability.} 46 | \item{subsets}{list of case indexes in subsets.} 47 | } 48 | 49 | 50 | \references{ 51 | Halkidi, M., Batistakis, Y., Vazirgiannis, M. (2001) On Clustering 52 | Validation Techniques, \emph{Journal of Intelligent Information 53 | Systems}, 17, 107-145. 54 | 55 | Hennig, C. and Liao, T. (2013) How to find an appropriate clustering 56 | for mixed-type variables with application to socio-economic 57 | stratification, \emph{Journal of the Royal Statistical Society, Series 58 | C Applied Statistics}, 62, 309-369. 59 | 60 | Kaufman, L. and Rousseeuw, P.J. (1990). "Finding Groups in Data: 61 | An Introduction to Cluster Analysis". Wiley, New York. 62 | } 63 | 64 | \author{Christian Hennig 65 | \email{christian.hennig@unibo.it} 66 | \url{https://www.unibo.it/sitoweb/christian.hennig/en}} 67 | 68 | \seealso{\code{\link{cluster.stats}}, \code{\link[cluster]{silhouette}}} 69 | 70 | \examples{ 71 | set.seed(20000) 72 | options(digits=3) 73 | face <- rFace(50,dMoNo=2,dNoEy=0,p=2) 74 | clustering <- as.integer(attr(face,"grouping")) 75 | distcritmulti(face,clustering,ns=3,seed=100000,criterion="pearsongamma") 76 | } 77 | 78 | \keyword{cluster}% __ONLY ONE__ keyword per line 79 | -------------------------------------------------------------------------------- /man/distrsimilarity.Rd: -------------------------------------------------------------------------------- 1 | \name{distrsimilarity} 2 | \alias{distrsimilarity} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Similarity of within-cluster distributions to normal and uniform} 5 | \description{ 6 | Two measures of dissimilarity between the within-cluster distributions of 7 | a dataset and the normal or uniform distribution. For the normal it is the 8 | Kolmogorov distance between the distribution of Mahalanobis distances to the 9 | center and a chi-squared distribution. For the uniform it is the 10 | Kolmogorov distance between the distribution of distances to the kth nearest neighbour 11 | and a Gamma distribution (this is based on Byers and Raftery (1998)). 12 | The clusterwise values are aggregated by weighting with the cluster sizes. 13 | } 14 | \usage{ 15 | distrsimilarity(x,clustering,noisecluster = FALSE, 16 | distribution=c("normal","uniform"),nnk=2, 17 | largeisgood=FALSE,messages=FALSE) 18 | } 19 | %- maybe also `usage' for other objects documented here. 20 | \arguments{ 21 | \item{x}{the data matrix; a numerical object which can be coerced 22 | to a matrix.} 23 | \item{clustering}{integer vector of class numbers; length must equal 24 | \code{nrow(x)}, numbers must go from 1 to the number of clusters.} 25 | \item{noisecluster}{logical. If \code{TRUE}, the cluster with the 26 | largest number is ignored for the computations.} 27 | \item{distribution}{vector of \code{"normal", "uniform"} or 28 | both. Indicates which of the two dissimilarities is/are computed.} 29 | \item{nnk}{integer. Number of nearest neighbors to use for 30 | dissimilarity to the uniform.} 31 | \item{largeisgood}{logical. If \code{TRUE}, dissimilarities are 32 | transformed to \code{1-d} (this means that larger values indicate a 33 | better fit).} 34 | \item{messages}{logical.
If \code{TRUE}, warnings are given if 35 | within-cluster covariance matrices are not invertible (in which case 36 | all within-cluster Mahalanobis distances are set to zero).} 37 | } 38 | \note{ 39 | It is very hard to capture similarity to a multivariate normal or 40 | uniform in a single value, and both measures used here have their 41 | shortcomings. Particularly, the dissimilarity to the uniform can still 42 | indicate a good fit if there are holes, or if the distribution is uniform but 43 | concentrated on several disconnected sets. 44 | } 45 | % \details{ 46 | % } 47 | \value{ 48 | List with the following components 49 | \item{kdnorm}{Kolmogorov distance between distribution of 50 | within-cluster Mahalanobis 51 | distances and appropriate chi-squared distribution, aggregated over 52 | clusters (I am grateful to Agustin Mayo-Iscar for the idea).} 53 | \item{kdunif}{Kolmogorov distance between distribution of distances to 54 | \code{nnk}th nearest within-cluster neighbor and appropriate 55 | Gamma-distribution, see Byers and Raftery (1998), aggregated over 56 | clusters.} 57 | \item{kdnormc}{vector of cluster-wise Kolmogorov distances between 58 | distribution of within-cluster Mahalanobis 59 | distances and appropriate chi-squared distribution.} 60 | \item{kdunifc}{vector of cluster-wise Kolmogorov distances between 61 | distribution of distances to \code{nnk}th nearest within-cluster 62 | neighbor and appropriate Gamma-distribution.} 63 | \item{xmahal}{vector of Mahalanobis distances to the respective cluster 64 | center.} 65 | \item{xdknn}{vector of distances to \code{nnk}th nearest within-cluster 66 | neighbor.} 67 | } 68 | \references{ 69 | Byers, S. and Raftery, A. E. (1998) Nearest-Neighbor Clutter 70 | Removal for Estimating Features in Spatial Point Processes, 71 | \emph{Journal of the American Statistical Association}, 93, 577-584. 72 | 73 | Hennig, C. (2017) Cluster validation by measurement of clustering 74 | characteristics relevant to the user. In C. H. Skiadas (ed.) 75 | \emph{Proceedings of ASMDA 2017}, 501-520, 76 | \url{https://arxiv.org/abs/1703.09282} 77 | 78 | 79 | } 80 | \author{Christian Hennig 81 | \email{christian.hennig@unibo.it} 82 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 83 | } 84 | 85 | \seealso{ 86 | \code{\link{cqcluster.stats}}, \code{\link{cluster.stats}} 87 | for more cluster validity statistics. 88 | } 89 | 90 | \examples{ 91 | set.seed(20000) 92 | options(digits=3) 93 | face <- rFace(200,dMoNo=2,dNoEy=0,p=2) 94 | km3 <- kmeans(face,3) 95 | distrsimilarity(face,km3$cluster) 96 | } 97 | \keyword{multivariate}% at least one, from doc/KEYWORDS 98 | \keyword{classif}% __ONLY ONE__ keyword per line 99 | \keyword{cluster}% __ONLY ONE__ keyword per line 100 | 101 | 102 | 103 | -------------------------------------------------------------------------------- /man/dridgeline.Rd: -------------------------------------------------------------------------------- 1 | \name{dridgeline} 2 | \alias{dridgeline} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Density along the ridgeline} 5 | \description{ 6 | Computes the density of a two-component Gaussian mixture along the 7 | ridgeline (Ray and Lindsay, 2005), along which 8 | all its density extrema are located. 9 | } 10 | \usage{ 11 | dridgeline(alpha=seq(0,1,0.001), prop, 12 | mu1, mu2, Sigma1, Sigma2, showplot=FALSE, ...) 13 | } 14 | %- maybe also `usage' for other objects documented here.
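% A minimal sketch (assuming reasonably separated components): since all
% density extrema of the mixture lie on the ridgeline, the number of interior
% local maxima of the returned values gives a rough mode count, e.g.
% d <- dridgeline(seq(0,1,0.001), prop=0.5, mu1=c(0,0), mu2=c(3,3),
%                 Sigma1=diag(2), Sigma2=diag(2))
% sum(diff(sign(diff(d))) < 0)  # interior local maxima along alpha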
15 | \arguments{ 16 | \item{alpha}{sequence of values between 0 and 1 for which the density 17 | is computed.} 18 | \item{prop}{mixture proportion of first component.} 19 | \item{mu1}{mean vector of component 1.} 20 | \item{mu2}{mean vector of component 2.} 21 | \item{Sigma1}{covariance matrix of component 1.} 22 | \item{Sigma2}{covariance matrix of component 2.} 23 | \item{showplot}{logical. If \code{TRUE}, the density is plotted 24 | against \code{alpha}.} 25 | \item{...}{further arguments to be passed on to \code{plot}.} 26 | } 27 | 28 | \value{ 29 | Vector of density values for values of \code{alpha}. 30 | } 31 | 32 | \references{ 33 | Ray, S. and Lindsay, B. G. (2005) The Topography of Multivariate 34 | Normal Mixtures, \emph{Annals of Statistics}, 33, 2042-2065. 35 | } 36 | \author{Christian Hennig 37 | \email{christian.hennig@unibo.it} 38 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 39 | } 40 | \examples{ 41 | q <- dridgeline(seq(0,1,0.1),0.5,c(1,1),c(2,5),diag(2),diag(2)) 42 | } 43 | \keyword{cluster}% at least one, from doc/KEYWORDS 44 | \keyword{multivariate} 45 | 46 | 47 | 48 | -------------------------------------------------------------------------------- /man/dudahart2.Rd: -------------------------------------------------------------------------------- 1 | \name{dudahart2} 2 | \alias{dudahart2} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Duda-Hart test for splitting} 5 | \description{ 6 | Duda-Hart test for whether a data set should be split into two 7 | clusters. 8 | } 9 | \usage{ 10 | dudahart2(x,clustering,alpha=0.001) 11 | } 12 | %- maybe also `usage' for other objects documented here. 13 | \arguments{ 14 | \item{x}{data matrix or data frame.} 15 | \item{clustering}{vector of integers. Clustering into two clusters.} 16 | \item{alpha}{numeric between 0 and 1. Significance level (recommended 17 | to be small if this is used for estimating the number of clusters).} 18 | } 19 | 20 | \value{ 21 | A list with components 22 | \item{p.value}{p-value against the null hypothesis of homogeneity.} 23 | \item{dh}{ratio of within-cluster sum of squares for two clusters and 24 | overall sum of squares.} 25 | \item{compare}{critical value for \code{dh} at level \code{alpha}.} 26 | \item{cluster1}{\code{FALSE} if the null hypothesis of homogeneity is 27 | rejected.} 28 | \item{alpha}{see above.} 29 | \item{z}{\code{1-alpha}-quantile of a standard Gaussian.} 30 | } 31 | 32 | \references{ 33 | Duda, R. O. and Hart, P. E. (1973) \emph{Pattern Classification and 34 | Scene Analysis}. Wiley, New York. 35 | } 36 | 37 | \author{Christian Hennig 38 | \email{christian.hennig@unibo.it} 39 | \url{https://www.unibo.it/sitoweb/christian.hennig/en}} 40 | 41 | \seealso{\code{\link{cluster.stats}}} 42 | 43 | \examples{ 44 | options(digits=2) 45 | set.seed(98765) 46 | iriss <- iris[sample(150,20),-5] 47 | km <- kmeans(iriss,2) 48 | dudahart2(iriss,km$cluster) 49 | } 50 | 51 | \keyword{cluster}% __ONLY ONE__ keyword per line 52 | -------------------------------------------------------------------------------- /man/extract.mixturepars.Rd: -------------------------------------------------------------------------------- 1 | \name{extract.mixturepars} 2 | \alias{extract.mixturepars} 3 | %- Also NEED an `\alias' for EACH other topic documented here.
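% A minimal sketch (an assumption, not package-documented behaviour): since
% the returned object has the parameter format of summary.mclustBIC output,
% it should be possible to pass it on to \code{\link{mixdens}} to evaluate
% the density of the reduced mixture, with siris, iriss and emp as in the
% examples below:
% mixdens(emp$variance$modelName, iriss, emp)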
4 | \title{Extract parameters for certain components from mclust} 5 | \description{ 6 | Extracts parameters of certain mixture components from the output of 7 | \code{\link[mclust]{summary.mclustBIC}} and updates proportions so that 8 | they sum up to 1. 9 | } 10 | \usage{ 11 | extract.mixturepars(mclustsum,compnumbers,noise=FALSE) 12 | } 13 | %- maybe also `usage' for other objects documented here. 14 | \arguments{ 15 | \item{mclustsum}{output object of \code{\link[mclust]{summary.mclustBIC}}.} 16 | \item{compnumbers}{vector of integers. Numbers of mixture components.} 17 | \item{noise}{logical. Should be \code{TRUE} if a noise component was fitted by 18 | \code{\link[mclust]{mclustBIC}}.} 19 | } 20 | 21 | \value{ 22 | Object as component \code{parameters} of 23 | \code{\link[mclust]{summary.mclustBIC}}-output, but for specified 24 | components only. (Orientation information from all components is kept.) 25 | } 26 | 27 | \author{Christian Hennig 28 | \email{christian.hennig@unibo.it} 29 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 30 | } 31 | \examples{ 32 | set.seed(98765) 33 | require(mclust) 34 | iriss <- iris[sample(150,20),-5] 35 | irisBIC <- mclustBIC(iriss,G=5,modelNames="VEV") 36 | siris <- summary(irisBIC,iriss) 37 | emp <- extract.mixturepars(siris,2) 38 | emp$pro 39 | round(emp$mean,digits=1) 40 | emp$variance$modelName 41 | round(emp$variance$scale,digits=2) 42 | 43 | } 44 | \keyword{cluster}% at least one, from doc/KEYWORDS 45 | \keyword{multivariate} 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /man/findrep.Rd: -------------------------------------------------------------------------------- 1 | \name{findrep} 2 | \alias{findrep} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Finding representatives for cluster border} 5 | \description{ 6 | Finds representative objects for the border of a cluster and the 7 | within-cluster variance as defined in the framework of the \code{\link{cdbw}} 8 | cluster validation index (and meant to be used in that context). 9 | } 10 | \usage{ 11 | findrep(x,xcen,clustering,cluster,r,p=ncol(x),n=nrow(x), 12 | nc=sum(clustering==cluster)) 13 | 14 | } 15 | %- maybe also `usage' for other objects documented here. 16 | \arguments{ 17 | \item{x}{matrix. Euclidean dataset.} 18 | \item{xcen}{mean vector of the cluster.} 19 | \item{clustering}{vector of integers with length \code{=nrow(x)}; 20 | indicating the cluster for each observation.} 21 | \item{cluster}{integer. Number of the cluster to be treated.} 22 | \item{r}{integer. Number of representatives.} 23 | \item{p}{integer. Number of dimensions.} 24 | \item{n}{integer. Number of observations.} 25 | \item{nc}{integer. Number of observations in \code{cluster}.} 26 | } 27 | 28 | \value{ 29 | List with components 30 | \item{repc}{vector of indexes of representatives (out of all 31 | observations).} 32 | \item{repx}{vector of indexes of representatives (out of only the 33 | observations in \code{cluster}).} 34 | \item{maxr}{number of representatives (this can be smaller than 35 | \code{r} if fewer pairwise different observations are in 36 | \code{cluster}).} 37 | \item{wvar}{estimated average within-cluster squared distance to mean.} 38 | } 39 | 40 | 41 | \references{ 42 | Halkidi, M. and Vazirgiannis, M. (2008) A density-based cluster 43 | validity approach using multi-representatives. \emph{Pattern 44 | Recognition Letters} 29, 773-786. 45 | 46 | Halkidi, M., Vazirgiannis, M. and Hennig, C.
(2015) Method-independent 47 | indices for cluster validation. In C. Hennig, M. Meila, F. Murtagh, 48 | R. Rocci (eds.) \emph{Handbook of Cluster Analysis}, CRC 49 | Press/Taylor \code{&} Francis, Boca Raton. 50 | 51 | 52 | } 53 | 54 | \seealso{ 55 | \code{\link{cdbw}} 56 | } 57 | 58 | \author{Christian Hennig 59 | \email{christian.hennig@unibo.it} 60 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 61 | } 62 | \examples{ 63 | options(digits=3) 64 | iriss <- as.matrix(iris[c(1:5,51:55,101:105),-5]) 65 | irisc <- as.numeric(iris[c(1:5,51:55,101:105),5]) 66 | findrep(iriss,colMeans(iriss),irisc,cluster=1,r=2) 67 | } 68 | \keyword{cluster}% at least one, from doc/KEYWORDS 69 | 70 | 71 | 72 | 73 | -------------------------------------------------------------------------------- /man/fpclusters.Rd: -------------------------------------------------------------------------------- 1 | \name{fpclusters} 2 | \alias{fpclusters} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Extracting clusters from fixed point cluster objects} 5 | \description{ 6 | \code{fpclusters} is a generic function which extracts the 7 | representative fixed point clusters (FPCs) 8 | from FPC objects generated by \code{\link{fixmahal}} and 9 | \code{\link{fixreg}}. For documentation and examples see 10 | \code{\link{fixmahal}} and \code{\link{fixreg}}. 11 | } 12 | \usage{ 13 | fpclusters(object, ...) 14 | } 15 | %- maybe also `usage' for other objects documented here. 16 | \arguments{ 17 | \item{object}{object of class \code{rfpc} or \code{mfpc}.} 18 | \item{...}{further arguments depending on the method.} 19 | } 20 | 21 | \value{ 22 | a list of logical or numerical vectors indicating or giving the 23 | weights of the cluster memberships. 24 | } 25 | 26 | \author{Christian Hennig 27 | \email{christian.hennig@unibo.it} 28 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 29 | } 30 | 31 | \seealso{\code{\link{fixmahal}}, \code{\link{fixreg}}} 32 | 33 | \keyword{cluster}% at least one, from doc/KEYWORDS 34 | 35 | -------------------------------------------------------------------------------- /man/itnumber.Rd: -------------------------------------------------------------------------------- 1 | \name{itnumber} 2 | \alias{itnumber} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Number of regression fixed point cluster iterations} 5 | \description{ 6 | Computes the number of fixed point iterations needed by 7 | \code{\link{fixreg}} to find \code{mtf} times 8 | a fixed point cluster (FPC) of size 9 | \code{cn} with an approximated probability of \code{prob}. 10 | 11 | Thought for use within \code{\link{fixreg}}. 12 | } 13 | \usage{ 14 | itnumber(n, p, cn, mtf, prob = 0.95, maxir = 20000) 15 | } 16 | %- maybe also `usage' for other objects documented here. 17 | \arguments{ 18 | \item{n}{positive integer. Total number of points.} 19 | \item{p}{positive integer. Number of independent variables.} 20 | \item{cn}{positive integer smaller or equal to \code{n}. 21 | Size of the FPC.} 22 | \item{mtf}{positive integer.} 23 | \item{prob}{number between 0 and 1.} 24 | \item{maxir}{positive integer. \code{itnumber} is set to this value if 25 | it would otherwise be larger.} 26 | } 27 | \details{ 28 | The computation is based on the binomial distribution with probability 29 | given by \code{\link{clusexpect}} with \code{ir=1}. 30 | } 31 | \value{ 32 | An integer. 33 | } 34 | \references{ 35 | Hennig, C. 
(2002) Fixed point clusters for linear regression: 36 | computation and comparison, \emph{Journal of 37 | Classification} 19, 249-276. 38 | } 39 | 40 | \author{Christian Hennig 41 | \email{christian.hennig@unibo.it} 42 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/}} 43 | 44 | \seealso{\code{\link{fixreg}}, \code{\link{clusexpect}}} 45 | 46 | \examples{ 47 | itnumber(500,4,150,2) 48 | } 49 | \keyword{univar}% at least one, from doc/KEYWORDS 50 | \keyword{cluster}% __ONLY ONE__ keyword per line 51 | -------------------------------------------------------------------------------- /man/jittervar.Rd: -------------------------------------------------------------------------------- 1 | \name{jittervar} 2 | \alias{jittervar} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Jitter variables in a data matrix} 5 | \description{ 6 | Jitters some variables in a data matrix. 7 | } 8 | \usage{ 9 | jittervar(x,jitterv=NULL,factor=1) 10 | } 11 | %- maybe also `usage' for other objects documented here. 12 | \arguments{ 13 | \item{x}{data matrix or data frame.} 14 | \item{jitterv}{vector of numbers of variables to be jittered.} 15 | \item{factor}{numeric. Passed on to \code{\link{jitter}}. See the 16 | documentation there. The higher, the more jittering.} 17 | } 18 | 19 | \value{ 20 | data matrix or data frame with jittered variables. 21 | } 22 | 23 | \author{Christian Hennig 24 | \email{christian.hennig@unibo.it} 25 | \url{https://www.unibo.it/sitoweb/christian.hennig/en}} 26 | 27 | \seealso{\code{\link{jitter}}} 28 | 29 | \examples{ 30 | set.seed(776655) 31 | v1 <- rnorm(20) 32 | v2 <- rnorm(20) 33 | d1 <- sample(1:5,20,replace=TRUE) 34 | d2 <- sample(1:4,20,replace=TRUE) 35 | ldata <- cbind(v1,v2,d1,d2) 36 | jv <- jittervar(ldata,jitterv=3:4) 37 | } 38 | 39 | \keyword{manip}% __ONLY ONE__ keyword per line 40 | -------------------------------------------------------------------------------- /man/kmeansruns.Rd: -------------------------------------------------------------------------------- 1 | \name{kmeansruns} 2 | \alias{kmeansruns} 3 | 4 | %- Also NEED an `\alias' for EACH other topic documented here. 5 | \title{k-means with estimation of k and random initialisations} 6 | \description{ 7 | This calls the function \code{\link{kmeans}} to perform a k-means 8 | clustering, but initializes the k-means algorithm several times with 9 | random points from the data set as means. Furthermore, it is more 10 | robust against the occurrence of empty clusters in the algorithm and 11 | it estimates the number of clusters by either the Calinski-Harabasz 12 | index (\code{\link{calinhara}}) or average silhouette width (see 13 | \code{\link[cluster]{pam.object}}). The Duda-Hart test 14 | (\code{\link{dudahart2}}) is applied to decide whether there should be 15 | more than one cluster (unless 1 is excluded as number of clusters). 16 | } 17 | \usage{ 18 | kmeansruns(data,krange=2:10,criterion="ch", 19 | iter.max=100,runs=100, 20 | scaledata=FALSE,alpha=0.001, 21 | critout=FALSE,plot=FALSE,...) 22 | } 23 | \arguments{ 24 | \item{data}{A numeric matrix of data, or an object that can be coerced to 25 | such a matrix (such as a numeric vector or a data frame with 26 | all numeric columns). } 27 | \item{krange}{integer vector. Numbers of clusters which are to be 28 | compared by the chosen criterion. Note: average 29 | silhouette width and Calinski-Harabasz can't estimate the number of 30 | clusters \code{nc=1}.
If 1 is included, a Duda-Hart test is applied 31 | and 1 is estimated if this is not significant.} 32 | \item{criterion}{one of \code{"asw"} or \code{"ch"}. Determines 33 | whether average silhouette width or Calinski-Harabasz is applied.} 34 | \item{iter.max}{integer. The maximum number of iterations allowed.} 35 | \item{runs}{integer. Number of starts of the k-means algorithm.} 36 | \item{scaledata}{logical. If \code{TRUE}, the variables are centered 37 | and scaled to unit variance before execution.} 38 | \item{alpha}{numeric between 0 and 1, tuning constant for 39 | \code{\link{dudahart2}} (only used for 1-cluster test).} 40 | \item{critout}{logical. If \code{TRUE}, the criterion value is printed 41 | out for every number of clusters.} 42 | \item{plot}{logical. If \code{TRUE}, every clustering resulting from a 43 | run of the algorithm is plotted.} 44 | \item{...}{further arguments to be passed on to \code{\link{kmeans}}.} 45 | } 46 | 47 | \value{ 48 | The output of the optimal run of the \code{\link{kmeans}}-function 49 | with added components \code{bestk} and \code{crit}. 50 | A list with components 51 | \item{cluster}{A vector of integers indicating the cluster to which each 52 | point is allocated.} 53 | \item{centers}{A matrix of cluster centers.} 54 | \item{withinss}{The within-cluster sum of squares for each cluster.} 55 | \item{size}{The number of points in each cluster.} 56 | \item{bestk}{The optimal number of clusters.} 57 | \item{crit}{Vector with values of the \code{criterion} for all used numbers of 58 | clusters (0 if number not tried).} 59 | } 60 | \author{Christian Hennig 61 | \email{christian.hennig@unibo.it} 62 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 63 | } 64 | 65 | \references{ 66 | Calinski, T., and Harabasz, J. (1974) A Dendrite Method for Cluster 67 | Analysis, \emph{Communications in Statistics}, 3, 1-27. 68 | 69 | Duda, R. O. and Hart, P. E. (1973) \emph{Pattern Classification and 70 | Scene Analysis}. Wiley, New York. 71 | 72 | Hartigan, J. A. and Wong, M. A. (1979). A K-means clustering 73 | algorithm. \emph{Applied Statistics}, 28, 100-108. 74 | 75 | Kaufman, L. and Rousseeuw, P.J. (1990). "Finding Groups in Data: 76 | An Introduction to Cluster Analysis". Wiley, New York. 77 | } 78 | \seealso{ 79 | \code{\link{kmeans}}, \code{\link{pamk}}, 80 | \code{\link{calinhara}}, \code{\link{dudahart2}} 81 | } 82 | \examples{ 83 | options(digits=3) 84 | set.seed(20000) 85 | face <- rFace(50,dMoNo=2,dNoEy=0,p=2) 86 | pka <- kmeansruns(face,krange=1:5,critout=TRUE,runs=2,criterion="asw") 87 | pkc <- kmeansruns(face,krange=1:5,critout=TRUE,runs=2,criterion="ch") 88 | } 89 | \keyword{cluster}% at least one, from doc/KEYWORDS 90 | \keyword{multivariate} 91 | 92 | 93 | 94 | -------------------------------------------------------------------------------- /man/lcmixed.Rd: -------------------------------------------------------------------------------- 1 | \name{lcmixed} 2 | \alias{lcmixed} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{flexmix method for mixed Gaussian/multinomial mixtures} 5 | \description{ 6 | \code{lcmixed} is a method for the 7 | \code{\link[flexmix]{flexmix}}-function in package 8 | \code{flexmix}.
It provides the necessary information to run an 9 | EM-algorithm for maximum likelihood estimation for a latent class 10 | mixture (clustering) model where some variables are continuous 11 | and modelled within the mixture components by Gaussian distributions 12 | and some variables are categorical and modelled within components by 13 | independent multinomial distributions. \code{lcmixed} can be called 14 | within \code{flexmix}. The function \code{\link{flexmixedruns}} is a wrapper 15 | function that can be run to apply \code{lcmixed}. 16 | 17 | Note that at least one categorical variable is needed, but it is 18 | possible to use data without any continuous variables. 19 | 20 | There are further format restrictions to the data (see below in the 21 | documentation of \code{continuous} and \code{discrete}), which 22 | can be ignored when running \code{lcmixed} through 23 | \code{\link{flexmixedruns}}. 24 | } 25 | \usage{ 26 | lcmixed( formula = .~. , continuous, discrete, ppdim, 27 | diagonal = TRUE, pred.ordinal=FALSE, printlik=FALSE ) 28 | } 29 | %- maybe also `usage' for other objects documented here. 30 | \arguments{ 31 | \item{formula}{a formula to specify response and explanatory 32 | variables. For \code{lcmixed} this always has the form \code{x~1}, 33 | where \code{x} is a matrix or data frame of all variables 34 | involved, because regression on explanatory variables is not 35 | implemented.} 36 | \item{continuous}{number of continuous variables. Note that the 37 | continuous variables always need to be the first variables in the 38 | matrix or data frame.} 39 | \item{discrete}{number of categorical variables. Always the last 40 | variables in the matrix or data frame. Note that categorical 41 | variables must always be coded as integers 1,2,3, etc. without 42 | interruption.} 43 | \item{ppdim}{vector of integers specifying the number of 44 | categories existing in the data for each categorical variable.} 45 | \item{diagonal}{logical. If \code{TRUE}, Gaussian models are fitted 46 | restricted to diagonal covariance matrices. Otherwise, covariance 47 | matrices are unrestricted. \code{TRUE} is consistent with the 48 | "within class independence" assumption for the multinomial variables.} 49 | \item{pred.ordinal}{logical. If \code{FALSE}, the within-component 50 | predicted value for categorical variables is the probability mode, 51 | otherwise it is the mean of the standard (1,2,3,...) scores, which 52 | may be better for ordinal variables.} 53 | \item{printlik}{logical. If \code{TRUE}, the loglikelihood is printed 54 | out whenever computed.} 55 | } 56 | 57 | \details{ 58 | The data need to be organised case-wise, i.e., if there are 59 | categorical variables only, and 15 cases with values c(1,1,2) on the 60 | 3 variables, the data matrix needs 15 rows with values 1 1 2. 61 | 62 | General documentation on flexmix methods can be found in Chapter 4 of 63 | Friedrich Leisch's "FlexMix: A General Framework for Finite Mixture 64 | Models and Latent Class Regression in R", 65 | \url{https://CRAN.R-project.org/package=flexmix} 66 | } 67 | 68 | \value{ 69 | An object of class \code{FLXMC} (not documented; only used 70 | internally by \code{flexmix}). 71 | } 72 | 73 | \references{ 74 | Hennig, C. and Liao, T. (2013) How to find an appropriate clustering 75 | for mixed-type variables with application to socio-economic 76 | stratification, \emph{Journal of the Royal Statistical Society, Series 77 | C Applied Statistics}, 62, 309-369.
78 | 79 | } 80 | 81 | \author{Christian Hennig 82 | \email{christian.hennig@unibo.it} 83 | \url{https://www.unibo.it/sitoweb/christian.hennig/en}} 84 | 85 | 86 | \seealso{ 87 | \code{\link{flexmixedruns}}, \code{\link[flexmix]{flexmix}}, 88 | \code{\link[flexmix]{flexmix-class}}, 89 | \code{\link{discrete.recode}}, which recodes a dataset into the format 90 | required by \code{lcmixed} 91 | } 92 | 93 | \examples{ 94 | set.seed(112233) 95 | options(digits=3) 96 | require(MASS) 97 | require(flexmix) 98 | data(Cars93) 99 | Cars934 <- Cars93[,c(3,5,8,10)] 100 | cc <- 101 | discrete.recode(Cars934,xvarsorted=FALSE,continuous=c(2,3),discrete=c(1,4)) 102 | fcc <- flexmix(cc$data~1,k=2, 103 | model=lcmixed(continuous=2,discrete=2,ppdim=c(6,3),diagonal=TRUE)) 104 | summary(fcc) 105 | } 106 | \keyword{cluster}% __ONLY ONE__ keyword per line 107 | -------------------------------------------------------------------------------- /man/localshape.Rd: -------------------------------------------------------------------------------- 1 | \name{localshape} 2 | \alias{localshape} 3 | \title{Local shape matrix} 4 | \description{ 5 | This computes a matrix formalising 'local shape', i.e., aggregated 6 | standardised variance/covariance in a Mahalanobis neighbourhood of the data 7 | points. This can be used for finding clusters when used as one of the 8 | covariance matrices in 9 | Invariant Coordinate Selection (function \code{ics} in package 10 | \code{ICS}), see Hennig's 11 | discussion of Tyler et al. (2009) and the authors' rejoinder. 12 | } 13 | \usage{ 14 | localshape(xdata,proportion=0.1,mscatter="mcd",mcdalpha=0.8, 15 | covstandard="det") 16 | } 17 | \arguments{ 18 | \item{xdata}{objects times variables data matrix.} 19 | \item{proportion}{proportion of points to be considered as neighbourhood.} 20 | \item{mscatter}{"mcd" or "cov"; specifies whether the minimum covariance 21 | determinant or the 22 | classical covariance matrix is used for Mahalanobis distance 23 | computation.} 24 | \item{mcdalpha}{if \code{mscatter="mcd"}, this is the alpha parameter 25 | to be used by the MCD covariance matrix, i.e. one minus the 26 | asymptotic breakdown point, see \code{\link[robustbase]{covMcd}}.} 27 | \item{covstandard}{one of "trace", "det" or "none", determining by 28 | what constant the pointwise neighbourhood covariance matrices are 29 | standardised. "det" makes the local shape matrix affine equivariant, as noted in the 30 | discussion rejoinder of Tyler et al. (2009).} 31 | } 32 | 33 | \value{ 34 | The local shape matrix. 35 | } 36 | 37 | \references{ 38 | Tyler, D. E., Critchley, F., Duembgen, L., Oja, H. (2009) 39 | Invariant coordinate selection (with discussion). 40 | \emph{Journal of the Royal Statistical Society, Series B}, 71, 549-592. 41 | } 42 | \author{Christian Hennig 43 | \email{christian.hennig@unibo.it} 44 | \url{https://www.unibo.it/sitoweb/christian.hennig/en}} 45 | \examples{ 46 | options(digits=3) 47 | data(iris) 48 | localshape(iris[,-5],mscatter="cov") 49 | } 50 | \keyword{multivariate} 51 | 52 | -------------------------------------------------------------------------------- /man/mahalanodisc.Rd: -------------------------------------------------------------------------------- 1 | \name{mahalanodisc} 2 | \alias{mahalanodisc} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Mahalanobis for AWC} 5 | \description{ 6 | Vector of Mahalanobis distances or their root. For use in \code{awcoord} only.
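% A minimal sketch of the two \code{modus} settings (x as in the examples
% below): values for modus="md" are the square roots of those for
% modus="square", i.e.
% all.equal(mahalanodisc(x, c(0,0), cov(x), modus="md"),
%           sqrt(mahalanodisc(x, c(0,0), cov(x), modus="square")))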
7 | } 8 | \usage{ 9 | mahalanodisc(x2, mg, covg, modus="square") 10 | 11 | } 12 | %- maybe also `usage' for other objects documented here. 13 | \arguments{ 14 | \item{x2}{numerical data matrix.} 15 | \item{mg}{mean vector.} 16 | \item{covg}{covariance matrix.} 17 | \item{modus}{"md" (roots of Mahalanobis distances) or "square" 18 | (original squared form of Mahalanobis distances).} 19 | } 20 | \details{ 21 | The covariance matrix 22 | is inverted by use of 23 | \code{\link{solvecov}}, which can be expected to give 24 | reasonable results for singular within-class covariance matrices. 25 | } 26 | % \details{ 27 | % } 28 | \value{ 29 | vector of (rooted) Mahalanobis distances. 30 | } 31 | 32 | \author{Christian Hennig 33 | \email{christian.hennig@unibo.it} 34 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 35 | } 36 | 37 | \seealso{ 38 | \code{\link{awcoord}}, \code{\link{solvecov}} 39 | } 40 | 41 | \examples{ 42 | options(digits=3) 43 | x <- cbind(rnorm(50),rnorm(50)) 44 | mahalanodisc(x,c(0,0),cov(x)) 45 | mahalanodisc(x,c(0,0),matrix(0,ncol=2,nrow=2)) 46 | } 47 | \keyword{multivariate}% at least one, from doc/KEYWORDS 48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- /man/mahalanofix.Rd: -------------------------------------------------------------------------------- 1 | \name{mahalanofix} 2 | \alias{mahalanofix} 3 | \alias{mahalanofuz} 4 | %- Also NEED an `\alias' for EACH other topic documented here. 5 | \title{Mahalanobis distances from center of indexed points} 6 | \description{ 7 | Computes the vector of (classical or robust) 8 | Mahalanobis distances of all points of \code{x} 9 | to the center of the points indexed (or weighted) 10 | by \code{gv}. The latter also determine 11 | the covariance matrix. 12 | 13 | Thought for use within \code{\link{fixmahal}}. 14 | } 15 | \usage{ 16 | mahalanofix(x, n = nrow(as.matrix(x)), p = ncol(as.matrix(x)), gv = 17 | rep(1, times = n), cmax = 1e+10, method = "ml") 18 | 19 | mahalanofuz(x, n = nrow(as.matrix(x)), p = ncol(as.matrix(x)), 20 | gv = rep(1, times=n), cmax = 1e+10) 21 | } 22 | %- maybe also `usage' for other objects documented here. 23 | \arguments{ 24 | \item{x}{a numerical data matrix, rows are points, columns are variables.} 25 | \item{n}{positive integer. Number of points.} 26 | \item{p}{positive integer. Number of variables.} 27 | \item{gv}{for \code{mahalanofix} 28 | a logical or 0-1 vector of length \code{n}. For \code{mahalanofuz} a 29 | numerical vector with values between 0 and 1.} 30 | \item{cmax}{positive number. Used in \code{\link{solvecov}} if 31 | the covariance matrix is singular.} 32 | \item{method}{\code{"ml"}, \code{"classical"}, 33 | \code{"mcd"} or \code{"mve"}. Method to compute the covariance 34 | matrix estimator. See \code{\link[MASS]{cov.rob}}, \code{\link{fixmahal}}.} 35 | 36 | } 37 | \details{ 38 | \code{\link{solvecov}} is used to invert the covariance matrix. The methods 39 | \code{"mcd"} and \code{"mve"} in \code{mahalanofix} do not work properly 40 | with point constellations with singular covariance matrices! 41 | } 42 | \value{ 43 | A list of the following components: 44 | \item{md}{vector of Mahalanobis distances.} 45 | \item{mg}{mean of the points indexed by \code{gv}, weighted mean in 46 | \code{mahalanofuz}.} 47 | \item{covg}{covariance matrix of the points indexed by \code{gv}, 48 | weighted covariance matrix in \code{mahalanofuz}.} 49 | \item{covinv}{\code{covg} inverted by \code{\link{solvecov}}.} 50 | \item{coll}{logical.
If \code{TRUE}, \code{covg} was 51 | (numerically) singular.} 52 | } 53 | \author{Christian Hennig 54 | \email{christian.hennig@unibo.it} 55 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/}} 56 | 57 | \note{Methods \code{"mcd"} and \code{"mve"} require library \code{lqs}.} 58 | 59 | \seealso{\code{\link{fixmahal}}, \code{\link{solvecov}}, \code{\link[MASS]{cov.rob}}} 60 | 61 | \examples{ 62 | x <- c(1,2,3,4,5,6,7,8,9,10) 63 | y <- c(1,2,3,8,7,6,5,8,9,10) 64 | mahalanofix(cbind(x,y),gv=c(0,0,0,1,1,1,1,1,0,0)) 65 | mahalanofix(cbind(x,y),gv=c(0,0,0,1,1,1,1,0,0,0)) 66 | mahalanofix(cbind(x,y),gv=c(0,0,0,1,1,1,1,1,0,0),method="mcd") 67 | mahalanofuz(cbind(x,y),gv=c(0,0,0.5,0.5,1,1,1,0.5,0.5,0)) 68 | } 69 | \keyword{multivariate}% at least one, from doc/KEYWORDS 70 | 71 | 72 | 73 | 74 | 75 | -------------------------------------------------------------------------------- /man/mahalconf.Rd: -------------------------------------------------------------------------------- 1 | \name{mahalconf} 2 | \alias{mahalconf} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Mahalanobis fixed point clusters initial configuration} 5 | \description{ 6 | Generates an initial configuration of \code{startn} points from 7 | dataset \code{x} for the \code{\link{fixmahal}} 8 | fixed point iteration. 9 | 10 | Thought only for use within \code{\link{fixmahal}}. 11 | } 12 | \usage{ 13 | mahalconf(x, no, startn, covall, plot) 14 | } 15 | %- maybe also `usage' for other objects documented here. 16 | \arguments{ 17 | \item{x}{numerical matrix. Rows are points, columns are variables.} 18 | \item{no}{integer between 1 and \code{nrow(x)}. Number of the first 19 | point of the configuration.} 20 | \item{startn}{integer between 1 and \code{nrow(x)}.} 21 | \item{covall}{covariance matrix for the computation of the first 22 | Mahalanobis distances.} 23 | \item{plot}{a string. If equal to \code{"start"} or \code{"both"}, the 24 | first two variables and the first \code{ncol(x)+1} points are plotted.} 25 | } 26 | \details{ 27 | \code{mahalconf} first chooses the \eqn{p} (number of variables) 28 | nearest points to point no. \code{no} in terms of the Mahalanobis 29 | distance w.r.t. \code{covall}, so that there are \eqn{p+1} points. 30 | In every further step, the covariance 31 | matrix of the current configuration is computed and the nearest point 32 | in terms of the new Mahalanobis distance is 33 | added. \code{\link{solvecov}} is used to invert singular covariance 34 | matrices. 35 | } 36 | \value{ 37 | A logical vector of length \code{nrow(x)}. 38 | } 39 | 40 | \author{Christian Hennig 41 | \email{christian.hennig@unibo.it} 42 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/}} 43 | 44 | \seealso{\code{\link{fixmahal}}, \code{\link{solvecov}}} 45 | 46 | \examples{ 47 | set.seed(4634) 48 | face <- rFace(600,dMoNo=2,dNoEy=0,p=2) 49 | mahalconf(face,no=200,startn=20,covall=cov(face),plot="start") 50 | } 51 | \keyword{multivariate}% at least one, from doc/KEYWORDS 52 | \keyword{cluster} 53 | -------------------------------------------------------------------------------- /man/mergeparameters.Rd: -------------------------------------------------------------------------------- 1 | \name{mergeparameters} 2 | \alias{mergeparameters} 3 | %- Also NEED an `\alias' for EACH other topic documented here.
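% A minimal sketch (following the \value documentation below; iriss, probs,
% muarray, Sigmaarray and z as in the examples): after merging components
% j1=1 and j2=2, the entries in place j2 are obsolete and can be dropped to
% obtain the reduced mixture:
% mpi <- mergeparameters(iriss, 1, 2, probs, muarray, Sigmaarray, z)
% probs.new <- mpi$probs[-2]  # merged proportion sits in place j1=1
% z.new <- mpi$z[, -2]        # posteriors; merged component in column j1=1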
4 | \title{New parameters from merging two Gaussian mixture components} 5 | \description{ 6 | Re-computes pointwise posterior probabilities, mean and covariance 7 | matrix for a mixture component obtained by merging two mixture 8 | components in a Gaussian mixture. 9 | } 10 | \usage{ 11 | mergeparameters(xdata, j1, j2, probs, muarray,Sigmaarray, z) 12 | } 13 | %- maybe also `usage' for other objects documented here. 14 | \arguments{ 15 | \item{xdata}{data (something that can be coerced into a matrix).} 16 | \item{j1}{integer. Number of first mixture component to be merged.} 17 | \item{j2}{integer. Number of second mixture component to be merged.} 18 | \item{probs}{vector of component proportions (for all components; 19 | should sum up to one).} 20 | \item{muarray}{matrix of component means (rows).} 21 | \item{Sigmaarray}{array of component covariance matrices (third 22 | dimension refers to component number).} 23 | \item{z}{matrix of observation- (row-)wise posterior probabilities of 24 | belonging to the components (columns).} 25 | } 26 | 27 | \value{ 28 | List with components 29 | \item{probs}{see above; sum of probabilities for original components 30 | \code{j1} and \code{j2} is now \code{probs[j1]}. Note that generally, 31 | also for the further components, values for the merged component are 32 | in place \code{j1} and values in place \code{j2} are not changed. This 33 | means that in order to have only the information for the new mixture 34 | after merging, the entries in places \code{j2} need to be suppressed.} 35 | \item{muarray}{see above; weighted mean of means of component 36 | \code{j1} and \code{j2} is now in place \code{j1}.} 37 | \item{Sigmaarray}{see above; weighted covariance matrix handled as 38 | above.} 39 | \item{z}{see above; original entries for columns \code{j1} and 40 | \code{j2} are summed up and now in column \code{j1}.} 41 | } 42 | 43 | \references{ 44 | Hennig, C. (2010) Methods for merging Gaussian mixture components, 45 | \emph{Advances in Data Analysis and Classification}, 4, 3-34. 46 | } 47 | \author{Christian Hennig 48 | \email{christian.hennig@unibo.it} 49 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 50 | } 51 | \examples{ 52 | options(digits=3) 53 | set.seed(98765) 54 | require(mclust) 55 | iriss <- iris[sample(150,20),-5] 56 | irisBIC <- mclustBIC(iriss) 57 | siris <- summary(irisBIC,iriss) 58 | probs <- siris$parameters$pro 59 | muarray <- siris$parameters$mean 60 | Sigmaarray <- siris$parameters$variance$sigma 61 | z <- siris$z 62 | mpi <- mergeparameters(iriss,1,2,probs,muarray,Sigmaarray,z) 63 | mpi$probs 64 | mpi$muarray 65 | } 66 | \keyword{multivariate} 67 | \keyword{cluster} 68 | 69 | 70 | -------------------------------------------------------------------------------- /man/minsize.Rd: -------------------------------------------------------------------------------- 1 | \name{minsize} 2 | \alias{minsize} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Minimum size of regression fixed point cluster} 5 | \description{ 6 | Computes the minimum size of a fixed point cluster (FPC) which is 7 | found at least \code{mtf} times with approximated 8 | probability \code{prob} by 9 | \code{ir} fixed point iterations of \code{\link{fixreg}}. 10 | 11 | Thought for use within \code{\link{fixreg}}. 12 | } 13 | \usage{ 14 | minsize(n, p, ir, mtf, prob = 0.5) 15 | } 16 | %- maybe also `usage' for other objects documented here. 17 | \arguments{ 18 | \item{n}{positive integer. 
Total number of points.} 19 | \item{p}{positive integer. Number of independent variables.} 20 | \item{ir}{positive integer. Number of fixed point iterations.} 21 | \item{mtf}{positive integer.} 22 | \item{prob}{numerical between 0 and 1.} 23 | } 24 | \details{ 25 | The computation is based on the binomial distribution with probability 26 | given by \code{\link{clusexpect}} with \code{ir=1}. 27 | } 28 | \value{ 29 | An integer. 30 | } 31 | \references{ 32 | Hennig, C. (2002) Fixed point clusters for linear regression: 33 | computation and comparison, \emph{Journal of 34 | Classification} 19, 249-276. 35 | } 36 | 37 | \author{Christian Hennig 38 | \email{christian.hennig@unibo.it} 39 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/}} 40 | 41 | \seealso{\code{\link{fixreg}}, \code{\link{clusexpect}}, 42 | \code{\link{itnumber}}} 43 | 44 | \examples{ 45 | minsize(500,4,7000,2) 46 | } 47 | \keyword{univar}% at least one, from doc/KEYWORDS 48 | \keyword{cluster}% __ONLY ONE__ keyword per line 49 | -------------------------------------------------------------------------------- /man/mixdens.Rd: -------------------------------------------------------------------------------- 1 | \name{mixdens} 2 | \alias{mixdens} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Density of multivariate Gaussian mixture, mclust parameterisation} 5 | \description{ 6 | Computes density values for data from a mixture of multivariate Gaussian 7 | distributions with parameters based on the way models are specified 8 | and parameters are stored in package mclust. 9 | } 10 | \usage{ 11 | mixdens(modelName,data,parameters) 12 | } 13 | %- maybe also `usage' for other objects documented here. 14 | \arguments{ 15 | \item{modelName}{an mclust model name. 16 | See \code{\link[mclust]{mclustModelNames}}.} 17 | \item{data}{data matrix; density values are computed for every 18 | observation (row).} 19 | \item{parameters}{parameters of Gaussian mixture in the format used in 20 | the output of \code{\link[mclust]{summary.mclustBIC}}.} 21 | } 22 | 23 | \value{ 24 | Vector of density values for the observations. 25 | } 26 | 27 | \author{Christian Hennig 28 | \email{christian.hennig@unibo.it} 29 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 30 | } 31 | \examples{ 32 | set.seed(98765) 33 | require(mclust) 34 | iriss <- iris[sample(150,20),-5] 35 | irisBIC <- mclustBIC(iriss) 36 | siris <- summary(irisBIC,iriss) 37 | round(mixdens(siris$modelName,iriss,siris$parameters),digits=2) 38 | } 39 | \keyword{cluster}% at least one, from doc/KEYWORDS 40 | \keyword{multivariate} 41 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /man/mixpredictive.Rd: -------------------------------------------------------------------------------- 1 | \name{mixpredictive} 2 | \alias{mixpredictive} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Prediction strength of merged Gaussian mixture} 5 | \description{ 6 | Computes the prediction strength of clustering by 7 | merging Gaussian mixture components, see \code{\link{mergenormals}}. 8 | The predictive strength is 9 | defined according to Tibshirani and Walther (2005), carried out as 10 | described in Hennig (2010), see details. 11 | } 12 | \usage{ 13 | mixpredictive(xdata, Gcomp, Gmix, M=50, ...) 14 | } 15 | %- maybe also `usage' for other objects documented here. 16 | \arguments{ 17 | \item{xdata}{data (something that can be coerced into a matrix).} 18 | \item{Gcomp}{integer. 
Number of components of the underlying Gaussian mixture.} 19 | \item{Gmix}{integer. Number of clusters after merging Gaussian components.} 20 | \item{M}{integer. Number of times the dataset is divided into two 21 | halves.} 22 | \item{...}{further arguments that can potentially arrive in calls but 23 | are currently not used.} 24 | } 25 | 26 | \value{ 27 | List with components 28 | \item{predcorr}{vector of length \code{M} with relative frequencies of 29 | correct predictions (clusterwise minimum).} 30 | \item{mean.pred}{mean of \code{predcorr}.} 31 | } 32 | 33 | \details{ 34 | The prediction strength for a certain number of clusters \code{Gmix} under a 35 | random partition of the dataset into halves A and B is defined as 36 | follows. Both halves are clustered with \code{Gmix} 37 | clusters. Then the points of 38 | A are classified to the clusters of B. This is done by use of the 39 | maximum a posteriori rule for mixtures as in Hennig (2010), 40 | differently from Tibshirani and Walther (2005). A pair of points of A in 41 | the same A-cluster is defined to be correctly predicted if both points 42 | are classified into the same cluster on B. The same is done with the 43 | points of B relative to the clustering on A. The prediction strength 44 | for each of the clusterings is the minimum (taken over all clusters) 45 | relative frequency of correctly predicted pairs of points of that 46 | cluster. The final mean prediction strength statistic is the mean over 47 | all 2M clusterings. 48 | } 49 | 50 | \references{ 51 | Hennig, C. (2010) Methods for merging Gaussian mixture components, 52 | \emph{Advances in Data Analysis and Classification}, 4, 3-34. 53 | 54 | Tibshirani, R. and Walther, G. (2005) Cluster Validation by 55 | Prediction Strength, \emph{Journal of Computational and Graphical 56 | Statistics}, 14, 511-528. 57 | } 58 | 59 | \author{Christian Hennig 60 | \email{christian.hennig@unibo.it} 61 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 62 | } 63 | 64 | \seealso{ 65 | \code{\link{prediction.strength}} for Tibshirani and Walther's 66 | original method. 67 | \code{\link{mergenormals}} for the clustering method applied here. 68 | } 69 | 70 | \examples{ 71 | set.seed(98765) 72 | iriss <- iris[sample(150,20),-5] 73 | mp <- mixpredictive(iriss,2,2,M=2) 74 | } 75 | \keyword{cluster}% at least one, from doc/KEYWORDS 76 | \keyword{multivariate} 77 | 78 | -------------------------------------------------------------------------------- /man/mvdcoord.Rd: -------------------------------------------------------------------------------- 1 | \name{mvdcoord} 2 | \alias{mvdcoord} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Mean/variance differences discriminant coordinates} 5 | \description{ 6 | Discriminant projections as defined in Young, Marco and Odell (1987). 7 | The principle is to maximize the projection of a matrix consisting of 8 | the differences between the means of all classes and the first mean 9 | and the differences between the covariance matrices of all classes and 10 | the first covariance matrix. 11 | } 12 | \usage{ 13 | mvdcoord(xd, clvecd, clnum=1, sphere="mcd", ...) 14 | } 15 | %- maybe also `usage' for other objects documented here. 16 | \arguments{ 17 | \item{xd}{the data matrix; a numerical object which can be coerced 18 | to a matrix.} 19 | \item{clvecd}{integer vector of class numbers; length must equal 20 | \code{nrow(xd)}.} 21 | \item{clnum}{integer.
Number of the class relative to which all differences are 22 | computed.} 23 | \item{sphere}{a covariance matrix or one of 24 | "mve", "mcd", "classical", "none". The matrix used for sphering the 25 | data. "mcd" and "mve" are robust covariance matrices as implemented 26 | in \code{\link[MASS]{cov.rob}}. "classical" refers to the classical 27 | covariance matrix. "none" means no sphering and use of the raw 28 | data.} 29 | \item{...}{no effect} 30 | } 31 | % \details{ 32 | % } 33 | \value{ 34 | List with the following components 35 | \item{ev}{eigenvalues in descending order.} 36 | \item{units}{columns are coordinates of projection basis vectors. 37 | New points \code{x} can be projected onto the projection basis vectors 38 | by \code{x \%*\% units}.} 39 | \item{proj}{projections of \code{xd} onto \code{units}.} 40 | } 41 | \references{ 42 | Young, D. M., Marco, V. R. and Odell, P. L. (1987). Quadratic 43 | discrimination: some results on optimal low-dimensional 44 | representation, \emph{Journal of Statistical Planning and Inference}, 45 | 17, 307-319. 46 | } 47 | \author{Christian Hennig 48 | \email{christian.hennig@unibo.it} 49 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 50 | } 51 | 52 | \seealso{ 53 | \code{\link{plotcluster}} for straightforward discriminant plots. 54 | \code{\link{discrproj}} for alternatives. 55 | \code{\link{rFace}} for generation of the example data used below. 56 | } 57 | 58 | \examples{ 59 | set.seed(4634) 60 | face <- rFace(300,dMoNo=2,dNoEy=0,p=3) 61 | grface <- as.integer(attr(face,"grouping")) 62 | mcf <- mvdcoord(face,grface) 63 | plot(mcf$proj,col=grface) 64 | # ...done in one step by function plotcluster. 65 | } 66 | \keyword{multivariate}% at least one, from doc/KEYWORDS 67 | \keyword{classif}% __ONLY ONE__ keyword per line 68 | 69 | 70 | 71 | -------------------------------------------------------------------------------- /man/ncoord.Rd: -------------------------------------------------------------------------------- 1 | \name{ncoord} 2 | \alias{ncoord} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Neighborhood based discriminant coordinates} 5 | \description{ 6 | Neighborhood based discriminant coordinates as defined in Hastie and 7 | Tibshirani (1996) and a robustified version as defined in Hennig (2004). 8 | The principle is to maximize the projection of a between 9 | classes covariance matrix, which is defined by averaging the 10 | between classes covariance matrices in the neighborhoods of all points. 11 | } 12 | \usage{ 13 | ncoord(xd, clvecd, nn=50, weighted=FALSE, 14 | sphere="mcd", orderall=TRUE, countmode=1000, ...) 15 | } 16 | %- maybe also `usage' for other objects documented here. 17 | \arguments{ 18 | \item{xd}{the data matrix; a numerical object which can be coerced 19 | to a matrix.} 20 | \item{clvecd}{integer vector of class numbers; length must equal 21 | \code{nrow(xd)}.} 22 | \item{nn}{integer. Number of points which belong to the neighborhood 23 | of each point (including the point itself).} 24 | \item{weighted}{logical. \code{FALSE} corresponds to the original 25 | method of Hastie and Tibshirani (1996). If \code{TRUE}, 26 | the between classes 27 | covariance matrices B are weighted by w/trace B, where w is some 28 | weight depending on the sizes of the 29 | classes in the neighborhood. Division by trace B reduces the effect 30 | of outliers. \code{TRUE} corresponds to WNC as defined in Hennig 31 | (2004).} 32 | \item{sphere}{a covariance matrix or one of 33 | "mve", "mcd", "classical", "none".
The matrix used for sphering the 34 | data. "mcd" and "mve" are robust covariance matrices as implemented 35 | in \code{\link[MASS]{cov.rob}}. "classical" refers to the classical 36 | covariance matrix. "none" means no sphering and use of the raw 37 | data.} 38 | \item{orderall}{logical. By default, the neighborhoods are computed by 39 | ordering all points each time. If \code{FALSE}, the neighborhoods 40 | are computed by selecting \code{nn} times the nearest point from the 41 | remaining points, which may be faster sometimes.} 42 | \item{countmode}{optional positive integer. \code{ncoord} shows a message after 43 | every \code{countmode} runs of the algorithm.} 44 | \item{...}{no effect} 45 | } 46 | % \details{ 47 | % } 48 | \value{ 49 | List with the following components 50 | \item{ev}{eigenvalues in descending order.} 51 | \item{units}{columns are coordinates of projection basis vectors. 52 | New points \code{x} can be projected onto the projection basis vectors 53 | by \code{x \%*\% units}.} 54 | \item{proj}{projections of \code{xd} onto \code{units}.} 55 | } 56 | \references{ 57 | Hastie, T. and Tibshirani, R. (1996). Discriminant adaptive nearest 58 | neighbor classification. \emph{IEEE Transactions on Pattern Analysis 59 | and Machine Intelligence} 18, 607-616. 60 | 61 | Hennig, C. (2004) Asymmetric linear dimension reduction for classification. 62 | \emph{Journal of Computational and Graphical Statistics}, 13, 930-945. 63 | 64 | Hennig, C. (2005) A method for visual cluster validation. In: 65 | Weihs, C. and Gaul, W. (eds.): Classification - The Ubiquitous 66 | Challenge. Springer, Heidelberg 2005, 153-160. 67 | } 68 | \author{Christian Hennig 69 | \email{christian.hennig@unibo.it} 70 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 71 | } 72 | 73 | \seealso{ 74 | \code{\link{plotcluster}} for straightforward discriminant plots. 75 | \code{\link{discrproj}} for alternatives. 76 | \code{\link{rFace}} for generation of the example data used below. 77 | } 78 | 79 | \examples{ 80 | set.seed(4634) 81 | face <- rFace(600,dMoNo=2,dNoEy=0) 82 | grface <- as.integer(attr(face,"grouping")) 83 | ncf <- ncoord(face,grface) 84 | plot(ncf$proj,col=grface) 85 | ncf2 <- ncoord(face,grface,weighted=TRUE) 86 | plot(ncf2$proj,col=grface) 87 | # ...done in one step by function plotcluster. 88 | } 89 | \keyword{multivariate}% at least one, from doc/KEYWORDS 90 | \keyword{classif}% __ONLY ONE__ keyword per line 91 | 92 | 93 | 94 | -------------------------------------------------------------------------------- /man/neginc.Rd: -------------------------------------------------------------------------------- 1 | \name{neginc} 2 | \alias{neginc} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Neg-entropy normality index for cluster validation} 5 | \description{ 6 | Cluster validity index based on the neg-entropy distances of 7 | within-cluster distributions to normal distribution, see 8 | Lago-Fernandez and Corbacho (2010). 9 | } 10 | \usage{ 11 | neginc(x,clustering) 12 | } 13 | %- maybe also `usage' for other objects documented here. 14 | \arguments{ 15 | \item{x}{something that can be coerced into a numerical 16 | matrix. Euclidean dataset.} 17 | \item{clustering}{vector of integers with length \code{=nrow(x)}; 18 | indicating the cluster for each observation.} 19 | } 20 | 21 | \value{ 22 | Index value, see 23 | Lago-Fernandez and Corbacho (2010). The lower (i.e., the more 24 | negative) the better. 25 | } 26 | 27 | \references{ 28 | Lago-Fernandez, L. F. and Corbacho, F.
(2010) Normality-based 29 | validation for crisp clustering. \emph{Pattern Recognition} 43, 782-795. 30 | } 31 | 32 | \author{Christian Hennig 33 | \email{christian.hennig@unibo.it} 34 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 35 | } 36 | \examples{ 37 | options(digits=3) 38 | iriss <- as.matrix(iris[c(1:10,51:55,101:105),-5]) 39 | irisc <- as.numeric(iris[c(1:10,51:55,101:105),5]) 40 | neginc(iriss,irisc) 41 | } 42 | \keyword{cluster}% at least one, from doc/KEYWORDS 43 | 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /man/nselectboot.Rd: -------------------------------------------------------------------------------- 1 | \name{nselectboot} 2 | \alias{nselectboot} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Selection of the number of clusters via bootstrap} 5 | \description{ 6 | Selection of the number of clusters via bootstrap as explained in Fang 7 | and Wang (2012). Repeatedly, two bootstrap samples are drawn from the 8 | data, and the number of clusters is chosen by optimising an instability 9 | estimate computed from these pairs. 10 | 11 | In principle all clustering methods can be used that have a 12 | CBI-wrapper, see \code{\link{clusterboot}}, 13 | \code{\link{kmeansCBI}}. However, the currently implemented 14 | classification methods are not necessarily suitable for all of them, 15 | see argument \code{classification}. 16 | } 17 | \usage{ 18 | nselectboot(data,B=50,distances=inherits(data,"dist"), 19 | clustermethod=NULL, 20 | classification="averagedist",centroidname = NULL, 21 | krange=2:10, count=FALSE,nnk=1, 22 | largeisgood=FALSE,...) 23 | } 24 | 25 | %- maybe also `usage' for other objects documented here. 26 | \arguments{ 27 | \item{data}{something that can be coerced into a matrix. The data 28 | matrix - either an \code{n*p}-data matrix (or data frame) or an 29 | \code{n*n}-dissimilarity matrix (or \code{dist}-object).} 30 | \item{B}{integer. Number of resampling runs.} 31 | \item{distances}{logical. If \code{TRUE}, the data is interpreted as a 32 | dissimilarity matrix. If \code{data} is a \code{dist}-object, 33 | \code{distances=TRUE} automatically, otherwise 34 | \code{distances=FALSE} by default. This means that you have to set 35 | it to \code{TRUE} manually if \code{data} is a dissimilarity matrix.} 36 | \item{clustermethod}{an interface function (the function name, not a 37 | string containing the name, has to be provided!). This defines the 38 | clustering method. See the "Details"-section of \code{\link{clusterboot}} 39 | and \code{\link{kmeansCBI}} for the format. Clustering methods for 40 | \code{nselectboot} must have a \code{k}-argument for the number of 41 | clusters and must otherwise follow the specifications in 42 | \code{\link{clusterboot}}. Note that \code{nselectboot} won't work 43 | with CBI-functions that implicitly already estimate the number of 44 | clusters such as \code{\link{pamkCBI}}; use \code{\link{claraCBI}} 45 | if you want to run it for pam/clara clustering. 46 | } 47 | \item{classification}{string. 48 | This determines how non-clustered points are classified to given 49 | clusters. Options are explained in \code{\link{classifdist}} (if 50 | \code{distances=TRUE}) and \code{\link{classifnp}} (otherwise). 51 | Certain classification methods are connected to certain clustering 52 | methods.
\code{classification="averagedist"} is recommended for 53 | average linkage, \code{classification="centroid"} is recommended for 54 | k-means, clara and pam (with distances it will work with 55 | \code{\link{claraCBI}} only), \code{classification="knn"} with 56 | \code{nnk=1} is recommended for single linkage and 57 | \code{classification="qda"} is recommended for Gaussian mixtures 58 | with flexible covariance matrices. 59 | } 60 | \item{centroidname}{string. Indicates the name of the component of 61 | \code{CBIoutput$result} that contains the cluster centroids in case of 62 | \code{classification="centroid"}, where \code{CBIoutput} is the 63 | output object of \code{clustermethod}. If \code{clustermethod} is 64 | \code{kmeansCBI} or \code{claraCBI}, centroids are recognised 65 | automatically if \code{centroidname=NULL}. If 66 | \code{centroidname=NULL} and \code{distances=FALSE}, cluster means 67 | are computed as the cluster centroids.} 68 | \item{krange}{integer vector; numbers of clusters to be tried.} 69 | \item{count}{logical. If \code{TRUE}, numbers of clusters and 70 | bootstrap runs are printed.} 71 | \item{nnk}{number of nearest neighbours if 72 | \code{classification="knn"}, see \code{\link{classifdist}} (if 73 | \code{distances=TRUE}) and \code{\link{classifnp}} (otherwise).} 74 | \item{largeisgood}{logical. If \code{TRUE}, output component 75 | \code{stabk} is taken as one minus the original instability value 76 | so that larger values of \code{stabk} are better.} 77 | \item{...}{arguments to be passed on to the clustering method.} 78 | } 79 | 80 | \value{ 81 | \code{nselectboot} returns a list with components 82 | \code{kopt,stabk,stab}. 83 | \item{kopt}{optimal number of clusters.} 84 | \item{stabk}{mean instability values for numbers of clusters (or one 85 | minus this if \code{largeisgood=TRUE}).} 86 | \item{stab}{matrix of instability values for all bootstrap runs and 87 | numbers of clusters.} 88 | } 89 | \references{ 90 | Fang, Y. and Wang, J. (2012) Selection of the number of clusters via 91 | the bootstrap method. \emph{Computational Statistics and Data 92 | Analysis}, 56, 468-477. 93 | } 94 | \author{Christian Hennig 95 | \email{christian.hennig@unibo.it} 96 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 97 | } 98 | \seealso{ 99 | \code{\link{classifdist}}, \code{\link{classifnp}}, 100 | \code{\link{clusterboot}},\code{\link{kmeansCBI}} 101 | } 102 | \examples{ 103 | set.seed(20000) 104 | face <- rFace(50,dMoNo=2,dNoEy=0,p=2) 105 | nselectboot(dist(face),B=2,clustermethod=disthclustCBI, 106 | method="average",krange=5:7) 107 | nselectboot(dist(face),B=2,clustermethod=claraCBI, 108 | classification="centroid",krange=5:7) 109 | nselectboot(face,B=2,clustermethod=kmeansCBI, 110 | classification="centroid",krange=5:7) 111 | # Of course use larger B in a real application. 112 | } 113 | \keyword{cluster}% at least one, from doc/KEYWORDS 114 | \keyword{multivariate} 115 | 116 | 117 | 118 | -------------------------------------------------------------------------------- /man/pamk.Rd: -------------------------------------------------------------------------------- 1 | \name{pamk} 2 | \alias{pamk} 3 | 4 | %- Also NEED an `\alias' for EACH other topic documented here. 
5 | \title{Partitioning around medoids with estimation of number of clusters} 6 | \description{ 7 | This calls the function \code{\link[cluster]{pam}} or 8 | \code{\link[cluster]{clara}} to perform a 9 | partitioning around medoids clustering with the number of clusters 10 | estimated by optimum average silhouette width (see 11 | \code{\link[cluster]{pam.object}}) or Calinski-Harabasz 12 | index (\code{\link{calinhara}}). The Duda-Hart test 13 | (\code{\link{dudahart2}}) is applied to decide whether there should be 14 | more than one cluster (unless 1 is excluded as number of clusters or 15 | data are dissimilarities). 16 | } 17 | \usage{ 18 | pamk(data,krange=2:10,criterion="asw", usepam=TRUE, 19 | scaling=FALSE, alpha=0.001, diss=inherits(data, "dist"), 20 | critout=FALSE, ns=10, seed=NULL, ...) 21 | } 22 | \arguments{ 23 | \item{data}{a data matrix or data frame or something that can be 24 | coerced into a matrix, or dissimilarity matrix or 25 | object. See \code{\link[cluster]{pam}} for more information.} 26 | \item{krange}{integer vector. Numbers of clusters which are to be 27 | compared by the average silhouette width criterion. Note: average 28 | silhouette width and Calinski-Harabasz can't estimate number of 29 | clusters \code{nc=1}. If 1 is included, a Duda-Hart test is applied 30 | and 1 is estimated if this is not significant.} 31 | \item{criterion}{one of \code{"asw"}, \code{"multiasw"} or 32 | \code{"ch"}. Determines whether average silhouette width (as given 33 | out by \code{\link[cluster]{pam}}/\code{\link[cluster]{clara}}, or 34 | as computed by \code{\link{distcritmulti}} if \code{"multiasw"} is 35 | specified; recommended for large data sets with \code{usepam=FALSE}) 36 | or Calinski-Harabasz is applied. Note that the original 37 | Calinski-Harabasz index is not defined for dissimilarities; if 38 | dissimilarity data is run with \code{criterion="ch"}, the 39 | dissimilarity-based generalisation in Hennig and Liao (2013) is 40 | used.} 41 | \item{usepam}{logical. If \code{TRUE}, \code{\link[cluster]{pam}} is 42 | used, otherwise \code{\link[cluster]{clara}} (recommended for large 43 | datasets with 2,000 or more observations; dissimilarity matrices can 44 | not be used with \code{\link[cluster]{clara}}).} 45 | \item{scaling}{either a logical value or a numeric vector of length 46 | equal to the number of variables. If \code{scaling} is a numeric 47 | vector with length equal to the number of variables, then each 48 | variable is divided by the corresponding value from \code{scaling}. 49 | If \code{scaling} is \code{TRUE} then scaling is done by dividing 50 | the (centered) variables by their root-mean-square, and if 51 | \code{scaling} is \code{FALSE}, no scaling is done.} 52 | \item{alpha}{numeric between 0 and 1, tuning constant for 53 | \code{\link{dudahart2}} (only used for 1-cluster test).} 54 | \item{diss}{logical flag: if \code{TRUE} (default for \code{dist} or 55 | \code{dissimilarity}-objects), then \code{data} will be considered 56 | as a dissimilarity matrix (and the potential number of clusters 1 57 | will be ignored). If \code{FALSE}, then \code{data} will 58 | be considered as a matrix of observations by variables.} 59 | \item{critout}{logical. 
If \code{TRUE}, the criterion value is printed 60 | out for every number of clusters.} 61 | \item{ns}{passed on to \code{\link{distcritmulti}} if 62 | \code{criterion="multiasw"}.} 63 | \item{seed}{passed on to \code{\link{distcritmulti}} if 64 | \code{criterion="multiasw"}.} 65 | \item{...}{further arguments to be transferred to 66 | \code{\link[cluster]{pam}} or \code{\link[cluster]{clara}}.} 67 | } 68 | 69 | \note{ 70 | \code{\link[cluster]{clara}} and \code{\link[cluster]{pam}} 71 | can handle \code{NA}-entries (see their documentation) but 72 | \code{\link{dudahart2}} cannot. Therefore \code{NA} should not occur 73 | if 1 is in \code{krange}. 74 | } 75 | 76 | \value{ 77 | A list with components 78 | \item{pamobject}{The output of the optimal run of the 79 | \code{\link[cluster]{pam}}-function.} 80 | \item{nc}{the optimal number of clusters.} 81 | \item{crit}{vector of criterion values for numbers of 82 | clusters. \code{crit[1]} is the p-value of the Duda-Hart test 83 | if 1 is in \code{krange} and \code{diss=FALSE}.} 84 | } 85 | \author{Christian Hennig 86 | \email{christian.hennig@unibo.it} 87 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 88 | } 89 | \references{ 90 | Calinski, T. and Harabasz, J. (1974) A Dendrite Method for Cluster 91 | Analysis, \emph{Communications in Statistics}, 3, 1-27. 92 | 93 | Duda, R. O. and Hart, P. E. (1973) \emph{Pattern Classification and 94 | Scene Analysis}. Wiley, New York. 95 | 96 | Hennig, C. and Liao, T. (2013) How to find an appropriate clustering 97 | for mixed-type variables with application to socio-economic 98 | stratification, \emph{Journal of the Royal Statistical Society, Series 99 | C (Applied Statistics)}, 62, 309-369. 100 | 101 | Kaufman, L. and Rousseeuw, P.J. (1990). "Finding Groups in Data: 102 | An Introduction to Cluster Analysis". Wiley, New York. 103 | } 104 | \seealso{ 105 | \code{\link[cluster]{pam}}, \code{\link[cluster]{clara}}, 106 | \code{\link{distcritmulti}} 107 | } 108 | \examples{ 109 | options(digits=3) 110 | set.seed(20000) 111 | face <- rFace(50,dMoNo=2,dNoEy=0,p=2) 112 | pk1 <- pamk(face,krange=1:5,criterion="asw",critout=TRUE) 113 | pk2 <- pamk(face,krange=1:5,criterion="multiasw",ns=2,critout=TRUE) 114 | # "multiasw" is better for larger data sets, use larger ns then. 115 | pk3 <- pamk(face,krange=1:5,criterion="ch",critout=TRUE) 116 | } 117 | \keyword{cluster}% at least one, from doc/KEYWORDS 118 | \keyword{multivariate} 119 | 120 | 121 | 122 | -------------------------------------------------------------------------------- /man/piridge.Rd: -------------------------------------------------------------------------------- 1 | \name{piridge} 2 | \alias{piridge} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Ridgeline Pi-function} 5 | \description{ 6 | The Pi-function is given in (6) in Ray and Lindsay, 2005. Equating it 7 | to the mixture proportion yields locations of two-component Gaussian 8 | mixture density extrema. 9 | } 10 | \usage{ 11 | piridge(alpha, mu1, mu2, Sigma1, Sigma2, showplot=FALSE) 12 | } 13 | %- maybe also `usage' for other objects documented here. 14 | \arguments{ 15 | \item{alpha}{sequence of values between 0 and 1 for which the Pi-function 16 | is computed.} 17 | \item{mu1}{mean vector of component 1.} 18 | \item{mu2}{mean vector of component 2.} 19 | \item{Sigma1}{covariance matrix of component 1.} 20 | \item{Sigma2}{covariance matrix of component 2.} 21 | \item{showplot}{logical.
If \code{TRUE}, the Pi-function is plotted 22 | against \code{alpha}.} 23 | } 24 | 25 | \value{ 26 | Vector of values of the Pi-function for values of \code{alpha}. 27 | } 28 | 29 | \references{ 30 | Ray, S. and Lindsay, B. G. (2005) The Topography of Multivariate 31 | Normal Mixtures, \emph{Annals of Statistics}, 33, 2042-2065. 32 | } 33 | \author{Christian Hennig 34 | \email{christian.hennig@unibo.it} 35 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 36 | } 37 | \examples{ 38 | q <- piridge(seq(0,1,0.1),c(1,1),c(2,5),diag(2),diag(2)) 39 | } 40 | \keyword{cluster}% at least one, from doc/KEYWORDS 41 | \keyword{multivariate} 42 | 43 | 44 | 45 | -------------------------------------------------------------------------------- /man/piridge.zeroes.Rd: -------------------------------------------------------------------------------- 1 | \name{piridge.zeroes} 2 | \alias{piridge.zeroes} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Extrema of two-component Gaussian mixture} 5 | \description{ 6 | By use of the Pi-function in Ray and Lindsay, 2005, locations of 7 | two-component Gaussian mixture density extrema or saddlepoints are computed. 8 | } 9 | \usage{ 10 | piridge.zeroes(prop, mu1, mu2, Sigma1, Sigma2, alphamin=0, 11 | alphamax=1,by=0.001) 12 | } 13 | %- maybe also `usage' for other objects documented here. 14 | \arguments{ 15 | \item{prop}{proportion of mixture component 1.} 16 | \item{mu1}{mean vector of component 1.} 17 | \item{mu2}{mean vector of component 2.} 18 | \item{Sigma1}{covariance matrix of component 1.} 19 | \item{Sigma2}{covariance matrix of component 2.} 20 | \item{alphamin}{minimum alpha value.} 21 | \item{alphamax}{maximum alpha value.} 22 | \item{by}{interval between alpha-values where to look for extrema.} 23 | } 24 | 25 | \value{ 26 | list with components 27 | \item{number.zeroes}{number of zeroes of Pi-function, i.e., 28 | extrema or saddlepoints of density.} 29 | \item{estimated.roots}{estimated \code{alpha}-values at which extrema 30 | or saddlepoints occur.} 31 | } 32 | 33 | \references{ 34 | Ray, S. and Lindsay, B. G. (2005) The Topography of Multivariate 35 | Normal Mixtures, \emph{Annals of Statistics}, 33, 2042-2065. 36 | } 37 | \author{Christian Hennig 38 | \email{christian.hennig@unibo.it} 39 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 40 | } 41 | \examples{ 42 | q <- piridge.zeroes(0.2,c(1,1),c(2,5),diag(2),diag(2),by=0.1) 43 | } 44 | \keyword{cluster}% at least one, from doc/KEYWORDS 45 | \keyword{multivariate} 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /man/plotcluster.Rd: -------------------------------------------------------------------------------- 1 | \name{plotcluster} 2 | \alias{plotcluster} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Discriminant projection plot.} 5 | \description{ 6 | Plots to distinguish given classes by ten available projection 7 | methods. Includes classical discriminant 8 | coordinates, methods to project differences in 9 | mean and covariance structure, asymmetric methods (separation of a 10 | homogeneous class from a heterogeneous one), local neighborhood-based 11 | methods and methods based on robust covariance matrices. 12 | One-dimensional data is plotted against the cluster number. 
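% Editor's sketch (not part of the original documentation): the Note further
% below suggests re-running plotcluster with xlim/ylim to magnify the
% homogeneous class. With face and grface as in the \examples section, and
% purely illustrative (made-up) plot limits:
% plotcluster(face, grface, clnum=1, method="awc")
% plotcluster(face, grface, clnum=1, method="awc",
%             xlim=c(-2,2), ylim=c(-2,2))  # zoom read off the first plot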
13 | } 14 | \usage{ 15 | plotcluster(x, clvecd, clnum=NULL, 16 | method=ifelse(is.null(clnum),"dc","awc"), 17 | bw=FALSE, 18 | ignorepoints=FALSE, ignorenum=0, pointsbyclvecd=TRUE, 19 | xlab=NULL, ylab=NULL, 20 | pch=NULL, col=NULL, ...) 21 | } 22 | %- maybe also `usage' for other objects documented here. 23 | \arguments{ 24 | \item{x}{the data matrix; a numerical object which can be coerced 25 | to a matrix.} 26 | \item{clvecd}{vector of class numbers which can be coerced into 27 | integers; length must equal 28 | \code{nrow(x)}.} 29 | \item{method}{one of 30 | \describe{ 31 | \item{"dc"}{usual discriminant coordinates, see \code{\link{discrcoord}},} 32 | \item{"bc"}{Bhattacharyya coordinates, first coordinate showing 33 | mean differences, second showing covariance matrix differences, 34 | see \code{\link{batcoord}},} 35 | \item{"vbc"}{variance dominated Bhattacharyya coordinates, 36 | see \code{\link{batcoord}},} 37 | \item{"mvdc"}{added mean and variance differences optimizing 38 | coordinates, see \code{\link{mvdcoord}},} 39 | \item{"adc"}{asymmetric discriminant coordinates, see 40 | \code{\link{adcoord}},} 41 | \item{"awc"}{asymmetric discriminant coordinates with weighted 42 | observations, see \code{\link{awcoord}},} 43 | \item{"arc"}{asymmetric discriminant coordinates with weighted 44 | observations and robust MCD-covariance matrix, 45 | see \code{\link{awcoord}},} 46 | \item{"nc"}{neighborhood based coordinates, 47 | see \code{\link{ncoord}},} 48 | \item{"wnc"}{neighborhood based coordinates with weighted neighborhoods, 49 | see \code{\link{ncoord}},} 50 | \item{"anc"}{asymmetric neighborhood based coordinates, 51 | see \code{\link{ancoord}}.} 52 | } 53 | Note that "bc", "vbc", "adc", "awc", "arc" and "anc" assume that 54 | there are only two classes.} 55 | \item{clnum}{integer. Number of the class that the "asymmetric methods" 56 | (the methods assuming that there are only two classes, 57 | as indicated above) attempt to plot homogeneously. 58 | \code{clnum} is ignored for methods "dc" and "nc".} 59 | \item{bw}{logical. If \code{TRUE}, the classes are distinguished by 60 | symbols, and the default color is black/white. 61 | If \code{FALSE}, the classes are distinguished by 62 | colors, and the default symbol is \code{pch=1}.} 63 | \item{ignorepoints}{logical. If \code{TRUE}, points with label 64 | \code{ignorenum} in \code{clvecd} are ignored in the computation for 65 | \code{method} and are only projected afterwards onto the resulting 66 | units. If \code{pch=NULL}, the plot symbol for these points is "N".} 67 | \item{ignorenum}{one of the potential values of the components of 68 | \code{clvecd}. Only has an effect if \code{ignorepoints=TRUE}, see above.} 69 | \item{pointsbyclvecd}{logical. If \code{TRUE} and \code{pch=NULL} 70 | and/or \code{col=NULL}, some hopefully suitable 71 | plot symbols (numbers and letters) and colors are chosen to 72 | distinguish the values of \code{clvecd}, starting with "1"/"black" 73 | for the cluster with the smallest \code{clvecd}-code (note that 74 | colors for clusters with numbers larger than minimum number 75 | \code{+3} are drawn at random from all available colors). 76 | \code{FALSE} produces 77 | potentially less reasonable (but nonrandom) standard colors and symbols if 78 | \code{method} is "dc" or "nc", and will only distinguish whether 79 | \code{clvecd=clnum} or not for the other methods.} 80 | \item{xlab}{label for x-axis. If \code{NULL}, a default text is used.} 81 | \item{ylab}{label for y-axis.
If \code{NULL}, a default text is used.} 82 | \item{pch}{plotting symbol, see \code{\link{par}}. 83 | If \code{NULL}, the default is used.} 84 | \item{col}{plotting color, see \code{\link{par}}. 85 | If \code{NULL}, the default is used.} 86 | \item{...}{additional parameters passed to \code{plot} or the 87 | projection methods.} 88 | } 89 | % \details{ 90 | 91 | % } 92 | \note{ 93 | For some of the asymmetric methods, the area in the plot 94 | occupied by the "homogeneous class" (see \code{clnum} above) may be 95 | very small, and it may make sense to run \code{plotcluster} a second 96 | time specifying plot parameters \code{xlim} and \code{ylim} in a 97 | suitable way. It often makes sense to magnify the plot region 98 | containing the homogeneous class in this way 99 | so that its separation from the rest can be 100 | seen more clearly. 101 | } 102 | 103 | 104 | \references{ 105 | Hennig, C. (2004) Asymmetric linear dimension reduction for classification. 106 | \emph{Journal of Computational and Graphical Statistics} 13, 930-945. 107 | 108 | Hennig, C. (2005) A method for visual cluster validation. In: 109 | Weihs, C. and Gaul, W. (eds.): Classification - The Ubiquitous 110 | Challenge. Springer, Heidelberg 2005, 153-160. 111 | 112 | Seber, G. A. F. (1984). \emph{Multivariate Observations}. New York: Wiley. 113 | 114 | Fukunaga, K. (1990). \emph{Introduction to Statistical Pattern 115 | Recognition} (2nd ed.). Boston: Academic Press. 116 | } 117 | \author{Christian Hennig 118 | \email{christian.hennig@unibo.it} 119 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/}} 120 | 121 | \seealso{ 122 | \code{\link{discrcoord}}, \code{\link{batcoord}}, 123 | \code{\link{mvdcoord}}, \code{\link{adcoord}}, 124 | \code{\link{awcoord}}, \code{\link{ncoord}}, 125 | \code{\link{ancoord}}. 126 | 127 | \code{\link{discrproj}} is an interface to all these projection methods. 128 | 129 | \code{\link{rFace}} for generation of the example data used below. 130 | } 131 | 132 | \examples{ 133 | set.seed(4634) 134 | face <- rFace(300,dMoNo=2,dNoEy=0) 135 | grface <- as.integer(attr(face,"grouping")) 136 | plotcluster(face,grface) 137 | plotcluster(face,grface==1) 138 | plotcluster(face,grface, clnum=1, method="vbc") 139 | } 140 | \keyword{multivariate}% at least one, from doc/KEYWORDS 141 | \keyword{classif}% __ONLY ONE__ keyword per line 142 | 143 | 144 | 145 | -------------------------------------------------------------------------------- /man/rFace.Rd: -------------------------------------------------------------------------------- 1 | \name{rFace} 2 | \alias{rFace} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{"Face-shaped" clustered benchmark datasets} 5 | \description{ 6 | Generates "face-shaped" clustered benchmark datasets. 7 | This is based on a collaboration with Martin Maechler. 8 | } 9 | \usage{ 10 | rFace(n, p = 6, nrep.top = 2, smile.coef = 0.6, dMoNo = 1.2, dNoEy = 1) 11 | } 12 | %- maybe also `usage' for other objects documented here. 13 | \arguments{ 14 | \item{n}{integer greater than or equal to 10. Number of points.} 15 | \item{p}{integer greater than or equal to 2. Dimension.} 16 | \item{nrep.top}{integer. Number of repetitions of the hair-top point.} 17 | \item{smile.coef}{numeric. Coefficient for quadratic term used for 18 | generation of mouth-points. Positive values => smile.} 19 | \item{dMoNo}{number. Distance from mouth to nose.} 20 | \item{dNoEy}{number.
Minimum vertical distance from mouth to eyes.} 21 | } 22 | \details{ 23 | The function generates a nice benchmark example for cluster 24 | analysis. 25 | There are six "clusters" in this data, of which the first five are 26 | clearly homogeneous patterns, but with different distributional 27 | shapes and different qualities of separation. The clusters are 28 | distinguished only in the first two dimensions. The attribute 29 | \code{grouping} is a factor giving the cluster numbers, see below. 30 | The sixth group of 31 | points corresponds to some hairs, and is a collection of 32 | outliers rather than a cluster in itself. This group contains 33 | \code{nrep.top+2} points. Of the remaining points, 20\% belong to 34 | cluster 1, the chin (quadratic function plus noise). 35 | 10\% belong to cluster 2, the right eye (Gaussian). 30\% belong to 36 | cluster 3, the mouth (Gaussian/squared Gaussian). 37 | 20\% belong to cluster 4, the nose (Gaussian/gamma), and 38 | 20\% belong to cluster 5, the left eye (uniform). 39 | 40 | The distributions of the further 41 | variables are homogeneous over 42 | all points. The third dimension is exponentially distributed, the 43 | fourth dimension is Cauchy distributed, all further distributions are 44 | Gaussian. 45 | 46 | Please consider the source code for exact generation of the clusters. 47 | } 48 | \value{ 49 | An \code{n} times \code{p} numeric matrix with attributes 50 | \item{grouping}{a factor giving the cluster memberships of the points.} 51 | \item{indexlist}{a list of six vectors containing the indices of points 52 | belonging to the six groups.} 53 | } 54 | 55 | \author{ 56 | 57 | Christian Hennig 58 | \email{christian.hennig@unibo.it} 59 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/}} 60 | 61 | \examples{ 62 | set.seed(4634) 63 | face <- rFace(600,dMoNo=2,dNoEy=0) 64 | grface <- as.integer(attr(face,"grouping")) 65 | plot(face, col = grface) 66 | # pairs(face, col = grface, main ="rFace(600,dMoNo=2,dNoEy=0)") 67 | } 68 | \keyword{data}% at least one, from doc/KEYWORDS 69 | -------------------------------------------------------------------------------- /man/randcmatrix.Rd: -------------------------------------------------------------------------------- 1 | \name{randcmatrix} 2 | \alias{randcmatrix} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Random partition matrix} 5 | \description{ 6 | For use within \code{regmix}. Generates a random 7 | 0-1-matrix with \code{n} rows 8 | and \code{cln} columns so that every row contains exactly one 1 and 9 | every column contains at least \code{p+3} ones. 10 | } 11 | \usage{ 12 | randcmatrix(n,cln,p) 13 | } 14 | %- maybe also `usage' for other objects documented here. 15 | \arguments{ 16 | \item{n}{positive integer. Number of rows.} 17 | \item{cln}{positive integer. Number of columns.} 18 | \item{p}{positive integer. See above.} 19 | } 20 | \value{ 21 | An \code{n*cln}-matrix.
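% Editor's sketch (not from the original docs): a quick check of the two
% defining properties stated in the description, here with n=20, cln=3, p=1:
% cm <- randcmatrix(20, 3, 1)
% all(rowSums(cm) == 1)      # every row has exactly one 1
% all(colSums(cm) >= 1 + 3)  # every column has at least p+3 ones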
22 | } 23 | \author{Christian Hennig 24 | \email{christian.hennig@unibo.it} 25 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/}} 26 | \seealso{ 27 | \code{\link{regmix}} 28 | } 29 | \examples{ 30 | set.seed(111) 31 | randcmatrix(10,2,1) 32 | } 33 | \keyword{cluster}% at least one, from doc/KEYWORDS 34 | 35 | -------------------------------------------------------------------------------- /man/randconf.Rd: -------------------------------------------------------------------------------- 1 | \name{randconf} 2 | \alias{randconf} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Generate a sample indicator vector} 5 | \description{ 6 | Generates a logical vector of length \code{n} with \code{p TRUE}s. 7 | } 8 | \usage{ 9 | randconf(n, p) 10 | } 11 | %- maybe also `usage' for other objects documented here. 12 | \arguments{ 13 | \item{n}{positive integer.} 14 | \item{p}{positive integer.} 15 | } 16 | 17 | \value{ 18 | A logical vector. 19 | } 20 | 21 | \author{Christian Hennig 22 | \email{christian.hennig@unibo.it} 23 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/}} 24 | 25 | \seealso{\code{\link{sample}}} 26 | 27 | \examples{ 28 | randconf(10,3) 29 | } 30 | \keyword{distribution}% at least one, from doc/KEYWORDS 31 | 32 | -------------------------------------------------------------------------------- /man/ridgeline.Rd: -------------------------------------------------------------------------------- 1 | \name{ridgeline} 2 | \alias{ridgeline} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Ridgeline computation} 5 | \description{ 6 | Computes \eqn{(\alpha*\Sigma_1^{-1}+(1-\alpha)*\Sigma_2^{-1})^{-1}* 7 | (\alpha*\Sigma_1^{-1}*\mu_1+(1-\alpha)*\Sigma_2^{-1}*\mu_2)}{% 8 | (alpha*Sigma1^{-1}+(1-alpha)*Sigma2^{-1})^{-1}* 9 | (alpha*Sigma1^{-1}*mu1+(1-alpha)*Sigma2^{-1}*mu2)} 10 | as required for the 11 | computation of the ridgeline (Ray and Lindsay, 2005) to find 12 | all density extrema of a two-component Gaussian mixture with 13 | mean vectors \code{mu1} and \code{mu2} and covariance matrices 14 | \code{Sigma1}, \code{Sigma2}. 15 | \} hmm 16 | ridgeline(alpha, mu1, mu2, Sigma1, Sigma2) 17 | } 18 | %- maybe also `usage' for other objects documented here. 19 | \arguments{ 20 | \item{alpha}{numeric between 0 and 1.} 21 | \item{mu1}{mean vector of component 1.} 22 | \item{mu2}{mean vector of component 2.} 23 | \item{Sigma1}{covariance matrix of component 1.} 24 | \item{Sigma2}{covariance matrix of component 2.} 25 | } 26 | 27 | \value{ 28 | A vector. See above. 29 | } 30 | 31 | \references{ 32 | Ray, S. and Lindsay, B. G. (2005) The Topography of Multivariate 33 | Normal Mixtures, \emph{Annals of Statistics}, 33, 2042-2065. 34 | } 35 | \author{Christian Hennig 36 | \email{christian.hennig@unibo.it} 37 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 38 | } 39 | \examples{ 40 | ridgeline(0.5,c(1,1),c(2,5),diag(2),diag(2)) 41 | } 42 | \keyword{cluster}% at least one, from doc/KEYWORDS 43 | \keyword{multivariate} 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /man/ridgeline.diagnosis.Rd: -------------------------------------------------------------------------------- 1 | \name{ridgeline.diagnosis} 2 | \alias{ridgeline.diagnosis} 3 | %- Also NEED an `\alias' for EACH other topic documented here.
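% Editor's sketch (not from the original docs): tracing the ridgeline of the
% preceding help page over a grid of alpha-values; the endpoints of the
% resulting curve are the two component means:
% ra <- t(sapply(seq(0, 1, by=0.1), function(a)
%   ridgeline(a, c(1,1), c(2,5), diag(2), diag(2))))
% ra  # 11 points on the ridgeline, from mu2 (alpha=0) to mu1 (alpha=1)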
4 | \title{Ridgeline plots, ratios and unimodality} 5 | \description{ 6 | Computes ridgeline ratios and unimodality checks for pairs of components 7 | given the parameters of a Gaussian mixture. Produces ridgeline plots. 8 | } 9 | \usage{ 10 | ridgeline.diagnosis (propvector,muarray,Sigmaarray, 11 | k=length(propvector), 12 | ipairs="all", compute.ratio=TRUE,by=0.001, 13 | ratiocutoff=NULL,ridgelineplot="matrix") 14 | 15 | } 16 | %- maybe also `usage' for other objects documented here. 17 | \arguments{ 18 | \item{propvector}{vector of component proportions. Length must be 19 | number of components, and must sum up to 1.} 20 | \item{muarray}{matrix of component means (different components are in 21 | different columns).} 22 | \item{Sigmaarray}{three dimensional array with component covariance 23 | matrices (the third dimension refers to components).} 24 | \item{k}{integer. Number of components.} 25 | \item{ipairs}{\code{"all"} or list of vectors of two integers. If 26 | \code{ipairs="all"}, computations are carried out for all pairs of 27 | components. Otherwise, \code{ipairs} gives the pairs of components for 28 | which computations are carried out.} 29 | \item{compute.ratio}{logical. If \code{TRUE}, a matrix of ridgeline 30 | ratios is computed, see Hennig (2010a).} 31 | \item{by}{real between 0 and 1. Interval width for density computation 32 | along the ridgeline.} 33 | \item{ratiocutoff}{real between 0 and 1. If not \code{NULL}, the 34 | \code{connection.matrix} (see below) is computed by checking whether 35 | ridgeline ratios between components are below \code{ratiocutoff}.} 36 | \item{ridgelineplot}{one of \code{"none"}, \code{"matrix"}, 37 | \code{"pairwise"}. If \code{"matrix"}, a matrix of pairwise 38 | ridgeline plots (see Hennig 2010b) will be plotted. If 39 | \code{"pairwise"}, pairwise ridgeline plots are plotted (you may 40 | want to set \code{par(ask=TRUE)} to see them all). No plotting if 41 | \code{"none"}.} 42 | } 43 | 44 | \value{ 45 | A list with components 46 | \item{merged.clusters}{vector of integers, stating for every mixture 47 | component the number of the cluster of components that would be merged 48 | by merging connectivity components of the graph specified by 49 | \code{connection.matrix}.} 50 | \item{connection.matrix}{zero-one matrix, in which a one means that the 51 | mixture of the corresponding pair of components of the original 52 | mixture is either unimodal (if \code{ratiocutoff=NULL}) or that their 53 | ridgeline ratio is above \code{ratiocutoff}. If \code{ipairs!="all"}, 54 | ignored pairs always have 0 in this matrix, same for 55 | \code{ratio.matrix}.} 56 | \item{ratio.matrix}{matrix with entries between 0 and 1, giving the 57 | ridgeline ratio, which is the density minimum of the mixture of the 58 | corresponding pair of components along the ridgeline divided by the 59 | minimum of the two maxima closest to the beginning and the end of the 60 | ridgeline.} 61 | } 62 | 63 | \references{ 64 | Hennig, C. (2010a) Methods for merging Gaussian mixture components, 65 | \emph{Advances in Data Analysis and Classification}, 4, 3-34. 66 | 67 | Hennig, C. (2010b) Ridgeline plot and clusterwise stability as tools 68 | for merging Gaussian mixture components. To appear in 69 | \emph{Classification as a Tool for Research}, Proceedings of IFCS 70 | 2009. 71 | 72 | Ray, S. and Lindsay, B. G. (2005) The Topography of Multivariate 73 | Normal Mixtures, \emph{Annals of Statistics}, 33, 2042-2065.
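% Editor's sketch (not from the original docs) of the ridgeline ratio defined
% for ratio.matrix above, using dridgeline (documented elsewhere in this
% package) to evaluate the mixture density along the ridgeline:
% alpha <- seq(0, 1, by=0.001)
% dens <- dridgeline(alpha=alpha, prop=0.5, mu1=c(0,0), mu2=c(3,3),
%                    Sigma1=diag(2), Sigma2=diag(2))
% plot(alpha, dens, type="l")
% # Two maxima with a valley in between indicate bimodality; the ridgeline
% # ratio is the valley minimum divided by the smaller of the two maxima.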
74 | } 75 | \author{Christian Hennig 76 | \email{christian.hennig@unibo.it} 77 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 78 | } 79 | 80 | \seealso{ 81 | \code{\link{ridgeline}}, \code{\link{dridgeline}}, 82 | \code{\link{piridge}}, \code{\link{piridge.zeroes}} 83 | } 84 | 85 | \examples{ 86 | muarray <- cbind(c(0,0),c(0,0.1),c(10,10)) 87 | sigmaarray <- array(c(diag(2),diag(2),diag(2)),dim=c(2,2,3)) 88 | rd <- 89 | ridgeline.diagnosis(c(0.5,0.3,0.2),muarray,sigmaarray,ridgelineplot="matrix",by=0.1) 90 | # Much slower but more precise with default by=0.001. 91 | } 92 | \keyword{cluster}% at least one, from doc/KEYWORDS 93 | \keyword{multivariate} 94 | 95 | 96 | 97 | -------------------------------------------------------------------------------- /man/simmatrix.Rd: -------------------------------------------------------------------------------- 1 | \name{simmatrix} 2 | \alias{simmatrix} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Extracting intersections between clusters from fpc-object} 5 | \description{ 6 | Extracts the information about the size of the intersections 7 | between representative 8 | Fixed Point Clusters (FPCs) of stable groups from the output of 9 | the FPC-functions \code{\link{fixreg}} and \code{\link{fixmahal}}. 10 | } 11 | \usage{ 12 | simmatrix(fpcobj) 13 | } 14 | %- maybe also `usage' for other objects documented here. 15 | \arguments{ 16 | \item{fpcobj}{an object of class \code{rfpc} or \code{mfpc}.} 17 | } 18 | 19 | \value{ 20 | A non-negative real-valued vector giving the number of points in 21 | the intersections of the representative FPCs of stable groups. 22 | } 23 | 24 | \author{Christian Hennig 25 | \email{christian.hennig@unibo.it} 26 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 27 | } 28 | \note{The intersection between representative FPCs no. \code{i} and 29 | \code{j} is at position \code{\link{sseg}(i,j)}.} 30 | 31 | \seealso{ 32 | \code{\link{fixmahal}}, 33 | \code{\link{fixreg}}, 34 | \code{\link{sseg}} 35 | } 36 | 37 | \examples{ 38 | set.seed(190000) 39 | data(tonedata) 40 | # Note: If you do not use the installed package, replace this by 41 | # tonedata <- read.table("(path/)tonedata.txt", header=TRUE) 42 | attach(tonedata) 43 | tonefix <- fixreg(stretchratio,tuned,mtf=1,ir=20) 44 | simmatrix(tonefix)[sseg(2,3)] 45 | } 46 | \keyword{utilities}% at least one, from doc/KEYWORDS 47 | 48 | -------------------------------------------------------------------------------- /man/solvecov.Rd: -------------------------------------------------------------------------------- 1 | \name{solvecov} 2 | \alias{solvecov} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Inversion of (possibly singular) symmetric matrices} 5 | \description{ 6 | Tries to invert a matrix by \code{solve}. If this fails because of 7 | singularity, an 8 | eigenvector decomposition is computed, and eigenvalues below 9 | \code{1/cmax} are replaced by \code{1/cmax}, i.e., \code{cmax} will be 10 | the corresponding eigenvalue of the inverted matrix. 11 | } 12 | \usage{ 13 | solvecov(m, cmax = 1e+10) 14 | } 15 | %- maybe also `usage' for other objects documented here. 
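% Editor's sketch (not from the original docs) on a singular matrix, where
% solve() fails and the eigenvalue-based fallback described above is used:
% m <- matrix(1, nrow=2, ncol=2)  # symmetric, rank 1
% sc <- solvecov(m)
% sc$coll  # TRUE: solve() failed because of singularity
% sc$inv   # regularised inverse, eigenvalues capped at cmax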
16 | \arguments{ 17 | \item{m}{a numeric symmetric matrix.} 18 | \item{cmax}{a positive value, see above.} 19 | } 20 | 21 | \value{ 22 | A list with the following components: 23 | \item{inv}{the inverted matrix} 24 | \item{coll}{\code{TRUE} if \code{solve} failed because of singularity.} 25 | } 26 | \author{Christian Hennig 27 | \email{christian.hennig@unibo.it} 28 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/}} 29 | 30 | \seealso{\code{\link{solve}}, \code{\link{eigen}}} 31 | 32 | \examples{ 33 | x <- c(1,0,0,1,0,1,0,0,1) 34 | dim(x) <- c(3,3) 35 | solvecov(x) 36 | } 37 | \keyword{array}% at least one, from doc/KEYWORDS 38 | 39 | 40 | -------------------------------------------------------------------------------- /man/sseg.Rd: -------------------------------------------------------------------------------- 1 | \name{sseg} 2 | \alias{sseg} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Position in a similarity vector} 5 | \description{ 6 | \code{sseg(i,j)} gives the position of the similarity of objects 7 | \code{i} and \code{j} in the similarity vectors produced by 8 | \code{fixreg} and \code{fixmahal}. 9 | \code{sseg} should only be used as an auxiliary function in 10 | \code{fixreg} and \code{fixmahal}. 11 | } 12 | \usage{ 13 | sseg(i, j) 14 | } 15 | %- maybe also `usage' for other objects documented here. 16 | \arguments{ 17 | \item{i}{positive integer.} 18 | \item{j}{positive integer.} 19 | } 20 | \value{A positive integer. 21 | } 22 | \author{Christian Hennig 23 | \email{christian.hennig@unibo.it} 24 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/}} 25 | 26 | \examples{ 27 | sseg(3,4) 28 | } 29 | \keyword{utilities}% at least one, from doc/KEYWORDS 30 | -------------------------------------------------------------------------------- /man/stupidkaven.Rd: -------------------------------------------------------------------------------- 1 | \name{stupidkaven} 2 | \alias{stupidkaven} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Stupid average dissimilarity random clustering} 5 | \description{ 6 | Picks k random starting points from given dataset to initialise k 7 | clusters. Then, one by one, the point not yet assigned to any cluster 8 | with smallest average dissimilarity to the points of any already 9 | existing cluster is assigned to that 10 | cluster, until all points are assigned. This is a random version of 11 | average linkage clustering, see 12 | Akhanli and Hennig (2020). 13 | } 14 | \usage{ 15 | stupidkaven(d,k) 16 | } 17 | %- maybe also `usage' for other objects documented here. 18 | \arguments{ 19 | \item{d}{\code{dist}-object or dissimilarity matrix.} 20 | \item{k}{integer. Number of clusters.} 21 | } 22 | 23 | % \details{ 24 | % } 25 | \value{ 26 | The clustering vector (values 1 to \code{k}, length equal to the number 27 | of objects behind \code{d}). 28 | } 29 | \references{ 30 | Akhanli, S. and Hennig, C. (2020) Calibrating and aggregating cluster 31 | validity indexes for context-adapted comparison of clusterings.
32 | \emph{Statistics and Computing}, 30, 1523-1544, 33 | \url{https://link.springer.com/article/10.1007/s11222-020-09958-2}, \url{https://arxiv.org/abs/2002.01822} 34 | 35 | 36 | } 37 | \author{Christian Hennig 38 | \email{christian.hennig@unibo.it} 39 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 40 | } 41 | 42 | \seealso{ 43 | \code{\link{stupidkcentroids}}, \code{\link{stupidknn}}, \code{\link{stupidkfn}} 44 | } 45 | 46 | \examples{ 47 | set.seed(20000) 48 | options(digits=3) 49 | face <- rFace(200,dMoNo=2,dNoEy=0,p=2) 50 | stupidkaven(dist(face),3) 51 | } 52 | \keyword{multivariate}% at least one, from doc/KEYWORDS 53 | \keyword{cluster}% __ONLY ONE__ keyword per line 54 | 55 | 56 | 57 | -------------------------------------------------------------------------------- /man/stupidkcentroids.Rd: -------------------------------------------------------------------------------- 1 | \name{stupidkcentroids} 2 | \alias{stupidkcentroids} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Stupid k-centroids random clustering} 5 | \description{ 6 | Picks k random centroids from given dataset and assigns every point to 7 | the closest centroid. This is called stupid k-centroids in Hennig (2019). 8 | } 9 | \usage{ 10 | stupidkcentroids(xdata, k, distances = inherits(xdata, "dist")) 11 | } 12 | %- maybe also `usage' for other objects documented here. 13 | \arguments{ 14 | \item{xdata}{cases*variables data, \code{dist}-object or dissimilarity 15 | matrix, see \code{distances}.} 16 | \item{k}{integer. Number of clusters.} 17 | \item{distances}{logical. If \code{TRUE}, \code{xdata} is interpreted 18 | as distances.} 19 | } 20 | 21 | % \details{ 22 | % } 23 | \value{ 24 | A list with components 25 | \item{partition}{vector of integers 1 to \code{k}, of length equal to 26 | the number of objects, indicating to which cluster an object belongs.} 27 | \item{centroids}{vector of integers of length \code{k}, indicating the 28 | centroids of the clusters (observation number).} 29 | \item{distances}{as argument \code{distances}.} 30 | } 31 | \references{ 32 | 33 | Hennig, C. (2019) Cluster validation by measurement of clustering 34 | characteristics relevant to the user. In C. H. Skiadas (ed.) 35 | \emph{Data Analysis and Applications 1: Clustering and Regression, 36 | Modeling-estimating, Forecasting and Data Mining, Volume 2}, Wiley, 37 | New York 1-24, 38 | \url{https://arxiv.org/abs/1703.09282} 39 | 40 | Akhanli, S. and Hennig, C. (2020) Calibrating and aggregating cluster 41 | validity indexes for context-adapted comparison of clusterings.
42 | \emph{Statistics and Computing}, 30, 1523-1544, 43 | \url{https://link.springer.com/article/10.1007/s11222-020-09958-2}, \url{https://arxiv.org/abs/2002.01822} 44 | 45 | } 46 | \author{Christian Hennig 47 | \email{christian.hennig@unibo.it} 48 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 49 | } 50 | 51 | \seealso{ 52 | \code{\link{stupidknn}}, \code{\link{stupidkfn}}, \code{\link{stupidkaven}} 53 | } 54 | 55 | \examples{ 56 | set.seed(20000) 57 | options(digits=3) 58 | face <- rFace(200,dMoNo=2,dNoEy=0,p=2) 59 | stupidkcentroids(dist(face),3) 60 | } 61 | \keyword{multivariate}% at least one, from doc/KEYWORDS 62 | \keyword{cluster}% __ONLY ONE__ keyword per line 63 | 64 | 65 | 66 | -------------------------------------------------------------------------------- /man/stupidkfn.Rd: -------------------------------------------------------------------------------- 1 | \name{stupidkfn} 2 | \alias{stupidkfn} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Stupid farthest neighbour random clustering} 5 | \description{ 6 | Picks k random starting points from given dataset to initialise k 7 | clusters. Then, one by one, points not yet assigned to any cluster 8 | are assigned to the existing 9 | clusters, until all points are assigned. At each step, the point/cluster 10 | pair is chosen that minimises the distance of the point to the 11 | farthest point in the cluster, as in 12 | complete linkage clustering, see 13 | Akhanli and Hennig (2020). 14 | } 15 | \usage{ 16 | stupidkfn(d,k) 17 | } 18 | %- maybe also `usage' for other objects documented here. 19 | \arguments{ 20 | \item{d}{\code{dist}-object or dissimilarity matrix.} 21 | \item{k}{integer. Number of clusters.} 22 | } 23 | 24 | % \details{ 25 | % } 26 | \value{ 27 | The clustering vector (values 1 to \code{k}, length equal to the number 28 | of objects behind \code{d}). 29 | } 30 | \references{ 31 | Akhanli, S. and Hennig, C. (2020) Calibrating and aggregating cluster 32 | validity indexes for context-adapted comparison of clusterings. 33 | \emph{Statistics and Computing}, 30, 1523-1544, 34 | \url{https://link.springer.com/article/10.1007/s11222-020-09958-2}, \url{https://arxiv.org/abs/2002.01822} 35 | 36 | } 37 | \author{Christian Hennig 38 | \email{christian.hennig@unibo.it} 39 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 40 | } 41 | 42 | \seealso{ 43 | \code{\link{stupidkcentroids}}, \code{\link{stupidknn}}, \code{\link{stupidkaven}} 44 | } 45 | 46 | \examples{ 47 | set.seed(20000) 48 | options(digits=3) 49 | face <- rFace(200,dMoNo=2,dNoEy=0,p=2) 50 | stupidkfn(dist(face),3) 51 | } 52 | \keyword{multivariate}% at least one, from doc/KEYWORDS 53 | \keyword{cluster}% __ONLY ONE__ keyword per line 54 | 55 | 56 | 57 | -------------------------------------------------------------------------------- /man/stupidknn.Rd: -------------------------------------------------------------------------------- 1 | \name{stupidknn} 2 | \alias{stupidknn} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Stupid nearest neighbour random clustering} 5 | \description{ 6 | Picks k random starting points from given dataset to initialise k 7 | clusters. Then, one by one, the point not yet assigned to any cluster 8 | that is closest to an already assigned point is assigned to that 9 | cluster, until all points are assigned. This is called stupid nearest 10 | neighbour clustering in Hennig (2019).
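% Editor's note (an interpretation, not from the original docs): the stupid*
% clusterings serve as random baselines when calibrating validity indexes,
% e.g. a baseline average silhouette width for k=3:
% face <- rFace(100, dMoNo=2, dNoEy=0, p=2)
% d <- dist(face)
% mean(replicate(20, mean(cluster::silhouette(stupidknn(d, 3), d)[, 3])))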
11 | } 12 | \usage{ 13 | stupidknn(d,k) 14 | } 15 | %- maybe also `usage' for other objects documented here. 16 | \arguments{ 17 | \item{d}{\code{dist}-object or dissimilarity matrix.} 18 | \item{k}{integer. Number of clusters.} 19 | } 20 | 21 | % \details{ 22 | % } 23 | \value{ 24 | The clustering vector (values 1 to \code{k}, length number of objects 25 | behind \code{d}), 26 | } 27 | \references{ 28 | Hennig, C. (2019) Cluster validation by measurement of clustering 29 | characteristics relevant to the user. In C. H. Skiadas (ed.) 30 | \emph{Data Analysis and Applications 1: Clustering and Regression, 31 | Modeling-estimating, Forecasting and Data Mining, Volume 2}, Wiley, 32 | New York 1-24, 33 | \url{https://arxiv.org/abs/1703.09282} 34 | 35 | Akhanli, S. and Hennig, C. (2020) Calibrating and aggregating cluster 36 | validity indexes for context-adapted comparison of clusterings. 37 | \emph{Statistics and Computing}, 30, 1523-1544, 38 | \url{https://link.springer.com/article/10.1007/s11222-020-09958-2}, \url{https://arxiv.org/abs/2002.01822} 39 | 40 | } 41 | \author{Christian Hennig 42 | \email{christian.hennig@unibo.it} 43 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 44 | } 45 | 46 | \seealso{ 47 | \code{\link{stupidkcentroids}}, \code{\link{stupidkfn}}, \code{\link{stupidkaven}} 48 | } 49 | 50 | \examples{ 51 | set.seed(20000) 52 | options(digits=3) 53 | face <- rFace(200,dMoNo=2,dNoEy=0,p=2) 54 | stupidknn(dist(face),3) 55 | } 56 | \keyword{multivariate}% at least one, from doc/KEYWORDS 57 | \keyword{cluster}% __ONLY ONE__ keyword per line 58 | 59 | 60 | 61 | -------------------------------------------------------------------------------- /man/tdecomp.Rd: -------------------------------------------------------------------------------- 1 | \name{tdecomp} 2 | \alias{tdecomp} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Root of singularity-corrected eigenvalue decomposition} 5 | \description{ 6 | Computes transposed eigenvectors of matrix \code{m} times diagonal of 7 | square root of eigenvalues so that eigenvalues smaller than 1e-6 are 8 | set to 1e-6. 9 | } 10 | \usage{ 11 | tdecomp(m) 12 | } 13 | %- maybe also `usage' for other objects documented here. 14 | \arguments{ 15 | \item{m}{a symmetric matrix of minimum format 2*2.} 16 | } 17 | \details{ 18 | Thought for use in \code{discrcoord} only.} 19 | \value{ 20 | a matrix. 21 | } 22 | \author{Christian Hennig 23 | \email{christian.hennig@unibo.it} 24 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 25 | } 26 | \note{ 27 | Thought for use within \code{\link{discrcoord}} only. 28 | } 29 | 30 | \examples{ 31 | x <- rnorm(10) 32 | y <- rnorm(10) 33 | z <- cov(cbind(x,y)) 34 | round(tdecomp(z),digits=2) 35 | } 36 | \keyword{array}% at least one, from doc/KEYWORDS 37 | 38 | -------------------------------------------------------------------------------- /man/tonedata.Rd: -------------------------------------------------------------------------------- 1 | \name{tonedata} 2 | \alias{tonedata} 3 | \docType{data} 4 | \title{Tone perception data} 5 | \description{ 6 | The tone perception data stem 7 | from an experiment of Cohen (1980) and have been analyzed in de Veaux 8 | (1989). 9 | A pure fundamental tone was played to a 10 | trained musician. Electronically generated overtones were added, determined 11 | by a stretching ratio of \code{stretchratio}. \code{stretchratio=2.0} 12 | corresponds to the harmonic pattern 13 | usually heard in traditional definite pitched instruments. 
The musician was 14 | asked to tune an adjustable tone to the octave above the fundamental tone. 15 | \code{tuned} gives the ratio of the adjusted tone to the fundamental, 16 | i.e. \code{tuned=2.0} would be the correct tuning for all 17 | \code{stretchratio}-values. 18 | The data analyzed here belong to 150 trials 19 | with the same musician. In the original study, there were four further 20 | musicians. 21 | } 22 | \usage{data(tonedata)} 23 | \format{A data frame with 2 variables \code{stretchratio} and 24 | \code{tuned} and 150 cases.} 25 | \source{Cohen, E. A. (1980) \emph{Inharmonic tone 26 | perception}. Unpublished Ph.D. dissertation, Stanford University.} 27 | \references{ 28 | de Veaux, R. D. (1989) Mixtures of Linear Regressions, 29 | \emph{Computational Statistics and Data Analysis} 8, 227-245. 30 | } 31 | \keyword{datasets} 32 | -------------------------------------------------------------------------------- /man/unimodal.ind.Rd: -------------------------------------------------------------------------------- 1 | \name{unimodal.ind} 2 | \alias{unimodal.ind} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Is a fitted density unimodal or not?} 5 | \description{ 6 | Checks whether a series of fitted density values (such as given out as 7 | \code{y}-component of \code{\link{density}}) is unimodal. 8 | } 9 | \usage{ 10 | unimodal.ind(y) 11 | } 12 | %- maybe also `usage' for other objects documented here. 13 | \arguments{ 14 | \item{y}{numeric vector of fitted density values in order of 15 | increasing x-values such as given out as 16 | \code{y}-component of \code{\link{density}}.} 17 | } 18 | 19 | \value{ 20 | Logical. \code{TRUE} if unimodal. 21 | } 22 | 23 | \author{Christian Hennig 24 | \email{christian.hennig@unibo.it} 25 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 26 | } 27 | \examples{ 28 | unimodal.ind(c(1,3,3,4,2,1,0,0)) 29 | } 30 | \keyword{univar}% at least one, from doc/KEYWORDS 31 | % \keyword{multivariate} 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /man/weightplots.Rd: -------------------------------------------------------------------------------- 1 | \name{weightplots} 2 | \alias{weightplots} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Ordered posterior plots} 5 | \description{ 6 | Ordered posterior plots for Gaussian mixture components, see Hennig (2010). 7 | } 8 | \usage{ 9 | weightplots(z, clusternumbers="all", clustercol=2, 10 | allcol=grey(0.2+((1:ncol(z))-1)* 11 | 0.6/(ncol(z)-1)), 12 | lty=rep(1,ncol(z)),clusterlwd=3, 13 | legendposition="none", 14 | weightcutoff=0.01,ask=TRUE, ...) 15 | } 16 | %- maybe also `usage' for other objects documented here. 17 | \arguments{ 18 | \item{z}{matrix with rows corresponding to observations and columns 19 | corresponding to mixture components. Entries are probabilities that 20 | an observation has been generated by a mixture component. These will 21 | normally be estimated a posteriori probabilities, as generated as 22 | component \code{z} of the output object from 23 | \code{\link[mclust]{summary.mclustBIC}}.} 24 | \item{clusternumbers}{\code{"all"} or vector of integers.
Numbers of 25 | components for which plots are drawn.} 26 | \item{clustercol}{colour used for the main components for which a 27 | plot is drawn.} 28 | \item{allcol}{colours used for respective other components in plots in 29 | which they are not main components.} 30 | \item{lty}{line types for components.} 31 | \item{clusterlwd}{numeric. Line width for main component.} 32 | \item{legendposition}{\code{"none"} or vector with two coordinates in 33 | the plot, where a legend should be printed.} 34 | \item{weightcutoff}{numeric between 0 and 1. Observations are only taken 35 | into account for which the posterior probability for the main 36 | component is larger than this.} 37 | \item{ask}{logical. If \code{TRUE}, it sets \code{par(ask=TRUE)} in 38 | the beginning and \code{par(ask=FALSE)} after all plots have been shown.} 39 | \item{...}{further parameters to be passed on to \code{\link{legend}}.} 40 | } 41 | 42 | \value{ 43 | Invisible matrix of posterior probabilities \code{z} from 44 | \code{mclustsummary}. 45 | } 46 | 47 | \details{ 48 | Shows posterior probabilities for observations belonging to all 49 | mixture components on the y-axis, with points ordered by posterior 50 | probability for the main component. 51 | } 52 | 53 | \references{ 54 | Hennig, C. (2010) Methods for merging Gaussian mixture components, 55 | \emph{Advances in Data Analysis and Classification}, 4, 3-34. 56 | } 57 | \author{Christian Hennig 58 | \email{christian.hennig@unibo.it} 59 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 60 | } 61 | 62 | \examples{ 63 | require(mclust) 64 | require(MASS) 65 | data(crabs) 66 | dc <- crabs[,4:8] 67 | cm <- mclustBIC(crabs[,4:8],G=9,modelNames="EEE") 68 | scm <- summary(cm,crabs[,4:8]) 69 | weightplots(scm$z,clusternumbers=1:3,ask=FALSE) 70 | weightplots(scm$z,clusternumbers=1:3,allcol=1:9, ask=FALSE, 71 | legendposition=c(5,0.7)) 72 | # Remove ask=FALSE to have time to watch the plots. 73 | } 74 | \keyword{multivariate} 75 | \keyword{cluster} 76 | 77 | 78 | -------------------------------------------------------------------------------- /man/wfu.Rd: -------------------------------------------------------------------------------- 1 | \name{wfu} 2 | \alias{wfu} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Weight function (for Mahalanobis distances)} 5 | \description{ 6 | Function of the elements of \code{md}, which is 1 for arguments smaller 7 | than \code{ca}, 0 for arguments larger than \code{ca2} and linear 8 | (default: continuous) in between. 9 | 10 | Thought for use in \code{fixmahal}. 11 | } 12 | \usage{ 13 | wfu(md, ca, ca2, a1 = 1/(ca - ca2), a0 = -a1 * ca2) 14 | } 15 | %- maybe also `usage' for other objects documented here. 16 | \arguments{ 17 | \item{md}{vector of positive numericals.} 18 | \item{ca}{positive numerical.} 19 | \item{ca2}{positive numerical.} 20 | \item{a1}{numerical. Slope.} 21 | \item{a0}{numerical. Intercept.} 22 | } 23 | 24 | \value{ 25 | A vector of numericals between 0 and 1.
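% Editor's sketch (not from the original docs) visualising the piecewise
% linear shape described above:
% md <- seq(0, 10, by=0.1)
% plot(md, wfu(md, ca=5, ca2=8), type="l")
% # constant 1 up to ca=5, linear decrease, constant 0 beyond ca2=8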
26 | } 27 | 28 | \author{Christian Hennig 29 | \email{christian.hennig@unibo.it} 30 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/}} 31 | 32 | \seealso{\code{\link{fixmahal}}} 33 | 34 | \examples{ 35 | md <- seq(0,10,by=0.1) 36 | round(wfu(md,ca=5,ca2=8),digits=2) 37 | } 38 | \keyword{arith}% at least one, from doc/KEYWORDS 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | -------------------------------------------------------------------------------- /man/xtable.Rd: -------------------------------------------------------------------------------- 1 | \name{xtable} 2 | \alias{xtable} 3 | 4 | %- Also NEED an `\alias' for EACH other topic documented here. 5 | \title{Partition crosstable with empty clusters} 6 | \description{ 7 | This produces a crosstable between two integer vectors (partitions) of 8 | the same length with a given maximum vector entry \code{k} so that the 9 | size of the table is \code{k*k} with zeroes for missing entries 10 | between 1 and \code{k} (the command \code{\link{table}} does pretty 11 | much the same thing but will leave out missing entries). 12 | } 13 | \usage{ 14 | xtable(c1,c2,k) 15 | } 16 | \arguments{ 17 | \item{c1}{vector of integers.} 18 | \item{c2}{vector of integers of same length as \code{c1}.} 19 | \item{k}{integer. Must be larger or equal to maximum entry in 20 | \code{c1} and \code{c2}.} 21 | } 22 | 23 | \value{ 24 | A matrix of dimensions \code{c(k,k)}. Entry \code{[i,j]} gives the 25 | number of places in which \code{c1==i & c2==j}. 26 | } 27 | \author{Christian Hennig 28 | \email{christian.hennig@unibo.it} 29 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 30 | } 31 | 32 | \seealso{ 33 | \code{\link{table}} 34 | } 35 | \examples{ 36 | c1 <- 1:3 37 | c2 <- c(1,1,2) 38 | xtable(c1,c2,3) 39 | } 40 | \keyword{array}% at least one, from doc/KEYWORDS 41 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /man/zmisclassification.matrix.Rd: -------------------------------------------------------------------------------- 1 | \name{zmisclassification.matrix} 2 | \alias{zmisclassification.matrix} 3 | %- Also NEED an `\alias' for EACH other topic documented here. 4 | \title{Matrix of misclassification probabilities between mixture components} 5 | \description{ 6 | Matrix of misclassification probabilities in a mixture distribution 7 | between two mixture components from estimated posterior probabilities 8 | regardless of component parameters, see Hennig (2010). 9 | } 10 | \usage{ 11 | zmisclassification.matrix(z,pro=NULL,clustering=NULL, 12 | ipairs="all",symmetric=TRUE, 13 | stat="max") 14 | 15 | } 16 | %- maybe also `usage' for other objects documented here. 17 | \arguments{ 18 | \item{z}{matrix of posterior probabilities for observations (rows) to 19 | belong to mixture components (columns), so entries need to sum up to 20 | 1 for each row.} 21 | \item{pro}{vector of component proportions, need to sum up to 22 | 1. Computed from \code{z} as default.} 23 | \item{clustering}{vector of integers giving the estimated mixture 24 | components for every observation. Computed from \code{z} as 25 | default.} 26 | \item{ipairs}{\code{"all"} or list of vectors of two integers. If 27 | \code{ipairs="all"}, computations are carried out for all pairs of 28 | components. Otherwise, ipairs gives the pairs of components for 29 | which computations are carried out.} 30 | \item{symmetric}{logical. If \code{TRUE}, the matrix is symmetrised, 31 | see parameter \code{stat}.} 32 | \item{stat}{\code{"max"} or \code{"mean"}. 
The statistic by which the 33 | two misclassification probabilities are aggregated if 34 | \code{symmetric=TRUE}.} 35 | } 36 | 37 | \value{ 38 | A matrix with the (symmetrised, if required) misclassification 39 | probabilities between each pair of mixture components. If 40 | \code{symmetric=FALSE}, matrix entry \code{[i,j]} is the estimated 41 | probability that an observation generated by component 42 | \code{j} is classified to component \code{i} by maximum a posteriori rule. 43 | } 44 | 45 | \references{ 46 | Hennig, C. (2010) Methods for merging Gaussian mixture components, 47 | \emph{Advances in Data Analysis and Classification}, 4, 3-34. 48 | } 49 | \author{Christian Hennig 50 | \email{christian.hennig@unibo.it} 51 | \url{https://www.unibo.it/sitoweb/christian.hennig/en/} 52 | } 53 | \seealso{ 54 | \code{\link{confusion}} 55 | } 56 | \examples{ 57 | set.seed(12345) 58 | m <- rpois(20,lambda=5) 59 | dim(m) <- c(5,4) 60 | m <- m/apply(m,1,sum) 61 | round(zmisclassification.matrix(m,symmetric=FALSE),digits=2) 62 | } 63 | \keyword{cluster}% at least one, from doc/KEYWORDS 64 | \keyword{multivariate} 65 | 66 | 67 | 68 | -------------------------------------------------------------------------------- /tests/fpctests_notallin.R: -------------------------------------------------------------------------------- 1 | # This tests a few things that are not run in the examples. 2 | 3 | library(fpc) 4 | library(MASS) 5 | library(diptest) 6 | library(mclust) 7 | options(digits=3) 8 | 9 | set.seed(4634) 10 | face <- rFace(300,dMoNo=2,dNoEy=0,p=3) 11 | grface <- as.integer(attr(face,"grouping")) 12 | # discrproj(face,grface, clnum=1, method="bc")$units 13 | discrproj(face,grface, clnum=1, method="anc")$units 14 | discrproj(face,grface, clnum=1, method="awc")$units 15 | 16 | 17 | pamk(face,krange=1:5,criterion="ch",usepam=FALSE,critout=TRUE) 18 | 19 | set.seed(20000) 20 | face50 <- rFace(50,dMoNo=2,dNoEy=0,p=2) 21 | pamk(dist(face50),krange=1:5,criterion="asw",critout=TRUE) 22 | 23 | x <- c(1,2,3,6,6,7,8,120) 24 | ff8 <- fixmahal(x) 25 | summary(ff8) 26 | # ...dataset a bit too small for the defaults... 
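# Editorial note (an assumption about the arguments): mnc (minimum cluster
# size) and startn (size of the starting configurations) are lowered so that
# fixmahal can run on this 8-point dataset.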
27 | ff9 <- fixmahal(x, mnc=3, startn=3) 28 | summary(ff9) 29 | 30 | set.seed(776655) 31 | v1 <- rnorm(100) 32 | v2 <- rnorm(100) 33 | d1 <- sample(1:5,100,replace=TRUE) 34 | d2 <- sample(1:4,100,replace=TRUE) 35 | ldata <- cbind(v1,v2,d1,d2) 36 | fr <- flexmixedruns(ldata, 37 | continuous=2,discrete=2,simruns=1,initial.cluster=c(rep(1,5),rep(2,45), 38 | rep(3,50)), 39 | control=list(minprior=0.1), 40 | n.cluster=3,allout=FALSE) 41 | print(fr$optsummary) 42 | 43 | dface <- dist(face50) 44 | 45 | 46 | hclusttreeCBI(face50,minlevel=2,method="complete",scaling=TRUE) 47 | 48 | disthclusttreeCBI(dface,minlevel=2,method="complete") 49 | 50 | noisemclustCBI(face50,G=1:5,emModelNames="VVV",nnk=2) 51 | 52 | distnoisemclustCBI(dface,G=5,emModelNames="EEE",nnk=2, 53 | mdsmethod="classical", 54 | mdsdim=2) 55 | 56 | mahalCBI(face50,clustercut=0.5) 57 | 58 | set.seed(20000) 59 | face100 <- rFace(100,dMoNo=2,dNoEy=0,p=2) 60 | cbf <- clusterboot(face100,B=2,clustermethod=speccCBI,showplots=TRUE,k=6,seed=50000) 61 | cbf$nc 62 | cbf$noisemethod 63 | cbf$bootmethod 64 | # suppressWarnings(if(require(tclust)) 65 | # print(clusterboot(face100,B=2,clustermethod=tclustCBI,showplots=TRUE,k=5,seed=50000,noisemethod=TRUE))) 66 | 67 | 68 | complete3 <- cutree(hclust(dface),3) 69 | 70 | cluster.stats(dface,complete3,G2=TRUE) 71 | 72 | set.seed(55667788) 73 | 74 | data(crabs) 75 | dc <- crabs[,4:8] 76 | cmo <- mclustBIC(crabs[,4:8],G=9,modelNames="EEE") 77 | # set.seed(12345) 78 | cm <- mclustBIC(crabs[,4:8],G=9,modelNames="EEE", 79 | initialization=list(noise=(1:200)[sample(200,50)])) 80 | 81 | 82 | scm <- summary(cm,crabs[,4:8]) 83 | scmo <- summary(cmo,crabs[,4:8]) 84 | 85 | set.seed(334455) 86 | summary(mergenormals(crabs[,4:8],scm,method="ridge.ratio",by=0.05)) 87 | summary(mergenormals(crabs[,4:8],scmo,method="ridge.uni",by=0.05)) 88 | # summary(mergenormals(crabs[,4:8],scm,method="diptantrum",by=0.05)) 89 | # summary(mergenormals(crabs[,4:8],scmo,method="dipuni",by=0.05)) 90 | # summary(mergenormals(crabs[,4:8],scm,method="predictive",M=2)) 91 | 92 | set.seed(20000) 93 | x1 <- rnorm(50) 94 | y <- rnorm(100) 95 | x2 <- rnorm(40,mean=20) 96 | x3 <- rnorm(10,mean=25,sd=100) 97 | x0 <- cbind(c(x1,x2,x3),y) 98 | 99 | prediction.strength(x0,M=10,Gmax=4, 100 | clustermethod=noisemclustCBI, 101 | classification="qda") 102 | 103 | prediction.strength(dist(x0),M=10,Gmax=4, 104 | clustermethod=claraCBI, 105 | classification="centroids") 106 | 107 | 108 | set.seed(20000) 109 | xdata <- c(rnorm(10,0,1),rnorm(10,8,1)) 110 | clustermethod=c("claraCBI","dbscanCBI") 111 | 112 | clustermethodpars <- list() 113 | clustermethodpars[[1]] <- clustermethodpars[[2]] <- list() 114 | clustermethodpars[[2]]$eps <- 2 115 | clustermethodpars[[2]]$MinPts <- 2 116 | cbs <- clusterbenchstats(xdata,G=3,clustermethod=clustermethod, 117 | distmethod=rep(TRUE,2),ncinput=c(TRUE,FALSE),scaling=FALSE, 118 | clustermethodpars=clustermethodpars,nnruns=2,kmruns=2,fnruns=1,avenruns=1,useallg=TRUE) 119 | 120 | print(cbs$sstat,aggregate=TRUE,weights=c(1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,1),include.othernc=cbs$cm$othernc) 121 | print(cbs$qstat,aggregate=TRUE,weights=c(1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,1),include.othernc=cbs$cm$othernc) 122 | 123 | 124 | --------------------------------------------------------------------------------