├── data ├── Atom.rda ├── Hepta.rda ├── Lsun3D.rda ├── Target.rda ├── Tetra.rda ├── Chainlink.rda ├── EngyTime.rda ├── GolfBall.rda ├── Leukemia.rda ├── WingNut.rda ├── TwoDiamonds.rda └── dietary_survey_IBS.rda ├── img ├── Fig0.png ├── Fig1.png ├── Fig2.png ├── Fig3.png └── Fig4.png ├── .gitignore ├── .Rbuildignore ├── FCPS.Rproj ├── R ├── GapStatistic.R ├── ClusterNormalize.R ├── internalMDSestimate.R ├── ClusterCreateClassification.R ├── EstimateRadiusByDistance.R ├── CA_dist_fun.R ├── ClusterDaviesBouldinIndex.R ├── CrossEntropyClustering.R ├── HCLclustering.R ├── NeuralGasClustering.R ├── pdfClustering.R ├── RobustTrimmedClustering.R ├── MeanShiftClustering.R ├── PenalizedRegressionBasedClustering.R ├── ClusterRename.R ├── ClusterRedefine.R ├── ClusterRenameDescendingSize.R ├── ClusterMCC.R ├── ConsensusClustering.R ├── RandomForestClustering.R ├── ClusterInterDistances.R ├── PAMclustering.R ├── QTclustering.R ├── Spectrum.R ├── HierarchicalClusterDists.R ├── FannyClustering.R ├── cluster_analysis_fun.R ├── MSTclustering.R ├── LargeApplicationClustering.R ├── HDDClustering.R ├── ClusterEqualWeighting.R ├── ModelBasedClustering.R ├── GenieClustering.R ├── ClusterCount.R ├── MinimaxLinkageClustering.R ├── ClusterApply.R ├── DivisiveAnalysisClustering.R ├── ProjectionPursuitClustering.R ├── ClusterShannonInfo.R ├── ClusterDistances.R ├── OPTICSclustering.R ├── APclustering.R ├── MinimalEnergyClustering.R ├── SOMclustering.R ├── AgglomerativeNestingClustering.R └── HierarchicalClustering.R ├── inst └── CITATION ├── man ├── Tetra.Rd ├── WingNut.Rd ├── GolfBall.Rd ├── Target.Rd ├── EngyTime.Rd ├── Hepta.Rd ├── GapStatistic.Rd ├── Atom.Rd ├── cluster_analysis_fun.Rd ├── ClusterRename.Rd ├── Lsun3D.Rd ├── TwoDiamonds.Rd ├── Chainlink.Rd ├── ClusterNormalize.Rd ├── EstimateRadiusByDistance.Rd ├── ClusterCreateClassification.Rd ├── dietary_survey_IBS.Rd ├── HCLclustering.Rd ├── Leukemia.Rd ├── ClusterRenameDescendingSize.Rd ├── ClusterCount.Rd ├── ClusterRedefine.Rd ├── ClusterDendrogram.Rd ├── ClusterEqualWeighting.Rd ├── ClusterDistances.Rd ├── NeuralGasClustering.Rd ├── EntropyOfDataField.Rd ├── StatPDEdensity.Rd ├── HierarchicalClusterDists.Rd ├── kmeansdist.Rd ├── MeanShiftClustering.Rd ├── ClusterInterDistances.Rd ├── pdfClustering.Rd ├── ClusterChallenge.Rd ├── MSTclustering.Rd ├── RandomForestClustering.Rd ├── CrossEntropyClustering.Rd ├── GenieClustering.Rd ├── ClusterDaviesBouldinIndex.Rd ├── FannyClustering.Rd ├── MinimaxLinkageClustering.Rd ├── sotaClustering.Rd ├── MinimalEnergyClustering.Rd ├── HierarchicalClusterData.Rd ├── ClusterUpsamplingMinority.Rd ├── ClusterDunnIndex.Rd ├── ModelBasedClustering.Rd ├── ADPclustering.Rd ├── SpectralClustering.Rd ├── PenalizedRegressionBasedClustering.Rd ├── LargeApplicationClustering.Rd ├── ClusterShannonInfo.Rd ├── ModelBasedVarSelClustering.Rd ├── SharedNearestNeighborClustering.Rd ├── PAMClustering.Rd ├── SOMclustering.Rd ├── DivisiveAnalysisClustering.Rd ├── QTclustering.Rd ├── Spectrum.Rd ├── HierarchicalClustering.Rd ├── FCPS-package.Rd ├── OPTICSclustering.Rd └── ClusterMCC.Rd └── DESCRIPTION /data/Atom.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mthrun/FCPS/HEAD/data/Atom.rda -------------------------------------------------------------------------------- /img/Fig0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mthrun/FCPS/HEAD/img/Fig0.png 
-------------------------------------------------------------------------------- /img/Fig1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mthrun/FCPS/HEAD/img/Fig1.png -------------------------------------------------------------------------------- /img/Fig2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mthrun/FCPS/HEAD/img/Fig2.png -------------------------------------------------------------------------------- /img/Fig3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mthrun/FCPS/HEAD/img/Fig3.png -------------------------------------------------------------------------------- /img/Fig4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mthrun/FCPS/HEAD/img/Fig4.png -------------------------------------------------------------------------------- /data/Hepta.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mthrun/FCPS/HEAD/data/Hepta.rda -------------------------------------------------------------------------------- /data/Lsun3D.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mthrun/FCPS/HEAD/data/Lsun3D.rda -------------------------------------------------------------------------------- /data/Target.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mthrun/FCPS/HEAD/data/Target.rda -------------------------------------------------------------------------------- /data/Tetra.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mthrun/FCPS/HEAD/data/Tetra.rda -------------------------------------------------------------------------------- /data/Chainlink.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mthrun/FCPS/HEAD/data/Chainlink.rda -------------------------------------------------------------------------------- /data/EngyTime.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mthrun/FCPS/HEAD/data/EngyTime.rda -------------------------------------------------------------------------------- /data/GolfBall.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mthrun/FCPS/HEAD/data/GolfBall.rda -------------------------------------------------------------------------------- /data/Leukemia.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mthrun/FCPS/HEAD/data/Leukemia.rda -------------------------------------------------------------------------------- /data/WingNut.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mthrun/FCPS/HEAD/data/WingNut.rda -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | .PreyClustering 6 | -------------------------------------------------------------------------------- 
/data/TwoDiamonds.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mthrun/FCPS/HEAD/data/TwoDiamonds.rda -------------------------------------------------------------------------------- /data/dietary_survey_IBS.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mthrun/FCPS/HEAD/data/dietary_survey_IBS.rda -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^.*\Rproj$ 4 | ^.*\.PreyClustering$ 5 | ^.*\dependencies.R$ -------------------------------------------------------------------------------- /FCPS.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | BuildType: Package 16 | PackageInstallArgs: --no-multiarch --with-keep.source 17 | PackageCheckArgs: --as-cran 18 | -------------------------------------------------------------------------------- /R/GapStatistic.R: -------------------------------------------------------------------------------- 1 | GapStatistic=function(Data,ClusterNoMax,ClusterFun,...){ 2 | if (!requireNamespace('cluster',quietly = TRUE)) { 3 | message( 4 | 'Subordinate clustering package (cluster) is missing. No computations are performed. 5 | Please install the package which is defined in "Suggests".' 6 | ) 7 | return( 8 | list( 9 | Cls = rep(1, nrow(Data)), 10 | Object = "Subordinate clustering package (cluster) is missing. 11 | Please install the package which is defined in 'Suggests'." 12 | ) 13 | ) 14 | } 15 | 16 | out=cluster::clusGap(x=Data, FUNcluster=ClusterFun, K.max=ClusterNoMax, ...) 
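# Added note (not part of the original FCPS source): 'out' is the "clusGap" object returned by cluster::clusGap;
# its component out$Tab holds logW, E.logW, gap and SE.sim per number of clusters, and a cluster number can be
# chosen from it, e.g. via cluster::maxSE(out$Tab[, "gap"], out$Tab[, "SE.sim"]).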
17 | return(out) 18 | } 19 | -------------------------------------------------------------------------------- /R/ClusterNormalize.R: -------------------------------------------------------------------------------- 1 | ClusterNormalize <- function(Cls) { 2 | # Values in Cls are consistently recoded to positive consecutive integers 3 | 4 | uniqueLabels <- sort(na.last = T, unique(Cls)) 5 | numberOfLabels <- length(uniqueLabels) 6 | unique2Cls <- NULL # initializing the vector 7 | 8 | for (i in 1:length(Cls)) { 9 | # calculating the indexes of elements of Cls in uniqueLabels 10 | unique2Cls <- c(unique2Cls, which(uniqueLabels == Cls[i])) 11 | } 12 | 13 | if (numberOfLabels > 0) { 14 | normalizedLabels <- c(1:numberOfLabels) 15 | normalizedCls <- normalizedLabels[unique2Cls] 16 | } 17 | else { 18 | normalizedCls <- Cls 19 | } 20 | 21 | return(normalizedCls) 22 | } -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | citHeader("To cite the 'FCPS' R package in publications, please use:") 2 | 3 | bibentry(bibtype = "article", 4 | title = "Fundamental clustering algorithms suite", 5 | author = c(person(given="Michael Christoph", family="Thrun"),person(given="Quirin", family="Stier")), 6 | publisher = "Elsevier", 7 | journal ="SoftwareX", 8 | volume = "13", 9 | pages = "100642", 10 | year = "2021", 11 | issn = "2352-7110", 12 | doi = "10.1016/j.softx.2020.100642", 13 | url = "https://www.sciencedirect.com/science/article/pii/S2352711020303551", 14 | textVersion = 15 | paste( 16 | "Thrun, M. C., & Stier, Q.:", 17 | "Fundamental Clustering Algorithms Suite", 18 | "SoftwareX, Vol. 13(C),", 19 | "pp. 100642,", 20 | "doi 10.1016/j.softx.2020.100642,", 21 | "2021." 22 | )) -------------------------------------------------------------------------------- /man/Tetra.Rd: -------------------------------------------------------------------------------- 1 | \name{Tetra} 2 | \alias{Tetra} 3 | 4 | \docType{data} 5 | \title{ 6 | Tetra introduced in [Ultsch, 1993] 7 | } 8 | \description{ 9 | Almost touching clusters. Detailed description of dataset and its clustering challenge is provided in [Thrun/Ultsch, 2020]. 10 | } 11 | \usage{data("Tetra")} 12 | 13 | \details{ 14 | Size 400, Dimensions 3, stored in \code{Tetra$Data} 15 | 16 | Classes 4, stored in \code{Tetra$Cls} 17 | } 18 | 19 | \references{ 20 | [Ultsch, 1993] Ultsch, A.: Self-organizing neural networks for visualisation and classification, Information and classification, (pp. 307-313), Springer, 1993. 21 | 22 | [Thrun/Ultsch, 2020] Thrun, M. C., & Ultsch, A.: Clustering Benchmark Datasets Exploiting the Fundamental Clustering Problems, Data in Brief, Vol. 30(C), pp. 105501, \doi{10.1016/j.dib.2020.105501}, 2020. 23 | } 24 | \examples{ 25 | data(Tetra) 26 | str(Tetra) 27 | } 28 | \keyword{Tetra} 29 | \keyword{datasets} 30 | \keyword{FCPS} 31 | -------------------------------------------------------------------------------- /man/WingNut.Rd: -------------------------------------------------------------------------------- 1 | \name{WingNut} 2 | \alias{WingNut} 3 | 4 | \docType{data} 5 | \title{ 6 | WingNut introduced in [Ultsch, 2005] 7 | } 8 | \description{ 9 | Density vs. distance. Detailed description of dataset and its clustering challenge is provided in [Thrun/Ultsch, 2020]. 
10 | } 11 | \usage{data("WingNut")} 12 | 13 | \details{ 14 | Size 1016, Dimensions 2, stored in \code{WingNut$Data} 15 | 16 | Classes 2, stored in \code{WingNut$Cls} 17 | } 18 | 19 | \references{ 20 | [Ultsch, 2005] Ultsch, A.: Clustering wih SOM: U* C, Proc. Proceedings of the 5th Workshop on Self-Organizing Maps, Vol. 2, pp. 75-82, 2005. 21 | 22 | [Thrun/Ultsch, 2020] Thrun, M. C., & Ultsch, A.: Clustering Benchmark Datasets Exploiting the Fundamental Clustering Problems, Data in Brief, Vol. 30(C), pp. 105501, \doi{10.1016/j.dib.2020.105501}, 2020. 23 | } 24 | \examples{ 25 | data(WingNut) 26 | str(WingNut) 27 | } 28 | \keyword{WingNut} 29 | \keyword{datasets} 30 | \keyword{FCPS} 31 | -------------------------------------------------------------------------------- /man/GolfBall.Rd: -------------------------------------------------------------------------------- 1 | \name{GolfBall} 2 | \alias{GolfBall} 3 | 4 | \docType{data} 5 | \title{ 6 | GolfBall introduced in [Ultsch, 2005] 7 | } 8 | \description{ 9 | No clusters at all. Detailed description of dataset and its clustering challenge is provided in [Thrun/Ultsch, 2020]. 10 | } 11 | \usage{data("GolfBall")} 12 | 13 | \details{ 14 | Size 4002, Dimensions 3, stored in \code{GolfBall$Data} 15 | 16 | Classes 1, stored in \code{GolfBall$Cls} 17 | } 18 | 19 | \references{ 20 | [Ultsch, 2005] Ultsch, A.: Clustering wih SOM: U* C, Proc. Proceedings of the 5th Workshop on Self-Organizing Maps, Vol. 2, pp. 75-82, 2005. 21 | 22 | [Thrun/Ultsch, 2020] Thrun, M. C., & Ultsch, A.: Clustering Benchmark Datasets Exploiting the Fundamental Clustering Problems, Data in Brief, Vol. 30(C), pp. 105501, \doi{10.1016/j.dib.2020.105501}, 2020. 23 | 24 | } 25 | \examples{ 26 | data(GolfBall) 27 | str(GolfBall) 28 | } 29 | \keyword{GolfBall} 30 | \keyword{datasets} 31 | \keyword{FCPS} -------------------------------------------------------------------------------- /R/internalMDSestimate.R: -------------------------------------------------------------------------------- 1 | internalMDSestimate=function(Distances){ 2 | #smacof is not fast enough 3 | 4 | if (!requireNamespace('ProjectionBasedClustering',quietly = TRUE)) { 5 | message( 6 | 'Subordinate clustering package (ProjectionBasedClustering) is missing. No computations are performed. 7 | Please install the package which is defined in "Suggests".' 8 | ) 9 | return("Subordinate clustering package (ProjectionBasedClustering) is missing. 10 | Please install the package which is defined in 'Suggests'." 11 | ) 12 | } 13 | 14 | s=c() 15 | #fast mds 16 | for(i in 1:(nrow(Distances)-1)){ 17 | s[i]= suppressWarnings(ProjectionBasedClustering::MDS(Distances,OutputDimension = i)$Stress) 18 | if(i>2) 19 | if(s[i]==s[i-1]& s[i]==s[i-2]) 20 | break; 21 | } 22 | i=which.min(s) 23 | data=ProjectionBasedClustering::MDS(Distances,OutputDimension = i)$ProjectedPoints 24 | return(data) 25 | } -------------------------------------------------------------------------------- /man/Target.Rd: -------------------------------------------------------------------------------- 1 | \name{Target} 2 | \alias{Target} 3 | 4 | \docType{data} 5 | \title{ 6 | Target introduced in [Ultsch, 2005]. 
7 | } 8 | \description{ 9 | Detailed description of dataset and its clustering challenge of outliers is provided in [Thrun/Ultsch, 2020] 10 | } 11 | \usage{data("Target")} 12 | 13 | \details{ 14 | Size 770, Dimensions 2, stored in \code{Target$Data} 15 | 16 | Classes 6, stored in \code{Target$Cls} 17 | } 18 | 19 | \references{ 20 | [Ultsch, 2005] Ultsch, A.: U* C: Self-organized Clustering with Emergent Feature Maps, Proc. Lernen, Wissensentdeckung und Adaptivitaet (LWA/FGML), pp. 240-244, Saarbruecken, Germany, 2005. 21 | 22 | [Thrun/Ultsch, 2020] Thrun, M. C., & Ultsch, A.: Clustering Benchmark Datasets Exploiting the Fundamental Clustering Problems, Data in Brief, Vol. 30(C), pp. 105501, \doi{10.1016/j.dib.2020.105501}, 2020. 23 | } 24 | \examples{ 25 | data(Target) 26 | str(Target) 27 | } 28 | \keyword{Target} 29 | \keyword{datasets} 30 | \keyword{FCPS} -------------------------------------------------------------------------------- /man/EngyTime.Rd: -------------------------------------------------------------------------------- 1 | \name{EngyTime} 2 | \alias{EngyTime} 3 | 4 | \docType{data} 5 | \title{ 6 | EngyTime introduced in [Baggenstoss, 2002]. 7 | } 8 | \description{ 9 | Gaussian mixture. Detailed description of dataset and its clustering challenge is provided in [Thrun/Ultsch, 2020]. 10 | } 11 | \usage{data("EngyTime")} 12 | 13 | \details{ 14 | Size 4096, Dimensions 2, stored in \code{EngyTime$Data} 15 | 16 | Classes 2, stored in \code{EngyTime$Cls} 17 | } 18 | 19 | \references{ 20 | [Baggenstoss, 2002] Baggenstoss, P. M.: Statistical modeling using gaussian mixtures and hmms with matlab, Naval Undersea Warfare Center, Newport RI, 2002. 21 | 22 | [Thrun/Ultsch, 2020] Thrun, M. C., & Ultsch, A.: Clustering Benchmark Datasets Exploiting the Fundamental Clustering Problems, Data in Brief, Vol. 30(C), pp. 105501, \doi{10.1016/j.dib.2020.105501}, 2020. 23 | 24 | } 25 | \examples{ 26 | data(EngyTime) 27 | str(EngyTime) 28 | } 29 | \keyword{EngyTime} 30 | \keyword{datasets} 31 | \keyword{FCPS} -------------------------------------------------------------------------------- /R/ClusterCreateClassification.R: -------------------------------------------------------------------------------- 1 | ClusterCreateClassification=function(Objects,Decreasing){ 2 | # 3 | # INPUT 4 | # Objects listed objects, for example factor 5 | # 6 | # OUTPUT 7 | # Cls[1:n] Clustering of data 8 | # ClusterNames Object of adpclust algorithm 9 | # Decreasing if not missing, objects are sorted 10 | # Author: MT 11 | if(is.list(Objects)) Objects=unlist(Objects) 12 | y=as.character(Objects) 13 | 14 | n=length(y) 15 | u=unique(y,fromLast = FALSE) 16 | if(!missing(Decreasing)){ 17 | u=sort(u,decreasing = Decreasing,na.last=TRUE) 18 | } 19 | names(u)=1:length(u) 20 | Cls=rep(NaN,n) 21 | u_num=as.numeric(u) 22 | nans=sum(!is.finite(u_num)) 23 | if(nans>0){ 24 | for(i in 1:length(u)){ 25 | Cls[y==u[i]]=i 26 | } 27 | }else{#special case: all levels are numeric 28 | for(i in 1:length(u)){ 29 | Cls[y==u[i]]=u_num[i] 30 | } 31 | } 32 | 33 | 34 | return(list(Cls=Cls,ClusterNames=u)) 35 | } -------------------------------------------------------------------------------- /man/Hepta.Rd: -------------------------------------------------------------------------------- 1 | \name{Hepta} 2 | \alias{Hepta} 3 | 4 | \docType{data} 5 | \title{ 6 | Hepta introduced in [Ultsch, 2003] 7 | } 8 | \description{ 9 | Clearly defined clusters, different variances. 
Detailed description of dataset and its clustering challenge is provided in [Thrun/Ultsch, 2020]. 10 | } 11 | \usage{data("Hepta")} 12 | 13 | \details{ 14 | Size 212, Dimensions 3, stored in \code{Hepta$Data} 15 | 16 | Classes 7, stored in \code{Hepta$Cls} 17 | 18 | } 19 | 20 | \references{ 21 | [Ultsch, 2003] Ultsch, A.: Maps for the visualization of high-dimensional data spaces, Proc. Workshop on Self organizing Maps (WSOM), pp. 225-230, Kyushu, Japan, 2003. 22 | 23 | [Thrun/Ultsch, 2020] Thrun, M. C., & Ultsch, A.: Clustering Benchmark Datasets Exploiting the Fundamental Clustering Problems, Data in Brief, Vol. 30(C), pp. 105501, \doi{10.1016/j.dib.2020.105501}, 2020. 24 | } 25 | \examples{ 26 | data(Hepta) 27 | str(Hepta) 28 | } 29 | \keyword{Hepta} 30 | \keyword{datasets} 31 | \keyword{FCPS} 32 | -------------------------------------------------------------------------------- /R/EstimateRadiusByDistance.R: -------------------------------------------------------------------------------- 1 | EstimateRadiusByDistance=function(DistanceMatrix){ 2 | # 3 | # INPUT 4 | # DistanceMatrix symmetric distance Matrix of n cases 5 | # 6 | # OUTPUT 7 | # Numerical scalar defining the radius 8 | # 9 | 10 | if (!requireNamespace('ABCanalysis',quietly = TRUE)) { 11 | message( 12 | 'Subordinate package (ABCanalysis) is missing. No computations are performed. 13 | Please install the package (ABCanalysis) which is defined in "Suggests".' 14 | ) 15 | return( 16 | list( 17 | Radius = 0, 18 | Message = "Subordinate package (ABCanalysis) is missing. 19 | Please install the package which is defined in 'Suggests'." 20 | ) 21 | ) 22 | } 23 | 24 | x=DistanceMatrix[lower.tri(DistanceMatrix, diag = FALSE)] 25 | xx=ABCanalysis::ABCRemoveSmallYields(x,0.5) 26 | x=xx$SubstantialData 27 | res=ABCanalysis::ABCanalysis(x) 28 | Radius=min(x[res$Aind])/max(x[res$Cind]) 29 | return(Radius) 30 | } -------------------------------------------------------------------------------- /man/GapStatistic.Rd: -------------------------------------------------------------------------------- 1 | \name{GapStatistic} 2 | \alias{GapStatistic} 3 | \title{ 4 | Gap Statistic 5 | } 6 | \description{ 7 | Gap statistic for estimating the number of clusters in a data set. 8 | } 9 | \usage{ 10 | GapStatistic(Data, ClusterNoMax, ClusterFun, \dots) 11 | } 12 | \arguments{ 13 | \item{Data}{ 14 | [1:n,1:d] data matrix 15 | } 16 | \item{ClusterNoMax}{ 17 | Maximum number of clusters to be investigated 18 | } 19 | \item{ClusterFun}{ 20 | Clustering function to be applied, e.g. \code{kmeans} 21 | } 22 | \item{\dots}{ 23 | Further arguments passed on to \code{cluster::clusGap} 24 | } 25 | } 26 | \details{ 27 | Does not work on the Hepta data set, see the example. 28 | } 29 | \value{ 30 | Object of class \code{clusGap} as returned by \code{cluster::clusGap}, containing the gap statistic for each number of clusters. 31 | } 32 | \references{ 33 | Tibshirani, R., Walther, G. and Hastie, T.: Estimating the number of clusters in a data set via the Gap statistic, Journal of the Royal Statistical Society B, Vol. 63, pp. 411-423, 2001. 34 | } 35 | \author{ 36 | Michael Thrun 37 | } 38 | \note{ 39 | Wrapper only 40 | } 41 | \examples{ 42 | data(Hepta) 43 | #GapStatistic(Hepta$Data,10,ClusterFun = kmeans) 44 | } 45 | \keyword{Gap} 46 | \concept{Gap Statistic} 47 | -------------------------------------------------------------------------------- /man/Atom.Rd: -------------------------------------------------------------------------------- 1 | \name{Atom} 2 | \alias{Atom} 3 | 4 | \docType{data} 5 | \title{ 6 | Atom introduced in [Ultsch, 2004]. 7 | } 8 | \description{ 9 | Two nested spheres with different variances that are not linearly separable. 
10 | Detailed description of dataset and its clustering challenge is provided in [Thrun/Ultsch, 2020]. 11 | } 12 | \usage{data("Atom")} 13 | 14 | \details{ 15 | Size 800, Dimensions 3, stored in \code{Atom$Data} 16 | 17 | Classes 2, stored in \code{Atom$Cls} 18 | } 19 | 20 | \references{ 21 | [Ultsch, 2004] Ultsch, A.: Strategies for an artificial life system to cluster high dimensional data, Abstracting and Synthesizing the Principles of Living Systems, GWAL-6, U. Brggemann, H. Schaub, and F. Detje, Eds, pp. 128-137. 2004. 22 | 23 | [Thrun/Ultsch, 2020] Thrun, M. C., & Ultsch, A.: Clustering Benchmark Datasets Exploiting the Fundamental Clustering Problems, Data in Brief, Vol. 30(C), pp. 105501, \doi{10.1016/j.dib.2020.105501}, 2020. 24 | 25 | 26 | } 27 | \examples{ 28 | data(Atom) 29 | str(Atom) 30 | } 31 | \keyword{datasets} 32 | \keyword{FCPS} 33 | \keyword{Atom} 34 | -------------------------------------------------------------------------------- /man/cluster_analysis_fun.Rd: -------------------------------------------------------------------------------- 1 | \name{cluster_analysis_fun} 2 | \alias{cluster_analysis_fun} 3 | %- Also NEED an '\alias' for EACH other topic documented here. 4 | \title{ 5 | Internal function 6 | } 7 | \description{ 8 | Do not use alone, please use \code{\link{parApplyClusterAnalysis}}. 9 | } 10 | \usage{ 11 | cluster_analysis_fun(i, fun, DataOrDistances, ClusterNo,SetSeed=TRUE, ...) 12 | } 13 | %- maybe also 'usage' for other objects documented here. 14 | \arguments{ 15 | \item{i}{ 16 | ... 17 | } 18 | \item{fun}{ 19 | ... 20 | } 21 | \item{DataOrDistances}{ 22 | ... 23 | } 24 | \item{ClusterNo}{ 25 | ... 26 | } 27 | \item{SetSeed}{ 28 | ... 29 | } 30 | \item{\dots}{ 31 | ... 32 | } 33 | } 34 | \details{ 35 | ... 36 | } 37 | \value{ 38 | ... 39 | } 40 | \author{ 41 | Michael Thrun 42 | } 43 | 44 | \seealso{ 45 | \code{\link{parApplyClusterAnalysis}} 46 | } 47 | \examples{ 48 | \dontrun{ 49 | #data(Hepta) 50 | #Distance=as.matrix(parallelDist::parallelDist(Hepta$Data)) 51 | #out=cluster_analysis_fun(i = 1,fun = APclusterin 52 | #g,DataOrDistances = Distance,ClusterNo = 7) 53 | } 54 | } 55 | 56 | -------------------------------------------------------------------------------- /R/CA_dist_fun.R: -------------------------------------------------------------------------------- 1 | CA_dist_fun=function(i,fun,Distances,ClusterNo,SetSeed=TRUE,...){ 2 | 3 | if(isTRUE(SetSeed)){ 4 | seedno=1000+i 5 | set.seed(seed = seedno) 6 | nndelta=paste0('Seed_',seedno) 7 | }else{ 8 | nndelta=paste0(i) 9 | set.seed(seed = NULL) 10 | } 11 | prior=Sys.time() 12 | string=names(formals(fun)) 13 | 14 | if(is.null(ClusterNo)){ 15 | object=R.utils::doCall(fun, args=list(DataOrDistances=Distances,...),.ignoreUnusedArgs=TRUE) 16 | }else{ 17 | object=R.utils::doCall(fun, args=list(DataOrDistances=Distances,ClusterNo=ClusterNo,...),.ignoreUnusedArgs=TRUE) 18 | } 19 | past=Sys.time() 20 | delta=as.vector(as.numeric(difftime(past,prior,units = 'secs'))) 21 | names(delta)=nndelta 22 | nn=names(object) 23 | ind=which(nn=='Cls') 24 | if(length(ind)==1){ 25 | Liste=list(Cls=object[[ind]],ComputationTime=delta,Seed=seedno,CAs=object) 26 | }else{ 27 | warning('"Cls" object could not be found. 
Everything available is returned.') 28 | Liste=list(Cls=NULL,ComputationTime=delta,Seed=seedno,CAs=object) 29 | } 30 | return(Liste) 31 | }#end help_fun -------------------------------------------------------------------------------- /man/ClusterRename.Rd: -------------------------------------------------------------------------------- 1 | \name{ClusterRename} 2 | \alias{ClusterRename} 3 | 4 | \title{ 5 | Renames Clustering 6 | } 7 | \description{ 8 | Renames Clustering such that the names of the numerical vectors are the row names of DataOrDistances 9 | } 10 | \usage{ 11 | ClusterRename(Cls, DataOrDistances) 12 | } 13 | 14 | \arguments{ 15 | \item{Cls}{ 16 | 1:n numerical vector of numbers defining the classification as the main output of the clustering algorithm for the n cases of data. It has k unique numbers representing the arbitrary labels of the clustering. 17 | } 18 | \item{DataOrDistances}{ 19 | Either nonsymmetric [1:n,1:d] datamatrix of n cases and d features or symmetric [1:n,1:n] distance matrix 20 | } 21 | } 22 | \details{ 23 | If DataOrDistances is missing or if inconsistent length, nothing is done. 24 | } 25 | \value{ 26 | Cls[1:n] numerical vector named after the row names of data 27 | } 28 | 29 | \author{ 30 | Michael Thrun 31 | } 32 | 33 | \examples{ 34 | data('Hepta') 35 | Cls=Hepta$Cls 36 | Data=Hepta$Data# 37 | #prior 38 | Cls 39 | #Named Clustering 40 | ClusterRename(Cls,Data) 41 | } 42 | 43 | \keyword{ClusterRename}% use one of RShowDoc("KEYWORDS") 44 | -------------------------------------------------------------------------------- /R/ClusterDaviesBouldinIndex.R: -------------------------------------------------------------------------------- 1 | ClusterDaviesBouldinIndex=function(Cls,Data,...){ 2 | 3 | if (!requireNamespace('clusterSim',quietly = TRUE)){ 4 | message( 5 | 'Subordinate package (clusterSim) is missing. No computations are performed. 6 | Please install the package which is defined in "Suggests".' 7 | ) 8 | return( 9 | list( 10 | DaviesBouldinIndex = NaN, 11 | Object = "Subordinate package (clusterSim) is missing. 12 | Please install the package which is defined in 'Suggests'." 13 | ) 14 | ) 15 | }else{ 16 | if(missing(Cls)) 17 | stop("ClusterDaviesBouldinIndex: Cls is missing") 18 | if(missing(Data)) 19 | stop("ClusterDaviesBouldinIndex: Data is missing") 20 | 21 | if(length(Cls)!=nrow(Data)) 22 | stop("ClusterDaviesBouldinIndex: Number of rows in 'Data' does not equal length of 'Cls'") 23 | 24 | if(length(unique(Cls))==1){ 25 | warning("ClusterDaviesBouldinIndex: 'Cls' has only one cluster stored.") 26 | } 27 | 28 | out=clusterSim::index.DB(Data,cl = Cls,...) 29 | return(list(DaviesBouldinIndex=out$DB,Object=out)) 30 | } 31 | } -------------------------------------------------------------------------------- /R/CrossEntropyClustering.R: -------------------------------------------------------------------------------- 1 | CrossEntropyClustering=function(Data,ClusterNo,PlotIt=FALSE,...){ 2 | # INPUT 3 | # Data[1:n,1:d] Data set with n observations and d features 4 | # ClusterNo Number of clusters to search for 5 | # 6 | # OPTIONAL 7 | # PlotIt Boolean. Decision to plot or not 8 | # 9 | # OUTPUT 10 | # Cls[1:n] Clustering of data 11 | # Object Object of CEC::cec algorithm 12 | # 13 | # Author: MT 14 | if (!requireNamespace('CEC',quietly = TRUE)) { 15 | message( 16 | 'Subordinate clustering package (CEC) is missing. No computations are performed. 17 | Please install the package which is defined in "Suggests".' 
18 | ) 19 | return( 20 | list( 21 | Cls = rep(1, nrow(Data)), 22 | Object = "Subordinate clustering package (CEC) is missing. 23 | Please install the package which is defined in 'Suggests'." 24 | ) 25 | ) 26 | } 27 | 28 | model=CEC::cec(Data,centers = ClusterNo,...) 29 | 30 | Cls=model$cluster 31 | if(PlotIt){ 32 | ClusterPlotMDS(Data,Cls) 33 | } 34 | 35 | Cls=ClusterRename(Cls,Data) 36 | return(list(Cls=Cls,Object=model)) 37 | } -------------------------------------------------------------------------------- /man/Lsun3D.Rd: -------------------------------------------------------------------------------- 1 | \name{Lsun3D} 2 | \alias{Lsun3D} 3 | 4 | \docType{data} 5 | \title{ 6 | Lsun3D inspired by FCPS introduced in [Thrun, 2018] 7 | } 8 | \description{ 9 | Clearly defined clusters, different variances. Detailed description of dataset and its clustering challenge is provided in [Thrun/Ultsch, 2020]. 10 | } 11 | \usage{data("Lsun3D")} 12 | 13 | \details{ 14 | Size 404, Dimensions 3 15 | 16 | Dataset defines discontinuites, where the clusters have different variances. 17 | Three main clusters, and four outliers (in cluster 4). For a more detailed description see [Thrun, 2018]. 18 | } 19 | 20 | \references{ 21 | [Thrun, 2018] Thrun, M. C.: Projection Based Clustering through Self-Organization and Swarm Intelligence, doctoral dissertation 2017, Springer, Heidelberg, ISBN: 978-3-658-20539-3, \doi{10.1007/978-3-658-20540-9}, 2018. 22 | 23 | [Thrun/Ultsch, 2020] Thrun, M. C., & Ultsch, A.: Clustering Benchmark Datasets Exploiting the Fundamental Clustering Problems, Data in Brief, Vol. 30(C), pp. 105501, \doi{10.1016/j.dib.2020.105501}, 2020. 24 | } 25 | \examples{ 26 | data(Lsun3D) 27 | str(Lsun3D) 28 | Cls=Lsun3D$Cls 29 | Data=Lsun3D$Data 30 | } 31 | \keyword{datasets} 32 | \keyword{Lsun3D} 33 | \keyword{FCPS} 34 | -------------------------------------------------------------------------------- /man/TwoDiamonds.Rd: -------------------------------------------------------------------------------- 1 | \name{TwoDiamonds} 2 | \alias{TwoDiamonds} 3 | 4 | \docType{data} 5 | \title{ 6 | TwoDiamonds introduced in [Ultsch, 2003a, 2003b] 7 | } 8 | \description{ 9 | Cluster border defined by density. Detailed description of dataset and its clustering challenge is provided in [Thrun/Ultsch, 2020]. 10 | } 11 | \usage{data("TwoDiamonds")} 12 | 13 | \details{ 14 | Size 800, Dimensions 2, stored in \code{TwoDiamonds$Data} 15 | 16 | Classes 2, stored in \code{TwoDiamonds$Cls} 17 | } 18 | 19 | \references{ 20 | [Ultsch, 2003a] Ultsch, A.Optimal density estimation in data containing clusters of unknown structure, technical report, Vol. 34,University of Marburg, Department of Mathematics and Computer Science, 2003. 21 | 22 | [Ultsch, 2003b] Ultsch, A.: U*-matrix: a tool to visualize clusters in high dimensional data, Fachbereich Mathematik und Informatik, 2003. 23 | 24 | [Thrun/Ultsch, 2020] Thrun, M. C., & Ultsch, A.: Clustering Benchmark Datasets Exploiting the Fundamental Clustering Problems, Data in Brief, Vol. 30(C), pp. 105501, \doi{10.1016/j.dib.2020.105501}, 2020. 
25 | 26 | } 27 | \examples{ 28 | data(TwoDiamonds) 29 | str(TwoDiamonds) 30 | } 31 | \keyword{TwoDiamonds} 32 | \keyword{datasets} 33 | \keyword{FCPS} 34 | -------------------------------------------------------------------------------- /R/HCLclustering.R: -------------------------------------------------------------------------------- 1 | HCLclustering <-function(Data,ClusterNo,PlotIt=FALSE,...){ 2 | # Cls=NeuralGas(Data,ClusterNo=2) 3 | # 4 | # INPUT 5 | # Data[1:n,1:d] Data set with n observations and d features 6 | # ClusterNo Number of clusters to search for 7 | # 8 | # OPTIONAL 9 | # PlotIt Boolean. Decision to plot or not 10 | # 11 | # OUTPUT 12 | # Cls[1:n] Clustering of data 13 | # Object Object of cclust algorithm 14 | # 15 | # Author: MT 04/2018 16 | 17 | 18 | if (!requireNamespace('cclust',quietly = TRUE)) { 19 | message( 20 | 'Subordinate clustering package (cclust) is missing. No computations are performed. 21 | Please install the package which is defined in "Suggests".' 22 | ) 23 | return( 24 | list( 25 | Cls = rep(1, nrow(Data)), 26 | Object = "Subordinate clustering package (cclust) is missing. 27 | Please install the package which is defined in 'Suggests'." 28 | ) 29 | ) 30 | } 31 | 32 | res=cclust::cclust(x=Data,centers=ClusterNo,method='hardcl',...) 33 | Cls=res$cluster 34 | if(PlotIt){ 35 | ClusterPlotMDS(Data,Cls) 36 | } 37 | Cls=ClusterRename(Cls,Data) 38 | return(list(Cls=Cls,Object=res)) 39 | } -------------------------------------------------------------------------------- /man/Chainlink.Rd: -------------------------------------------------------------------------------- 1 | \name{Chainlink} 2 | \alias{Chainlink} 3 | 4 | \docType{data} 5 | \title{ 6 | Chainlink introduced in [Ultsch et al., 1994; Ultsch, 1995]. 7 | } 8 | \description{ 9 | Two chains of rings. Detailed description of dataset and its clustering challenge is provided in [Thrun/Ultsch, 2020]. 10 | } 11 | \usage{data("Chainlink")} 12 | 13 | \details{ 14 | Size 1000, Dimensions 3, stored in \code{Chainlink$Data} 15 | 16 | Classes 2, stored in \code{Chainlink$Cls} 17 | } 18 | 19 | \references{ 20 | [Ultsch et al., 1994] Ultsch, A., Guimaraes, G., Korus, D., & Li, H.: Knowledge extraction from artificial neural networks and applications, Parallele Datenverarbeitung mit dem Transputer, (pp. 148-162), Springer, 1994. 21 | 22 | [Ultsch, 1995] Ultsch, A.: Self organizing neural networks perform different from statistical k-means clustering, Proc. Society for Information and Classification (GFKL), Vol. 1995, Basel 8th-10th March 1995. 23 | 24 | [Thrun/Ultsch, 2020] Thrun, M. C., & Ultsch, A.: Clustering Benchmark Datasets Exploiting the Fundamental Clustering Problems, Data in Brief, Vol. 30(C), pp. 105501, \doi{10.1016/j.dib.2020.105501}, 2020. 25 | 26 | 27 | } 28 | \examples{ 29 | data(Chainlink) 30 | str(Chainlink) 31 | } 32 | \keyword{Chainlink} 33 | \keyword{datasets} 34 | \keyword{FCPS} -------------------------------------------------------------------------------- /man/ClusterNormalize.Rd: -------------------------------------------------------------------------------- 1 | \name{ClusterNormalize} 2 | \alias{ClusterNormalize} 3 | %- Also NEED an '\alias' for EACH other topic documented here. 4 | \title{ 5 | Cluster Normalize 6 | } 7 | \description{ 8 | Values in Cls are consistently recoded to positive consecutive integers 9 | } 10 | \usage{ 11 | ClusterNormalize(Cls) 12 | } 13 | %- maybe also 'usage' for other objects documented here. 
14 | \arguments{ 15 | \item{Cls}{[1:n numerical vector of numbers defining the classification as the main output of the clustering algorithm for the n cases of data. It has k unique numbers representing the arbitrary labels of the clustering.} 16 | } 17 | \details{ 18 | For recoding depending on cluster size please see \code{\link{ClusterRenameDescendingSize}}. 19 | } 20 | \value{ 21 | The renamed classification. A vector of clusters recoded to positive consecutive integers. 22 | } 23 | 24 | \author{ 25 | . 26 | } 27 | 28 | \seealso{ 29 | \code{\link{ClusterRenameDescendingSize}} 30 | } 31 | 32 | \examples{ 33 | data('Lsun3D') 34 | Cls=Lsun3D$Cls 35 | #not desceending cluster numbers 36 | Cls[Cls==1]=543 37 | Cls[Cls==4]=1 38 | 39 | # Now ordered consecutively 40 | ClusterNormalize(Cls) 41 | } 42 | 43 | \keyword{ClusterNormalize} 44 | \concept{Consecutive Clustering} 45 | \concept{Cluster Normalize} 46 | 47 | -------------------------------------------------------------------------------- /R/NeuralGasClustering.R: -------------------------------------------------------------------------------- 1 | NeuralGasClustering <-function(Data,ClusterNo,PlotIt=FALSE,...){ 2 | # Cls=NeuralGas(Data,ClusterNo) 3 | # 4 | # INPUT 5 | # Data[1:n,1:d] Data set with n observations and d features 6 | # ClusterNo Number of clusters to search for 7 | # 8 | # OPTIONAL 9 | # PlotIt Boolean. Decision to plot or not 10 | # 11 | # OUTPUT 12 | # Cls[1:n] Clustering of data 13 | # Object Object of cclust::cclust, method = neuralgas 14 | # 15 | # Author: MT 06/2015 16 | # 1.Edit: MT 04/18 17 | 18 | 19 | if (!requireNamespace('cclust',quietly = TRUE)) { 20 | message( 21 | 'Subordinate clustering package (cclust) is missing. No computations are performed. 22 | Please install the package which is defined in "Suggests".' 23 | ) 24 | return( 25 | list( 26 | Cls = rep(1, nrow(Data)), 27 | Object = "Subordinate clustering package (cclust) is missing. 28 | Please install the package which is defined in 'Suggests'." 29 | ) 30 | ) 31 | } 32 | res=cclust::cclust(x=Data,centers=ClusterNo,method='neuralgas',...) 33 | Cls=res$cluster 34 | if(PlotIt){ 35 | ClusterPlotMDS(Data,Cls) 36 | } 37 | Cls=ClusterRename(Cls,Data) 38 | return(list(Cls=Cls,Object=res)) 39 | } -------------------------------------------------------------------------------- /R/pdfClustering.R: -------------------------------------------------------------------------------- 1 | pdfClustering <-function(Data,PlotIt=FALSE,...){ 2 | # Cls <- pdfClustering(Data,ClusterNo); 3 | # Clustering via nonparametric density estimation 4 | # 5 | # INPUT 6 | # Data[1:n,1:d] Data set with n observations and d features 7 | # 8 | # OPTIONAL 9 | # PlotIt Boolean. Decision to plot or not 10 | # 11 | # OUTPUT 12 | # Cls[1:n] Clustering of data 13 | # Object Object of pdfCluster::pdfCluster algorithm 14 | # 15 | # MT 2019 16 | 17 | if (!requireNamespace('pdfCluster',quietly = TRUE)) { 18 | message( 19 | 'Subordinate clustering package (pdfCluster) is missing. No computations are performed. 20 | Please install the package which is defined in "Suggests".' 21 | ) 22 | return( 23 | list( 24 | Cls = rep(1, nrow(Data)), 25 | Object = "Subordinate clustering package (pdfCluster) is missing. 26 | Please install the package which is defined in 'Suggests'." 27 | ) 28 | ) 29 | } 30 | 31 | 32 | out=pdfCluster::pdfCluster(Data,...) 
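# Added note (not part of the original FCPS source): pdfCluster::pdfCluster() estimates the number of groups
# itself from a nonparametric (kernel) density estimate, which is why this wrapper has no ClusterNo argument;
# the S4 result object carries the group labels in its @clusters slot, extracted below.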
33 | 34 | Cls=as.vector(out@clusters) 35 | 36 | if(!is.null(rownames(Data))){ 37 | names(Cls)=rownames(Data) 38 | } 39 | 40 | if(PlotIt){ 41 | ClusterPlotMDS(Data,Cls) 42 | } 43 | Cls=ClusterRename(Cls,Data) 44 | return(list(Cls=Cls,Object=out)) 45 | } -------------------------------------------------------------------------------- /R/RobustTrimmedClustering.R: -------------------------------------------------------------------------------- 1 | RobustTrimmedClustering=function(Data,ClusterNo,Alpha=0.05,PlotIt=FALSE,...){ 2 | # Cls=RobustTrimmedClustering(Data,ClusterNo) 3 | # 4 | # INPUT 5 | # Data[1:n,1:d] Data set with n observations and d features 6 | # ClusterNo Number of clusters to search for 7 | # Alpha No trimming is done equals to alpha =0, otherwise proportion of datapoints to be trimmed. 8 | # 9 | # OPTIONAL 10 | # PlotIt Boolean. Decision to plot or not. 11 | # 12 | # OUTPUT 13 | # Cls[1:n] Clustering of data 14 | # Object Object of tclust::tclust 15 | # 16 | # Author: MT 09/2019 17 | 18 | if (!requireNamespace('tclust',quietly = TRUE)) { 19 | message( 20 | 'Subordinate clustering package (tclust) is missing. No computations are performed. 21 | Please install the package which is defined in "Suggests".' 22 | ) 23 | return( 24 | list( 25 | Cls = rep(1, nrow(Data)), 26 | Object = "Subordinate clustering package (tclust) is missing. 27 | Please install the package which is defined in 'Suggests'." 28 | ) 29 | ) 30 | } 31 | 32 | res=tclust::tclust(x=Data,k = ClusterNo,alpha = Alpha,...) 33 | Cls=res$cluster 34 | 35 | if(PlotIt){ 36 | ClusterPlotMDS(Data,Cls) 37 | } 38 | Cls=ClusterRename(Cls,Data) 39 | return(list(Cls=Cls,Object=res)) 40 | } -------------------------------------------------------------------------------- /R/MeanShiftClustering.R: -------------------------------------------------------------------------------- 1 | MeanShiftClustering=function(Data,PlotIt=FALSE,...){ 2 | # Cls=MeanShiftClustering(Data,ClusterNo=2) 3 | # Clustering by mean shift 4 | # 5 | # INPUT 6 | # Data[1:n,1:d] Data set with n observations and d features 7 | # 8 | # OPTIONAL 9 | # PlotIt Boolean. Decision to plot or not 10 | # 11 | # OUTPUT 12 | # Cls[1:n] Clustering of data 13 | # Object Object of mlpack::mean_shift algorithm 14 | # 15 | # Author: MT 05/2023 16 | #Cheng, Yizong ( 1995). "Mean Shift, Mode Seeking, and Clustering". IEEE Transactions on Pattern Analysis and Machine Intelligence. 17 (8): 790–799. CiteSeerX 10.1.1.510.1222. doi:10.1109/34.400568. 17 | if (!requireNamespace('mlpack',quietly = TRUE)) { 18 | message( 19 | 'Subordinate clustering package (mlpack) is missing. No computations are performed. 20 | Please install the package which is defined in "Suggests".' 21 | ) 22 | return( 23 | list( 24 | Cls = rep(1, nrow(Data)), 25 | Object = "Subordinate clustering package (mlpack) is missing. 26 | Please install the package which is defined in 'Suggests'." 27 | ) 28 | ) 29 | } 30 | res = mlpack::mean_shift(input = Data,labels_only = T, ...) 
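# Added note (not part of the original FCPS source): mlpack::mean_shift() returns zero-based cluster labels
# in res$output; the +1 below shifts them to the 1..k labeling convention used throughout FCPS.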
31 | Cls = as.vector(res$output)+1 32 | 33 | if (PlotIt) { 34 | ClusterPlotMDS(Data , Cls) 35 | } 36 | Cls = ClusterRename(Cls, Data) 37 | 38 | return(list(Cls=Cls,Object=res)) 39 | } -------------------------------------------------------------------------------- /R/PenalizedRegressionBasedClustering.R: -------------------------------------------------------------------------------- 1 | PenalizedRegressionBasedClustering=function(Data,FirstLambda,SecondLambda,Tau, PlotIt=FALSE,...){ 2 | # INPUT 3 | # Data[1:n,1:d] Data set with n observations and d features 4 | # FirstLambda Set 1 for quadratic penalty based algorithm, 0.4 for revised ADMM. 5 | # SecondLambda The magnitude of grouping penalty. 6 | # Tau Tuning parameter: tau, related to grouping penalty. 7 | # 8 | # OPTIONAL 9 | # PlotIt Boolean. Decision to plot or not 10 | # 11 | # OUTPUT 12 | # Cls[1:n] Clustering of data 13 | # Object Object of prclust::PRclust algorithm 14 | # 15 | # Author: MT 16 | if (!requireNamespace('prclust',quietly = TRUE)) { 17 | message( 18 | 'Subordinate clustering package (prclust) is missing. No computations are performed. 19 | Please install the package which is defined in "Suggests".' 20 | ) 21 | return( 22 | list( 23 | Cls = rep(1, nrow(Data)), 24 | Object = "Subordinate clustering package (prclust) is missing. 25 | Please install the package which is defined in 'Suggests'." 26 | ) 27 | ) 28 | } 29 | 30 | Data=t(Data) 31 | model=prclust::PRclust(Data,FirstLambda,SecondLambda,Tau) 32 | 33 | Cls=model$group 34 | if(PlotIt){ 35 | ClusterPlotMDS(Data,Cls) 36 | } 37 | Cls=ClusterRename(Cls,t(Data)) 38 | return(list(Cls=Cls,Object=model)) 39 | } -------------------------------------------------------------------------------- /R/ClusterRename.R: -------------------------------------------------------------------------------- 1 | ClusterRename=function(Cls,DataOrDistances){ 2 | # 3 | # INPUT 4 | # Cls 1:n numerical vector of numbers defining the classification as the main 5 | # output of the clustering algorithm for the n cases of data. It has k unique 6 | # numbers representing the arbitrary labels of the clustering. 7 | # DataOrDistances Either nonsymmetric [1:n,1:d] datamatrix of n cases and d features or 8 | # symmetric [1:n,1:n] distance matrix 9 | # 10 | # OUTPUT 11 | # Cls[1:n] numerical vector named after the row names of data 12 | # 13 | tryCatch({ # Make sure cls is given back 14 | if(missing(DataOrDistances)){ 15 | warning('ClusterRename: DataOrDistances is missing' ) 16 | return(Cls) 17 | } 18 | if(!is.vector(Cls)){ 19 | warning('ClusterRename: Cls is not a vector. Calling as.numeric(as.character(Cls))') 20 | Cls=as.numeric(as.character(Cls)) 21 | } 22 | if(nrow(DataOrDistances)!=length(Cls)){ 23 | warning('ClusterRename: DataOrDistances number of rows does not equal length of Cls. 
Nothing is done' ) 24 | return(Cls) 25 | } 26 | 27 | if(!is.null(rownames(DataOrDistances))){ 28 | names(Cls)=rownames(DataOrDistances) 29 | }else{ 30 | names(Cls)=1:nrow(DataOrDistances) 31 | } 32 | },error=function(e){ 33 | warning(paste('ClusterRename:',e)) 34 | }) 35 | 36 | return(Cls) 37 | } -------------------------------------------------------------------------------- /man/EstimateRadiusByDistance.Rd: -------------------------------------------------------------------------------- 1 | \name{EstimateRadiusByDistance} 2 | \alias{EstimateRadiusByDistance} 3 | 4 | \title{ 5 | Estimate Radius By Distance 6 | } 7 | \description{ 8 | Published in [Thrun et al, 2016] for the case of automatically estimating the radius of the P-matrix. Can also be used to estimate the radius parameter for distance based clustering algorithms. 9 | } 10 | \usage{ 11 | EstimateRadiusByDistance(DistanceMatrix) 12 | } 13 | %- maybe also 'usage' for other objects documented here. 14 | \arguments{ 15 | \item{DistanceMatrix}{ 16 | [1:n,1:n] symmetric distance Matrix of n cases 17 | } 18 | } 19 | \details{ 20 | For density-based clustering algorithms like \code{\link{DBSCAN}} it is not always usefull. 21 | } 22 | \value{ 23 | Numerical scalar defining the radius 24 | } 25 | \references{ 26 | [Thrun et al., 2016] Thrun, M. C., Lerch, F., Loetsch, J., & Ultsch, A.: Visualization and 3D Printing of Multivariate Data of Biomarkers, in Skala, V. (Ed.), International Conference in Central Europe on Computer Graphics, Visualization and Computer Vision (WSCG), Vol. 24, pp. 7-16, Plzen, http://wscg.zcu.cz/wscg2016/short/A43-full.pdf, 2016. 27 | } 28 | \author{ 29 | Michael Thrun 30 | } 31 | \note{ 32 | Symmetric matrix is assumed. 33 | } 34 | 35 | \seealso{ 36 | \code{\link[GeneralizedUmatrix]{GeneratePmatrix}} 37 | } 38 | \examples{ 39 | data('Hepta') 40 | DistanceMatrix=as.matrix(dist(Hepta$Data)) 41 | Radius=EstimateRadiusByDistance(DistanceMatrix) 42 | } 43 | 44 | \keyword{Radius} 45 | \keyword{Clustering} 46 | -------------------------------------------------------------------------------- /man/ClusterCreateClassification.Rd: -------------------------------------------------------------------------------- 1 | \name{ClusterCreateClassification} 2 | \alias{ClusterCreateClassification} 3 | 4 | \title{ 5 | Create Classification for Cluster.. functions 6 | } 7 | \description{ 8 | Creates a Cls from arbitrary list of objects 9 | } 10 | \usage{ 11 | ClusterCreateClassification(Objects,Decreasing) 12 | } 13 | 14 | \arguments{ 15 | \item{Objects}{ 16 | Listed objects, for example factor 17 | } 18 | \item{Decreasing}{ 19 | Boolean that can be missing. If given, sorts \code{ClusterNames} with either decreasing or increasing. 20 | } 21 | 22 | } 23 | \details{ 24 | \code{ClusterNames} can be sorted before the classification stored \code{Cls} is created. See example. 25 | } 26 | \value{ 27 | LIST, with 28 | \item{Cls }{[1:n] numerical vector with n numbers defining the labels of the classification. 
It has 1 to k unique numbers representing the arbitrary labels of the classification.} 29 | \item{ClusterNames }{ClusterNames defined which names belongs to which unique number} 30 | 31 | } 32 | 33 | \author{ 34 | Michael Thrun 35 | } 36 | 37 | \examples{ 38 | 39 | \dontrun{ 40 | Iris=datasets::iris 41 | SomeFactors=Iris$Species 42 | V=ClusterCreateClassification(SomeFactors) 43 | Cls=V$Cls 44 | V$ClusterNames 45 | table(Cls,SomeFactors) 46 | 47 | #Increasing alphabetical order 48 | V=ClusterCreateClassification(SomeFactors,Decreasing=FALSE) 49 | Cls=V$Cls 50 | V$ClusterNames 51 | table(Cls,SomeFactors) 52 | } 53 | } 54 | 55 | \keyword{ClusterCreateClassification} 56 | \keyword{factor} 57 | \concept{Create Cluster Classification} -------------------------------------------------------------------------------- /R/ClusterRedefine.R: -------------------------------------------------------------------------------- 1 | ClusterRedefine=function(Cls,NewLabels,OldLabels){ 2 | # 3 | # INPUT 4 | # Cls 1:n numerical vector of numbers defining the classification as the main 5 | # output of the clustering algorithm for the n cases of data. It has k unique 6 | # numbers representing the arbitrary labels of the clustering. 7 | # NewLabels [1:p], p<=k identifiers of clusters to be changed with 8 | #Optional 9 | # OldLabels [1:p], p<=k identifiers of clusters to be changed, default [1:k] unique cluster Ids of cls 10 | # 11 | # OUTPUT 12 | # Cls[1:n] numerical vector named after NewLabels 13 | # 14 | if(!is.vector(Cls)){ 15 | warning('ClusterRedefine: Cls is not a vector. Calling as.numeric(as.character(Cls))') 16 | Cls=as.numeric(as.character(Cls)) 17 | } 18 | if(missing(OldLabels)) 19 | OldLabels=unique(Cls) 20 | 21 | if(length(OldLabels)!=length(NewLabels)){ 22 | warning('ClusterRedefine: length(OldLabels)!=length(NewLabels))') 23 | return(Cls) 24 | } 25 | if(length(unique(Cls))==1){ 26 | warning("ClusterRedefine: Only one unique label in Cls given. Nothing to redfine.") 27 | return(Cls) 28 | } 29 | #store old clusters 30 | indV=list() 31 | for(i in 1:length(OldLabels)){#iterate cluster labels 32 | #stores boolean vector 33 | indV[[i]]=c(Cls==OldLabels[i]) 34 | } 35 | #apply renaming but ignore same label in old cluster and new cluster that indicates different clusters! 36 | for(i in 1:length(OldLabels)){ 37 | Cls[indV[[i]]]=NewLabels[i] 38 | } 39 | return(Cls) 40 | } -------------------------------------------------------------------------------- /R/ClusterRenameDescendingSize.R: -------------------------------------------------------------------------------- 1 | ClusterRenameDescendingSize <- function(Cls,ProvideClusterNames=FALSE) { 2 | # Cls are renamed such that largest class =1 ... 3 | # RenamedCls = ClusterRenameDescendingSize(GivenCls) 4 | # 5 | # INPUT 6 | # Cls Vector of classifications 7 | # 8 | # OUTPUT 9 | # RenamedCls such that largest class =1 ... 10 | # 11 | # Author: MT, ALU 12 | if(!is.vector(Cls)){ 13 | warning('ClusterRenameDescendingSize: Cls is not a vector. Calling as.numeric(as.character(Cls))') 14 | Cls=as.numeric(as.character(Cls)) 15 | } 16 | if(length(unique(Cls))==1){ 17 | warning("ClusterRenameDescendingSize: Only one unique label in Cls given. 
Nothing to rename.") 18 | return(list(renamedCls=Cls,ClusterName=NULL)) 19 | } 20 | ListeV <- ClusterCount(Cls) 21 | countPerClass <- ListeV[[2]] 22 | UniqueClasses=ListeV[[1]] 23 | sortedClasses <- sort(na.last=TRUE,countPerClass, decreasing = TRUE, index.return=TRUE) # Original-Indizes mitliefern lassen 24 | numberOfClasses <- length(countPerClass) 25 | renamedCls <- Cls 26 | 27 | Matchingtable=matrix(0,numberOfClasses,2) 28 | Matchingtable[,1]=1:numberOfClasses 29 | colnames(Matchingtable)=c("New","Prior") 30 | for (i in 1: numberOfClasses) { 31 | Matchingtable[i,2]=UniqueClasses[sortedClasses$ix[i]] 32 | renamedCls[which(Cls == UniqueClasses[sortedClasses$ix[i]],arr.ind = T)] <- i # Hier mit den mitgelieferten Original-Indizes arbeiten 33 | } 34 | if(isFALSE(ProvideClusterNames)) 35 | return(renamedCls) 36 | else 37 | return(list(renamedCls=renamedCls,ClusterName=Matchingtable)) 38 | } 39 | -------------------------------------------------------------------------------- /man/dietary_survey_IBS.Rd: -------------------------------------------------------------------------------- 1 | \name{dietary_survey_IBS} 2 | \alias{dietary_survey_IBS} 3 | 4 | \docType{data} 5 | \title{ 6 | Dietary survey IBS [Hayes et al., 2013] 7 | } 8 | \description{ 9 | Density based data set with two classes taken from \pkg{clusterR}. 10 | 11 | } 12 | \usage{data("dietary_survey_IBS")} 13 | 14 | \details{ 15 | Size 400, Dimensions 42, stored in \code{dietary_survey_IBS$Data} 16 | 17 | Classes 2, stored in \code{dietary_survey_IBS$Cls}: healthy-group (class == 1) vs. the IBS-patients (class == 2) 18 | 19 | The data is based on the article "A dietary survey of patients with irritable bowel syndrome". The mean and standard deviation of the table 1 (Foods perceived as causing or worsening irritable bowel syndrome symptoms in the IBS group and digestive symptoms in the healthy comparative group) were used to generate the synthetic data" [Hayes et al., 2013]. 20 | } 21 | 22 | \references{ 23 | [Hayes et al., 2013] P. Hayes, C. Corish, E. O'Mahony, E. M. M. Quigley: A dietary survey of patients with irritable bowel syndrome, Journal of Human Nutrition and Dietetics,2013. 24 | } 25 | \examples{ 26 | data(dietary_survey_IBS) 27 | str(dietary_survey_IBS) 28 | \donttest{ 29 | Data=dietary_survey_IBS$Data 30 | Cls=dietary_survey_IBS$Cls 31 | ## The Pswarm is able to seperate density-based clusters 32 | #res=DatabionicSwarm::Pswarm(RobustNormalization(Data,T),PlotIt = T,Cls = Cls) 33 | ## We clearly see that the U-Matrix visualization is unable to catch these structures 34 | ## because it is only usable for distance structures 35 | 36 | #resU=GeneratePswarmVisualization(Data,res$ProjectedPoints,res$LC,PlotIt = T) 37 | } 38 | } 39 | \keyword{datasets,dietary_survey_IBS,FCPS} -------------------------------------------------------------------------------- /man/HCLclustering.Rd: -------------------------------------------------------------------------------- 1 | \name{HCLclustering} 2 | \alias{HCLclustering} 3 | \title{On-line Update (Hard Competitive learning) method} 4 | 5 | \usage{ 6 | HCLclustering(Data, ClusterNo,PlotIt=FALSE,...) 7 | } 8 | \description{ 9 | Hard Competitive learning clustering published by [Ripley, 2007]. 10 | 11 | } 12 | \arguments{ 13 | \item{Data}{[1:n,1:d] matrix of dataset to be clustered. It consists of n cases of d-dimensional data points. 
Every case has d attributes, variables or features.} 14 | 15 | \item{ClusterNo}{A number k which defines k different clusters to be build by the algorithm.} 16 | \item{PlotIt}{Default: FALSE, If TRUE plots the first three dimensions of the dataset with colored three-dimensional data points defined by the clustering stored in \code{Cls}} 17 | \item{\dots}{Further arguments to be set for the clustering algorithm, if not set, default arguments are used.} 18 | } 19 | \value{ 20 | List of 21 | \item{Cls}{[1:n] numerical vector with n numbers defining the classification as the main output of the clustering algorithm. It has k unique numbers representing the arbitrary labels of the clustering.} 22 | \item{Object}{Object defined by clustering algorithm as the other output of this algorithm} 23 | } 24 | 25 | \examples{ 26 | data('Hepta') 27 | out=HCLclustering(Hepta$Data,ClusterNo=7,PlotIt=FALSE) 28 | } 29 | \author{Michael Thrun} 30 | 31 | \references{ 32 | [Dimitriadou, 2002] Dimitriadou, E.: cclust-convex clustering methods and clustering indexes. R package, 2002, 33 | 34 | [Ripley, 2007] Ripley, B. D.: Pattern recognition and neural networks, Cambridge university press, ISBN: 0521717701, 2007. 35 | 36 | } 37 | 38 | \keyword{HCLclustering} 39 | \concept{Hard Competitive learning clustering} -------------------------------------------------------------------------------- /R/ClusterMCC.R: -------------------------------------------------------------------------------- 1 | ClusterMCC=function(PriorCls,CurrentCls,Force=TRUE){ 2 | 3 | if (!requireNamespace('yardstick',quietly = TRUE)) { 4 | message( 5 | 'Subordinate package (yardstick) is missing. No computations are performed. 6 | Please install the package which is defined in "Suggests".' 7 | ) 8 | return( 9 | MCC = "Subordinate package (yardstick) is missing. 10 | Please install the package which is defined in 'Suggests'." 11 | 12 | ) 13 | } 14 | 15 | PriorCls[!is.finite(PriorCls)]=9999 16 | CurrentCls[!is.finite(CurrentCls)]=9999 17 | 18 | if(isTRUE(Force)){ 19 | u1=sort(unique(PriorCls)) 20 | u2=sort(unique(CurrentCls)) 21 | s1=setdiff(u1,u2) 22 | s2=setdiff(u2,u1) 23 | ss=unique(c(s1,s2)) 24 | if(length(ss)>0){ 25 | message("ClusterMCC: No.of Clusters in PriorCls does not equal CurrentCls. Adding missing unique label(s).") 26 | PriorCls=c(PriorCls,ss) 27 | CurrentCls=c(CurrentCls,ss) 28 | } 29 | } 30 | CurrentClsFactor=as.factor(CurrentCls) 31 | PriorClsFactor=as.factor(PriorCls) 32 | if(length(levels(CurrentClsFactor))!=length(levels(PriorClsFactor))){ 33 | message("ClusterMCC: No.of Clusters in PriorCls does not equal CurrentCls. Please make sure that the mapping of numbers in ground truth is equal to to mapping of the clustering.") 34 | if(length(levels(CurrentClsFactor)) NewLabels[i] with \code{i} in [1:p]. \code{NewLabels} can also be a vector for strings, for example for plotting. 
28 | } 29 | \value{ 30 | Cls[1:n] numerical vector named after the row names of data 31 | } 32 | 33 | \author{ 34 | Michael Thrun 35 | } 36 | 37 | \examples{ 38 | data('Lsun3D') 39 | Cls=Lsun3D$Cls 40 | Data=Lsun3D$Data 41 | #prior 42 | ClsNew=unique(Cls)+10 43 | #Redefined clustering 44 | NewCls=ClusterRedefine(Cls,ClsNew) 45 | 46 | table(Cls,NewCls) 47 | 48 | #require(DataVisualizations) 49 | n=length(unique(Cls)) 50 | NewCls=ClusterRedefine(Cls,LETTERS[1:n]) 51 | #DataVisualizations package required 52 | if(requireNamespace("DataVisualizations")) 53 | DataVisualizations::Classplot(Data[,1],Data[,2], 54 | Cls,Names=NewCls,Plotter="ggplot",Size =1.5) 55 | 56 | } 57 | 58 | \keyword{ClusterRename}% use one of RShowDoc("KEYWORDS") 59 | -------------------------------------------------------------------------------- /R/ConsensusClustering.R: -------------------------------------------------------------------------------- 1 | ConsensusClustering=function(DataOrDistances,ClusterNo=NULL,PlotIt=FALSE,PlotConsensus=FALSE,...){ 2 | # INPUT 3 | # DataOrDistances[1:n,1:d] Data set with n observations and d features or distance matrix 4 | # ClusterNo Number of clusters to search for 5 | # 6 | # OPTIONAL 7 | # PlotIt Boolean. Decision to plot or not 8 | # 9 | # OUTPUT 10 | # Cls[1:n] Clustering of data 11 | # Object Object of ConsensusClustering 12 | # 13 | # Author: MT, 11/2020 14 | if (!requireNamespace('ConsensusClusterPlus', quietly = TRUE)) { 15 | message( 16 | 'Subordinate clustering package (ConsensusClusterPlus of Bioconductor) is missing. No computations are performed. 17 | Please install the package which is defined in "Suggests".' 18 | ) 19 | return( 20 | list( 21 | Cls = rep(1, nrow(DataOrDistances)), 22 | Object = "Subordinate clustering package (ConsensusClusterPlus of Bioconductor) is missing. 23 | Please install the package which is defined in 'Suggests'." 24 | ) 25 | ) 26 | } 27 | 28 | if (isSymmetric(unname(DataOrDistances))) { 29 | DataOrDistances=as.dist(DataOrDistances) 30 | }else{ 31 | message("Data matrix may not work in the subordinate package due to a strange error.") 32 | } 33 | 34 | if(is.null(ClusterNo)){ 35 | PlotConsensus=NULL 36 | CA=ConsensusClusterPlus::ConsensusClusterPlus(d = DataOrDistances,plot = PlotConsensus,...) 37 | return(CA) 38 | }else{ 39 | CA=ConsensusClusterPlus::ConsensusClusterPlus(d = DataOrDistances,maxK=ClusterNo,plot = PlotConsensus,...) 40 | Cls=CA[[ClusterNo]]$consensusClass 41 | } 42 | 43 | Cls=ClusterRename(Cls,DataOrDistances) 44 | if(PlotIt){ 45 | ClusterPlotMDS(DataOrDistances,Cls) 46 | } 47 | return(list(Cls=Cls,Object=CA)) 48 | } 49 | -------------------------------------------------------------------------------- /R/RandomForestClustering.R: -------------------------------------------------------------------------------- 1 | RandomForestClustering=function(Data,ClusterNo,Type="ward.D2",NoTrees = 2000,PlotIt=FALSE,PlotForest=FALSE,...){ 2 | if (!requireNamespace('randomForest')) { 3 | message( 4 | 'Subordinate package (randomForest) is missing. No computations are performed. 5 | Please install the package which is defined in "Suggests".' 6 | ) 7 | return( 8 | list( 9 | Cls = rep(1, nrow(Data)), 10 | Object = "Subordinate package (randomForest) is missing. 11 | Please install the package which is defined in 'Suggests'." 12 | ) 13 | ) 14 | } 15 | if (!requireNamespace('cluster',quietly = TRUE)) { 16 | message( 17 | 'Subordinate clustering package (cluster) is missing. No computations are performed.
18 | Please install the package which is defined in "Suggests".' 19 | ) 20 | return( 21 | list( 22 | Cls = rep(1, nrow(Data)), 23 | Object = "Subordinate clustering package (cluster) is missing. 24 | Please install the package which is defined in 'Suggests'." 25 | ) 26 | ) 27 | } 28 | rf=randomForest::randomForest(x = Data, ntree = NoTrees, proximity = TRUE,type="unsupervised",...) 29 | proximity=rf$proximity 30 | Dissimilarity=max(proximity)-proximity #sqrt(1-proximity) 31 | 32 | if(Type=="PAM"){ 33 | CA=cluster::pam(x=Dissimilarity,diss = T,k=ClusterNo) 34 | Cls=CA$clustering 35 | }else{ 36 | CA=HierarchicalClusterDists(Dissimilarity,ClusterNo=ClusterNo,Type=Type,Fast=FALSE) 37 | Cls=CA$Cls 38 | } 39 | 40 | Cls = ClusterRename(Cls, Data) 41 | 42 | if (PlotIt) { 43 | ClusterPlotMDS(Data, Cls) 44 | } 45 | if (PlotForest) { 46 | randomForest::MDSplot(rf, as.factor(Cls)) 47 | } 48 | return(list( 49 | Cls = Cls, 50 | Object = list(rf=rf,CA=CA) 51 | )) 52 | } 53 | 54 | -------------------------------------------------------------------------------- /man/ClusterDendrogram.Rd: -------------------------------------------------------------------------------- 1 | \name{ClusterDendrogram} 2 | \alias{ClusterDendrogram} 3 | 4 | \title{ 5 | Cluster Dendrogram 6 | } 7 | \description{ 8 | Presents a dendrogram of a given tree using a color sequence for the branches, ordered from the largest cluster size to the smallest cluster size. 9 | } 10 | \usage{ 11 | ClusterDendrogram(TreeOrDendrogram, ClusterNo, 12 | 13 | Colorsequence,main='Name of Algorithm') 14 | } 15 | 16 | \arguments{ 17 | \item{TreeOrDendrogram}{ 18 | Either an object of class hclust defining the tree, the third list element of hierarchical cluster algorithms of this package, 19 | 20 | or 21 | 22 | an object of class dendrogram, the second list element of hierarchical cluster algorithms. 23 | } 24 | \item{ClusterNo}{ 25 | k, the number of clusters for cutree. 26 | } 27 | \item{Colorsequence}{ 28 | [1:k] character vector of colors, by default the color sequence defined in \pkg{DataVisualizations} is used 29 | } 30 | \item{main}{Title of plot} 31 | } 32 | \details{ 33 | Requires the package \pkg{dendextend} to work correctly. 34 | } 35 | \value{ 36 | Returned invisibly: 37 | 38 | [1:n] numerical vector defining the clustering of k clusters; this classification is the main output of the algorithm.
39 | } 40 | 41 | \author{ 42 | Michael Thrun 43 | } 44 | 45 | \seealso{ 46 | \code{\link{cutree}}, \code{\link{hclust}} 47 | } 48 | \examples{ 49 | data(Lsun3D) 50 | listofh=HierarchicalClustering(Lsun3D$Data,0,'SingleL') 51 | Tree=listofh$Object 52 | #given colors are per default: 53 | #"magenta" "yellow" "black" "red" 54 | ClusterDendrogram(Tree, 4,main='Single Linkage Clustering') 55 | 56 | listofh=HierarchicalClustering(Lsun3D$Data,4) 57 | ClusterCount(listofh$Cls) 58 | #c1 is magenta, c2 is red, c3 is yellow, c4 is black 59 | #because the order of the cluster sizes is 60 | #c1,c3,c4,c2 61 | } 62 | 63 | \keyword{ClusterDendrogram} 64 | \keyword{Dendrogram} 65 | \concept{Cluster Dendrogram} -------------------------------------------------------------------------------- /R/ClusterInterDistances.R: -------------------------------------------------------------------------------- 1 | ClusterInterDistances=InterClusterDistances=function(FullDistanceMatrix,Cls,Names,PlotIt=FALSE){ 2 | # 3 | # INPUT 4 | # FullDistanceMatrix symmetric distance matrix 5 | # Cls numerical vector of k classes 6 | # 7 | # OPTIONAL 8 | # Names character vector naming k classes 9 | # PlotIt Boolean 10 | # 11 | # OUTPUT 12 | # Matrix of k clusters, each columns consists of the distances between a cluster and all other clusters, 13 | # filled up with NaN at the end to be of the same lenght as the complete distance matrix. 14 | # 15 | # 16 | u=sort(unique(Cls)) 17 | classdist=list(FullDistanceMatrix[upper.tri(FullDistanceMatrix,diag = F)]) 18 | if(length(u)==1) return(unlist(classdist)) 19 | # Does not work for clusters with one point 20 | for(i in u){ 21 | classdistcur=FullDistanceMatrix[Cls==i,Cls!=i] 22 | #if(i==1) print(classdistcur) 23 | distvec=classdistcur[upper.tri(classdistcur,diag = F)] 24 | classdist=c(classdist,list(distvec)) 25 | } 26 | 27 | if(requireNamespace("DataVisualizations",quietly = TRUE)){ 28 | xmat=do.call(DataVisualizations::CombineCols,classdist) 29 | } 30 | else{ 31 | stop('DataVisualizations package not loaded or installed.') 32 | } 33 | 34 | 35 | if(missing(Names)){ 36 | colnames(xmat)=c('Full',paste0('Cluster',u)) 37 | }else{ 38 | if(length(u)!=length(Names)){ 39 | warning('Length of Names has to be equal of length of unique Cls.') 40 | colnames(xmat)=c('Full',paste0('Cluster',Names)) 41 | }else{ 42 | colnames(xmat)=c('Full',Names) 43 | } 44 | } 45 | 46 | if(PlotIt){ 47 | ggobject=DataVisualizations::MDplot(xmat,OnlyPlotOutput = TRUE) 48 | print(ggobject) 49 | return(list(ClusterDists=as.matrix(xmat),ggobject=ggobject)) 50 | } 51 | 52 | return(as.matrix(xmat)) 53 | } -------------------------------------------------------------------------------- /man/ClusterEqualWeighting.Rd: -------------------------------------------------------------------------------- 1 | \name{ClusterEqualWeighting} 2 | \alias{ClusterEqualWeighting} 3 | %- Also NEED an '\alias' for EACH other topic documented here. 4 | \title{ 5 | ClusterEqualWeighting 6 | } 7 | \description{ 8 | Weights clusters equally 9 | } 10 | \usage{ 11 | ClusterEqualWeighting(Cls, Data, MinClusterSize) 12 | } 13 | %- maybe also 'usage' for other objects documented here. 14 | \arguments{ 15 | \item{Cls}{ 16 | 1:n numerical vector of numbers defining the classification as the main output of the clustering algorithm for the n cases of data. It has k unique numbers representing the arbitrary labels of the clustering. 17 | } 18 | \item{Data}{ 19 | Optional, [1:n,1:d] matrix of dataset consisting of n cases of d-dimensional data points. 
Every case has d attributes, variables or features. 20 | } 21 | \item{MinClusterSize}{ 22 | Optional, scalar defining the number of cases m that each cluster should have 23 | } 24 | } 25 | \details{ 26 | Balances clusters such that their sizes are the same by subsampling the larger clusters. If \code{MinClusterSize} is missing, the number of cases per cluster is set to the smallest cluster size. For cluster sizes smaller than \code{MinClusterSize}, sampling with replacement is turned on, i.e., upsampling. For cluster sizes equal to \code{MinClusterSize}, no sampling is performed. 27 | } 28 | \value{ 29 | List of 30 | \item{BalancedCls }{Vector of Cls such that all clusters have the same sizes specified by \code{MinClusterSize}} 31 | \item{BalancedInd }{Index such that BalancedCls = Cls[BalancedInd]} 32 | \item{BalancedData }{NULL if missing, otherwise, Data[BalancedInd,]} 33 | } 34 | 35 | \author{ 36 | Alfred Ultsch (matlab), reimplemented by Michael Thrun 37 | } 38 | 39 | 40 | \examples{ 41 | data(Hepta) 42 | ClusterEqualWeighting(Hepta$Cls,Hepta$Data,5) 43 | } 44 | 45 | \keyword{ClusterEqualWeighting} 46 | \concept{Equal Weighting} 47 | \concept{Cluster Sampling} 48 | -------------------------------------------------------------------------------- /man/ClusterDistances.Rd: -------------------------------------------------------------------------------- 1 | \name{ClusterDistances} 2 | \alias{ClusterIntraDistances} 3 | \alias{IntraClusterDistances} 4 | \alias{ClusterDistances} 5 | 6 | \title{ 7 | ClusterDistances 8 | } 9 | \description{ 10 | Computes intra-cluster distances, i.e., the distances within each cluster. 11 | } 12 | \usage{ 13 | ClusterDistances(FullDistanceMatrix, Cls, 14 | 15 | Names, PlotIt = FALSE) 16 | 17 | ClusterIntraDistances(FullDistanceMatrix, Cls, 18 | 19 | Names, PlotIt = FALSE) 20 | } 21 | \arguments{ 22 | \item{FullDistanceMatrix}{ 23 | [1:n,1:n] symmetric distance matrix 24 | } 25 | \item{Cls}{ 26 | [1:n] numerical vector of k classes 27 | } 28 | \item{Names}{ 29 | Optional [1:k] character vector naming k classes 30 | } 31 | \item{PlotIt}{ 32 | Optional, Plots if TRUE 33 | } 34 | } 35 | \details{ 36 | Cluster distances are given back as a matrix with one column per cluster, plus the vector of the full distance matrix without the diagonal elements and the upper half of the symmetric matrix. 37 | Details and definitions can be found in [Thrun, 2021]. 38 | } 39 | \value{ 40 | Matrix [1:m,1:(k+1)] of k clusters, each column consists of the distances in a cluster, filled up with NaN at the end to be of the same length as the vector of the upper triangle of the complete distance matrix. 41 | } 42 | \author{ 43 | Michael Thrun 44 | } 45 | \references{ 46 | [Thrun, 2021] Thrun, M. C.: The Exploitation of Distance Distributions for Clustering, International Journal of Computational Intelligence and Applications, Vol. 20(3), pp. 2150016, DOI: \doi{10.1142/S1469026821500164}, 2021.
47 | } 48 | \seealso{ 49 | \code{\link[DataVisualizations]{MDplot}} 50 | 51 | \code{\link{ClusterInterDistances}} 52 | } 53 | \examples{ 54 | data(Hepta) 55 | Distance=as.matrix(dist(Hepta$Data)) 56 | 57 | interdists=ClusterDistances(Distance,Hepta$Cls) 58 | } 59 | 60 | \concept{intra cluster} 61 | \keyword{distances} 62 | \keyword{intracluster} -------------------------------------------------------------------------------- /man/NeuralGasClustering.Rd: -------------------------------------------------------------------------------- 1 | \name{NeuralGasClustering} 2 | \alias{NeuralGasClustering} 3 | \title{Neural gas algorithm for clustering} 4 | 5 | \usage{ 6 | NeuralGasClustering(Data, ClusterNo,PlotIt=FALSE,...) 7 | } 8 | 9 | \description{ 10 | Neural gas clustering published by [Martinetz et al., 1993]] and implemented by [Bodenhofer et al., 2011]. 11 | 12 | } 13 | \arguments{ 14 | \item{Data}{[1:n,1:d] matrix of dataset to be clustered. It consists of n cases of d-dimensional data points. Every case has d attributes, variables or features.} 15 | 16 | \item{ClusterNo}{A number k which defines k different clusters to be built by the algorithm.} 17 | \item{PlotIt}{Default: FALSE, If TRUE plots the first three dimensions of the dataset with colored three-dimensional data points defined by the clustering stored in \code{Cls}} 18 | \item{\dots}{Further arguments to be set for the clustering algorithm, if not set, default arguments are used.} 19 | } 20 | \value{ 21 | List of 22 | \item{Cls}{[1:n] numerical vector with n numbers defining the classification as the main output of the clustering algorithm. It has k unique numbers representing the arbitrary labels of the clustering.} 23 | \item{Object}{Object defined by clustering algorithm as the other output of this algorithm} 24 | } 25 | 26 | \examples{ 27 | data('Hepta') 28 | out=NeuralGasClustering(Hepta$Data,ClusterNo=7,PlotIt=FALSE) 29 | } 30 | \author{Michael Thrun} 31 | 32 | 33 | 34 | \references{ 35 | [Dimitriadou, 2002] Dimitriadou, E.: cclust-convex clustering methods and clustering indexes. R package, 2002, 36 | 37 | [Martinetz et al., 1993] Martinetz, T. M., Berkovich, S. G., & Schulten, K. J.: 'Neural-gas' network for vector quantization and its application to time-series prediction, IEEE Transactions on Neural Networks, Vol. 4(4), pp. 558-569. 1993. 38 | } 39 | 40 | \keyword{NeuralGasClustering}% __ONLY ONE__ keyword per line 41 | \concept{Neural Gas}% __ONLY ONE__ keyword per line -------------------------------------------------------------------------------- /R/PAMclustering.R: -------------------------------------------------------------------------------- 1 | PAMclustering=PAMClustering=function(DataOrDistances,ClusterNo,PlotIt=FALSE,Standardization=TRUE,Data,...){ 2 | # INPUT 3 | # Data[1:n,1:d] Data set with n observations and d features 4 | # ClusterNo Number of clusters to search for 5 | # 6 | # OPTIONAL 7 | # PlotIt Boolean. Decision to plot or not 8 | # Standardization Boolean. 9 | # 10 | # OUTPUT 11 | # Cls[1:n] Clustering of data 12 | # Object Object of cluster::pam algorithm 13 | # 14 | # Author: MT, 04/2018 15 | if (!requireNamespace('cluster',quietly = TRUE)) { 16 | message( 17 | 'Subordinate clustering package (cluster) is missing. No computations are performed. 18 | Please install the package which is defined in "Suggests".' 19 | ) 20 | return( 21 | list( 22 | Cls = rep(1, nrow(DataOrDistances)), 23 | Object = "Subordinate clustering package (cluster) is missing. 
24 | Please install the package which is defined in 'Suggests'." 25 | ) 26 | ) 27 | } 28 | 29 | if(missing(DataOrDistances)){ 30 | DataOrDistances=Data 31 | } 32 | if(Standardization==1) Standardization=TRUE 33 | if(Standardization==0) Standardization=FALSE 34 | if (isSymmetric(unname(DataOrDistances))) { 35 | Input = as.dist(DataOrDistances) 36 | AnzVar = ncol(DataOrDistances) 37 | AnzData = nrow(DataOrDistances) 38 | diss =TRUE 39 | }else{ 40 | Input=DataOrDistances 41 | diss =FALSE 42 | } 43 | 44 | pam=cluster::pam(x=Input,k=ClusterNo,diss=diss,stand=Standardization,...) 45 | Cls=pam$clustering 46 | if(!is.null(rownames(DataOrDistances))) 47 | names(Cls)=rownames(DataOrDistances) 48 | else 49 | names(Cls)=1:nrow(DataOrDistances) 50 | 51 | if(PlotIt){ 52 | ClusterPlotMDS(DataOrDistances,Cls) 53 | } 54 | Cls=ClusterRename(Cls,DataOrDistances) 55 | return(list(Cls=Cls,Object=pam)) 56 | } -------------------------------------------------------------------------------- /R/QTclustering.R: -------------------------------------------------------------------------------- 1 | QTclustering=QTClustering <-function(Data,Radius,PlotIt=FALSE,...){ 2 | # Cls=QTClustering(Data,Radius=2) 3 | # 4 | # INPUT 5 | # Data[1:n,1:d] Data set with n observations and d features 6 | # Radius Maximum radius of clusters. If NULL, automatic estimation can be 7 | # done with [Thrun et al., 2016] if not otherwise set. 8 | # 9 | # OPTIONAL 10 | # PlotIt Boolean. Decision to plot or not 11 | # 12 | # OUTPUT 13 | # Cls[1:n] Clustering of data 14 | # Object Object of flexclust::qtclust algorithm 15 | # 16 | # Author: MT 04/2018 17 | 18 | if (!requireNamespace('flexclust',quietly = TRUE)) { 19 | message( 20 | 'Subordinate clustering package (flexclust) is missing. No computations are performed. 21 | Please install the package which is defined in "Suggests".' 22 | ) 23 | return( 24 | list( 25 | Cls = rep(1, nrow(Data)), 26 | Object = "Subordinate clustering package (flexclust) is missing. 27 | Please install the package which is defined in 'Suggests'." 28 | ) 29 | ) 30 | } 31 | 32 | if(is.null(Radius)){ #estimate Maximum diameter of cluster, i.e. group of large distances 33 | if(requireNamespace("parallelDist",quietly = TRUE)){ 34 | Radius=EstimateRadiusByDistance(as.matrix(parallelDist::parallelDist(Data))) 35 | } 36 | else{ 37 | warning('parallelDist package not loaded or installed, using dist()') 38 | Radius=EstimateRadiusByDistance(as.matrix(dist(Data))) 39 | } 40 | } 41 | obj=flexclust::qtclust(Data,Radius,...) 42 | Cls=obj@cluster 43 | Cls[!is.finite(Cls)]=0 44 | if(!is.null(rownames(Data))) 45 | names(Cls)=rownames(Data) 46 | 47 | if(PlotIt){ 48 | Cls2=Cls 49 | Cls2[Cls2==0]=999 50 | ClusterPlotMDS(Data,Cls2) 51 | } 52 | Cls=ClusterRename(Cls,Data) 53 | return(list(Cls=Cls,Object=obj)) 54 | } -------------------------------------------------------------------------------- /man/EntropyOfDataField.Rd: -------------------------------------------------------------------------------- 1 | \name{EntropyOfDataField} 2 | \alias{EntropyOfDataField} 3 | %- Also NEED an '\alias' for EACH other topic documented here. 4 | \title{ 5 | Entropy Of a Data Field [Wang et al., 2011]. 6 | } 7 | \description{ 8 | Calculates the Potential Entropy Of a Data Field for a given ranges of impact factors sigma 9 | } 10 | \usage{ 11 | EntropyOfDataField(Data, 12 | 13 | sigmarange = c(0.01, 0.1, 0.5, 1, 2, 5, 8, 10, 100) 14 | 15 | , PlotIt = FALSE) 16 | } 17 | %- maybe also 'usage' for other objects documented here. 
18 | \arguments{ 19 | \item{Data}{ 20 | [1:n,1:d] data matrix 21 | } 22 | \item{sigmarange}{ 23 | Numeric vector [1:s] of relevant sigmas 24 | } 25 | \item{PlotIt}{ 26 | FALSE: disable plot, TRUE: Plot with upper boundary of H after [Wang et al., 2011]. 27 | } 28 | } 29 | \details{ 30 | In theory there should be a curve with a clear minimum of Entropy [Wang et al.,2011]. Then the choice for the impact factor sigma is the minimum of the entropy to define the correct data field. It follows, that the influence radius is 3/sqrt(2)*sigma (3B rule of gaussian distribution) for clustering algorithms like density peak clustering [Wang et al.,2011]. 31 | } 32 | \value{ 33 | [1:s] named vector of the Entropy of data field. The names are the impact factor sigma. 34 | } 35 | \references{ 36 | [Wang et al., 2015] Wang, S., Wang, D., Li, C., & Li, Y.: Comment on" Clustering by fast search and find of density peaks", arXiv preprint arXiv:1501.04267, 2015. 37 | 38 | [Wang et al., 2011] Wang, S., Gan, W., Li, D., & Li, D.: Data field for hierarchical clustering, International Journal of Data Warehousing and Mining (IJDWM), Vol. 7(4), pp. 43-63. 2011. 39 | 40 | 41 | } 42 | \author{ 43 | Michael Thrun 44 | } 45 | 46 | 47 | \examples{ 48 | data(Hepta) 49 | H=EntropyOfDataField(Hepta$Data,PlotIt=FALSE) 50 | Sigmamin=names(H)[which.min(H)] 51 | Dc=3/sqrt(2)*as.numeric(names(H)[which.min(H)]) 52 | } 53 | 54 | \concept{data field} 55 | \keyword{entropy} 56 | \concept{data entropy} -------------------------------------------------------------------------------- /man/StatPDEdensity.Rd: -------------------------------------------------------------------------------- 1 | \name{StatPDEdensity} 2 | \alias{StatPDEdensity} 3 | 4 | \title{ 5 | Pareto Density Estimation 6 | } 7 | \description{ 8 | Density estimation for ggplot with a clear model behind it. 9 | } 10 | 11 | \format{ 12 | The format is: 13 | Classes 'StatPDEdensity', 'Stat', 'ggproto' 14 | aesthetics: function 15 | compute_group: function 16 | compute_layer: function 17 | compute_panel: function 18 | default_aes: uneval 19 | extra_params: na.rm 20 | finish_layer: function 21 | non_missing_aes: 22 | parameters: function 23 | required_aes: x y 24 | retransform: TRUE 25 | setup_data: function 26 | setup_params: function 27 | super: 28 | } 29 | \details{ 30 | PDE was published in [Ultsch, 2005], short explanation in [Thrun, Ultsch 2018] and the PDE optimized violin plot was published in [Thrun et al., 2018]. 31 | } 32 | 33 | \references{ 34 | [Ultsch,2005] Ultsch, A.: Pareto density estimation: A density estimation for knowledge discovery, in Baier, D.; Werrnecke, K. D., (Eds), Innovations in classification, data science, and information systems, Proc Gfkl 2003, pp 91-100, Springer, Berlin, 2005. 35 | 36 | [Thrun, Ultsch 2018] Thrun, M. C., & Ultsch, A. : Effects of the payout system of income taxes to municipalities in Germany, in Papiez, M. & Smiech,, S. (eds.), Proc. 12th Professor Aleksander Zelias International Conference on Modelling and Forecasting of Socio-Economic Phenomena, pp. 533-542, Cracow: Foundation of the Cracow University of Economics, Cracow, Poland, 2018. 37 | 38 | [Thrun et al, 2018] Thrun, M. C., Pape, F., & Ultsch, A. : Benchmarking Cluster Analysis Methods using PDE-Optimized Violin Plots, Proc. European Conference on Data Analysis (ECDA), accepted, Paderborn, Germany, 2018. 
39 | 40 | } 41 | 42 | \keyword{density estimation} 43 | \concept{ggproto density estimation} 44 | \keyword{PDE} 45 | \concept{Pareto Density Estimation} -------------------------------------------------------------------------------- /man/HierarchicalClusterDists.Rd: -------------------------------------------------------------------------------- 1 | \name{HierarchicalClusterDists} 2 | 3 | \alias{HierarchicalClusterDists} 4 | 5 | \title{ 6 | Internal Function of Hierarchical Clustering with Distances 7 | } 8 | \usage{ 9 | HierarchicalClusterDists(pDist,ClusterNo=0,Type="ward.D2", 10 | 11 | ColorTreshold=0,Fast=FALSE,\dots) 12 | } 13 | 14 | \description{ 15 | Please use \code{\link{HierarchicalClustering}}. Cluster analysis on a set of dissimilarities and methods for analyzing it. Uses stats package function 'hclust'. 16 | } 17 | 18 | \arguments{ 19 | \item{pDist}{Distances as either matrix [1:n,1:n] or dist object} 20 | \item{ClusterNo}{A number k which defines k different clusters to be built by the algorithm.} 21 | \item{Type}{Method of cluster analysis: "ward.D", "ward.D2", "single", "complete", "average", "mcquitty", "median" or "centroid".} 22 | \item{ColorTreshold}{Draws cutline w.r.t. dendogram y-axis (height), height of line as scalar should be given} 23 | \item{Fast}{If TRUE and fastcluster installed, then a faster implementation of the methods above can be used} 24 | \item{\dots}{In case of plotting further argument for \code{plot}, see \code{\link{as.dendrogram}} 25 | } 26 | } 27 | 28 | \value{ 29 | List of 30 | \item{Cls}{If, ClusterNo>0: [1:n] numerical vector with n numbers defining the classification as the main output of the clustering algorithm. It has k unique numbers representing the arbitrary labels of the clustering. Otherwise for ClusterNo=0: NULL} 31 | \item{Dendrogram}{Dendrogram of hierarchical clustering algorithm} 32 | \item{Object}{Ultrametric tree of hierarchical clustering algorithm} 33 | } 34 | \author{Michael Thrun} 35 | \examples{ 36 | data('Hepta') 37 | #out=HierarchicalClusterDists(as.matrix(dist(Hepta$Data)),ClusterNo=7) 38 | } 39 | \keyword{Hierarchical} 40 | \keyword{Clustering} 41 | \keyword{Distances} 42 | 43 | 44 | \seealso{ 45 | \code{\link{HierarchicalClusterData}} 46 | 47 | \code{\link{HierarchicalClusterDists}} 48 | 49 | \code{\link{HierarchicalClustering}} 50 | } 51 | -------------------------------------------------------------------------------- /man/kmeansdist.Rd: -------------------------------------------------------------------------------- 1 | \name{kmeansDist} 2 | \alias{kmeansDist} 3 | \title{ 4 | k-means Clustering using a distance matrix 5 | } 6 | \description{ 7 | Perform k-means clustering on a distance matrix 8 | } 9 | \usage{ 10 | kmeansDist(Distance, ClusterNo=2,Centers=NULL, 11 | 12 | RandomNo=1,maxIt = 2000, 13 | 14 | PlotIt=FALSE,verbose = F) 15 | } 16 | \arguments{ 17 | \item{Distance}{ Distance matrix. 
Of dimension [1:n,1:n] for n data points. } 18 | \item{ClusterNo}{A number k which defines k different clusters to be built by the algorithm.} 19 | 20 | \item{Centers}{Default: NULL, a set of initial (distinct) cluster centres.} 21 | 22 | \item{RandomNo}{If >1: number of random initializations; the initialization yielding the minimal SSE is kept} 23 | 24 | \item{maxIt}{Optional: Maximum number of iterations before the algorithm terminates.} 25 | \item{PlotIt}{Default: FALSE, If TRUE plots the first three dimensions of the dataset with colored three-dimensional data points defined by the clustering stored in \code{Cls}} 26 | \item{verbose}{Optional: if TRUE, the algorithm outputs the current iteration.} 27 | } 28 | 29 | \value{ 30 | \item{Cls[1:n] }{[1:n] numerical vector with n numbers defining the classification as the main output of the clustering algorithm. It has k unique numbers representing the arbitrary labels of the clustering.} 31 | \item{centerids[1:k]}{Indices of the centroids from which the clustering Cls was created} 32 | } 33 | \author{ 34 | Felix Pape, Michael Thrun 35 | } 36 | \note{ 37 | Currently an experimental version 38 | } 39 | \examples{ 40 | data('Hepta') 41 | #out=kmeansDist(as.matrix(dist(Hepta$Data)),ClusterNo=7,PlotIt=FALSE,RandomNo = 10) 42 | 43 | \dontrun{ 44 | data('Leukemia') 45 | #as expected does not perform well 46 | #for non-spherical cluster structures: 47 | #out=kmeansDist(Leukemia$DistanceMatrix,ClusterNo=6,PlotIt=TRUE,RandomNo=10) 48 | } 49 | } 50 | 51 | \keyword{kmeansClustering} 52 | \keyword{Clustering} 53 | \concept{kmeans Clustering} 54 | \keyword{distances} -------------------------------------------------------------------------------- /man/MeanShiftClustering.Rd: -------------------------------------------------------------------------------- 1 | \name{MeanShiftClustering} 2 | \alias{MeanShiftClustering} 3 | \title{Mean Shift Clustering} 4 | \description{ 5 | Mean Shift Clustering of [Cheng, 1995] 6 | } 7 | \usage{ 8 | MeanShiftClustering(Data, 9 | 10 | PlotIt=FALSE,...) 11 | } 12 | 13 | \arguments{ 14 | \item{Data}{[1:n,1:d] matrix of dataset to be clustered. It consists of n cases of d-dimensional data points. Every case has d attributes, variables or features.} 15 | 16 | 17 | \item{PlotIt}{Default: FALSE, If TRUE plots the first three dimensions of the dataset with colored three-dimensional data points defined by the clustering stored in \code{Cls}} 18 | 19 | \item{\dots}{Further arguments to be set for the clustering algorithm, if not set, default arguments are used.} 20 | } 21 | 22 | \details{ 23 | The radius used for search can be specified with the "\code{radius}" parameter. The maximum number of iterations before algorithm termination is controlled with the "\code{max_iterations}" parameter. 24 | 25 | If the distance between two centroids is less than the given radius, one will be removed. A radius of 0 or less means an estimate will be calculated and used for the radius. Default value "0" (numeric). 26 | } 27 | \value{ 28 | List of 29 | \item{Cls}{[1:n] numerical vector with n numbers defining the classification as the main output of the clustering algorithm.
It has k unique numbers representing the arbitrary labels of the clustering.} 30 | \item{Object}{Object defined by clustering algorithm as the other output of this algorithm} 31 | } 32 | 33 | \examples{ 34 | data('Hepta') 35 | out=MeanShiftClustering(Hepta$Data,PlotIt=FALSE,radius=1) 36 | } 37 | \author{Michael Thrun} 38 | 39 | \references{ 40 | [Cheng, 1995] Cheng, Yizong: Mean Shift, Mode Seeking, and Clustering, IEEE Transactions on Pattern Analysis and Machine Intelligence, Vol. 17 (8), pp. 790-799, doi:10.1109/34.400568, 1995. 41 | } 42 | \keyword{MeanShiftClustering} 43 | \keyword{Clustering} 44 | \concept{Large Application Clusteringg} 45 | \keyword{clara} -------------------------------------------------------------------------------- /R/Spectrum.R: -------------------------------------------------------------------------------- 1 | Spectrum=function(Data,Type=2,ClusterNo=NULL,PlotIt=FALSE,Silent=TRUE,PlotResults=FALSE,...){ 2 | # INPUT 3 | # Data[1:n,1:d] Data set with n observations and d features 4 | # Type Type = 1: Default eigengap method (Gaussian clusters) 5 | # Type = 2: multimodality gap method (Gaussian/ non-Gaussian clusters) 6 | # Type = 3: Allows to setClusterNo 7 | # ClusterNo Number of clusters to search for 8 | # 9 | # OPTIONAL 10 | # PlotIt Boolean. Decision to plot or not 11 | # Silent Boolean. Show status messages. 12 | # PlotResults Boolean. Visualize results or not. 13 | # 14 | # OUTPUT 15 | # Cls[1:n] Clustering of data 16 | # Object Object of Spectrum::Spectrum algorithm 17 | # 18 | # Author: MT 19 | if (!requireNamespace('Spectrum',quietly = TRUE)) { 20 | message( 21 | 'Subordinate clustering package (Spectrum) is missing. No computations are performed. 22 | Please install the package which is defined in "Suggests".' 23 | ) 24 | return( 25 | list( 26 | Cls = rep(1, nrow(Data)), 27 | Object = "Subordinate clustering package (Spectrum) is missing. 28 | Please install the package which is defined in 'Suggests'." 29 | ) 30 | ) 31 | } 32 | 33 | 34 | if(is.null(ClusterNo)){ 35 | out=Spectrum::Spectrum(t(Data),method = Type,silent = Silent,showres =PlotResults ,...) 36 | }else{ 37 | if(ClusterNo==0) ClusterNo=NULL 38 | warning("Spectrum:: curently there is the unresolved bug: Error in rowSums(A2) : object 'A2' not found in the Subordinate clustering package.") 39 | out=Spectrum::Spectrum(t(Data),fixk = ClusterNo,method = 3,ClusterNo,silent = Silent,showres =PlotResults,...) 40 | } 41 | 42 | 43 | Cls=out$assignments 44 | 45 | 46 | if(PlotIt){ 47 | ClusterPlotMDS(Data,Cls) 48 | } 49 | Cls=ClusterRename(Cls,Data) 50 | return(list(Cls=Cls,Object=out)) 51 | } -------------------------------------------------------------------------------- /man/ClusterInterDistances.Rd: -------------------------------------------------------------------------------- 1 | \name{ClusterInterDistances} 2 | \alias{ClusterInterDistances} 3 | \alias{InterClusterDistances} 4 | 5 | \title{ 6 | Computes Inter-Cluster Distances 7 | } 8 | \description{ 9 | Computes inter-cluster distances which are the distance between each cluster and all other clusters 10 | } 11 | \usage{ 12 | ClusterInterDistances(FullDistanceMatrix, Cls, 13 | 14 | Names,PlotIt=FALSE) 15 | } 16 | 17 | \arguments{ 18 | \item{FullDistanceMatrix}{ 19 | [1:n,1:n] symmetric distance matrix 20 | } 21 | \item{Cls}{ 22 | [1:n] numerical vector of numbers defining the classification as the main output of the clustering algorithm for the n cases of data. It has k unique numbers representing the arbitrary labels of the clustering. 
23 | } 24 | \item{Names}{ 25 | Optional [1:k] character vector naming k classes 26 | } 27 | \item{PlotIt}{ 28 | Optional, Plots if TRUE 29 | } 30 | } 31 | \details{ 32 | Cluster distances are given back as a matrix with one column per cluster, plus the vector of the full distance matrix without the diagonal elements and the upper half of the symmetric matrix. Details and definitions can be found in [Thrun, 2021]. 33 | } 34 | \value{ 35 | Matrix [1:m,1:(k+1)] of k clusters, each column consists of the distances between a cluster and all other clusters, filled up with NaN at the end to be of the same length as the vector of the upper triangle of the complete distance matrix. 36 | } 37 | \author{ 38 | Michael Thrun 39 | } 40 | \references{ 41 | [Thrun, 2021] Thrun, M. C.: The Exploitation of Distance Distributions for Clustering, International Journal of Computational Intelligence and Applications, Vol. 20(3), pp. 2150016, DOI: \doi{10.1142/S1469026821500164}, 2021. 42 | } 43 | \seealso{ 44 | \code{\link[DataVisualizations]{MDplot}} 45 | 46 | \code{ClusterDistances} 47 | } 48 | \examples{ 49 | data(Hepta) 50 | Distance=as.matrix(dist(Hepta$Data)) 51 | 52 | interdists=ClusterInterDistances(Distance,Hepta$Cls) 53 | } 54 | 55 | \concept{inter cluster} 56 | \keyword{distances} 57 | \keyword{intercluster} -------------------------------------------------------------------------------- /R/HierarchicalClusterDists.R: -------------------------------------------------------------------------------- 1 | HierarchicalClusterDists <-function(pDist,ClusterNo=0,Type="ward.D2",ColorTreshold=0,Fast=FALSE,...){ 2 | # HierarchicalClusterDists(pDist) 3 | # HierarchicalClusterDists(pDist,0,"ward.D2",100) 4 | # Cls=HierarchicalClusterDists(pDist,6,"ward.D2") 5 | # 6 | # Draws either a dendrogram or returns a class assignment 7 | # INPUT 8 | # pDist[1:n,1:n] Distance matrix of n observations or dist object 9 | # ClusterNo Number of clusters to search for. ClusterNo=0 means that only a dendrogram is drawn 10 | # 11 | # OPTIONAL 12 | # Type Cluster method: "ward.D", "ward.D2", "single", "complete", "average", "mcquitty", "median" or "centroid". 13 | # ColorTreshold Draws an intersection line at the given dendrogram y-axis value (height), given as a number 14 | # Fast Enables a fast computation. 15 | # 16 | # OUTPUT 17 | # Cls[1:n] Clustering of data 18 | # Dendrogram 19 | # hclustObject Object of hclust algorithm 20 | # 21 | # Author: MT 22 | # Clustering 23 | if(!inherits(pDist,'dist')) 24 | pDist=as.dist(pDist) 25 | 26 | if(isTRUE(Fast)&requireNamespace('fastcluster')){ 27 | hc <- fastcluster::hclust(pDist,method=Type) 28 | }else{ 29 | hc <- hclust(pDist,method=Type); #sometimes yields values different from Z = linkage(Y,Type); 30 | } 31 | m=paste(Type,"Clustering, N=",nrow(as.matrix(pDist))) 32 | 33 | # Classification or Dendrogram 34 | if (ClusterNo>0){ 35 | Cls=cutree(hc,ClusterNo) 36 | return(list(Cls=Cls,Dendrogram=as.dendrogram(hc),Object=hc)) 37 | } 38 | else{ 39 | x=as.dendrogram(hc);plot(x, main=m,xlab="Number of Data Points N", ylab="Ultrametric Portion of Distance",sub=" ",leaflab ="none",...)
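# draw the x-axis explicitly below the dendrogram; leaf labels were suppressed above via leaflab="none"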
40 | axis(1,col="black",las=1) 41 | if (ColorTreshold!=0){ 42 | rect.hclust(hc, h=ColorTreshold,border="red")} 43 | else{ 44 | #rect.hclust(hc, h=4*mean(hc$height),border="red") 45 | } 46 | return(list(Cls=NULL,Dendrogram=x,Object=hc)) 47 | } 48 | } 49 | 50 | 51 | 52 | -------------------------------------------------------------------------------- /man/pdfClustering.Rd: -------------------------------------------------------------------------------- 1 | \name{pdfClustering} 2 | \alias{pdfClustering} 3 | %- Also NEED an '\alias' for EACH other topic documented here. 4 | \title{ 5 | Probability Density Distribution Clustering 6 | } 7 | \description{ 8 | Clustering via non parametric density estimation 9 | } 10 | \usage{ 11 | pdfClustering(Data, PlotIt = FALSE, ...) 12 | } 13 | 14 | \arguments{ 15 | \item{Data}{[1:n,1:d] matrix of dataset to be clustered. It consists of n cases of d-dimensional data points. Every case has d attributes, variables or features.} 16 | \item{PlotIt}{Default: FALSE, if TRUE plots the first three dimensions of the dataset with colored three-dimensional data points defined by the clustering stored in \code{Cls}} 17 | \item{\dots}{Further arguments to be set for the clustering algorithm, if not set, default arguments are used.} 18 | 19 | } 20 | \details{ 21 | Cluster analysis is performed by the density-based procedures described in Azzalini and Torelli (2007) and Menardi and Azzalini (2014), and summarized in Azzalini and Menardi (2014). 22 | } 23 | \value{ 24 | List of 25 | \item{Cls}{[1:n] numerical vector with n numbers defining the classification as the main output of the clustering algorithm. It has k unique numbers representing the arbitrary labels of the clustering.} 26 | \item{Object}{Object defined by clustering algorithm as the other output of this algorithm} 27 | } 28 | \references{ 29 | Azzalini, A., Menardi, G. (2014). Clustering via nonparametric density estimation: the R package pdfCluster. Journal of Statistical Software, 57(11), 1-26, URL http://www.jstatsoft.org/v57/i11/. 30 | 31 | Azzalini A., Torelli N. (2007). Clustering via nonparametric density estimation. Statistics and Computing. 17, 71-80. 32 | 33 | Menardi, G., Azzalini, A. (2014). An advancement in clustering via nonparametric density estimation. Statistics and Computing. DOI: 10.1007/s11222-013-9400-x. 34 | } 35 | \author{ 36 | Michael Thrun 37 | } 38 | 39 | \examples{ 40 | data('Hepta') 41 | out=pdfClustering(Hepta$Data,PlotIt=FALSE) 42 | } 43 | \keyword{pdfClustering} 44 | \keyword{Clustering} -------------------------------------------------------------------------------- /man/ClusterChallenge.Rd: -------------------------------------------------------------------------------- 1 | \name{ClusterChallenge} 2 | \alias{ClusterChallenge} 3 | 4 | \title{ 5 | Generates a Fundamental Clustering Challenge based on specific artificial datasets. 6 | } 7 | \description{ 8 | Lsun3D and FCPS datasets were introduced in various publications for a specific fixed size. This function generalizes them for any sample size. 
9 | } 10 | \usage{ 11 | ClusterChallenge(Name,SampleSize, 12 | 13 | PlotIt=FALSE,PointSize=1,Plotter3D="rgl",\dots) 14 | } 15 | \arguments{ 16 | \item{Name}{ 17 | string, either 'Atom', 'Chainlink', 'EngyTime', 'GolfBall', 'Hepta', 'Lsun3D', 'Target', 18 | 'Tetra', 19 | 'TwoDiamonds' or 20 | 'WingNut' 21 | } 22 | \item{SampleSize}{ 23 | Size of sample, higher than 300, preferably above 500 24 | } 25 | \item{PlotIt}{ 26 | TRUE: Plots the challenge with \code{\link{ClusterPlotMDS}} 27 | } 28 | \item{PointSize}{ 29 | If PlotIt=TRUE: see \code{\link{ClusterPlotMDS}} 30 | } 31 | \item{Plotter3D}{ 32 | If PlotIt=TRUE: see \code{\link{ClusterPlotMDS}} 33 | } 34 | \item{\dots}{ 35 | If PlotIt=TRUE: further arguments for \code{\link{ClusterPlotMDS}} 36 | } 37 | } 38 | \details{ 39 | A detailed description of the datasets can be found in [Thrun/Ultsch 2020]. Sampling works by combining Pareto Density Estimation with rejection sampling. 40 | } 41 | \value{ 42 | LIST, with 43 | \item{Name}{[1:SampleSize,1:d] data matrix} 44 | \item{Cls }{[1:SampleSize] numerical vector of classification} 45 | 46 | } 47 | \references{ 48 | [Thrun/Ultsch, 2020] Thrun, M. C., & Ultsch, A.: Clustering Benchmark Datasets Exploiting the Fundamental Clustering Problems, Data in Brief, Vol. in press, pp. 105501, \doi{10.1016/j.dib.2020.105501}, 2020. 49 | } 50 | \author{ 51 | Michael Thrun 52 | } 53 | 54 | \examples{ 55 | \dontshow{ 56 | ClusterChallenge("Chainlink",500, PlotIt=FALSE) 57 | } 58 | \dontrun{ 59 | ClusterChallenge("Chainlink",2000,PlotIt=TRUE) 60 | } 61 | } 62 | \seealso{ 63 | \code{\link{ClusterPlotMDS}} 64 | } 65 | \concept{Generate Fundamental Clustering Problem} 66 | \keyword{FCPS}% __ONLY ONE__ keyword per line 67 | \keyword{Clustering}% 68 | \concept{Cluster Challenge}% -------------------------------------------------------------------------------- /man/MSTclustering.Rd: -------------------------------------------------------------------------------- 1 | \name{MSTclustering} 2 | \alias{MSTclustering} 3 | %- Also NEED an '\alias' for EACH other topic documented here. 4 | \title{ 5 | MST-kNN clustering algorithm [Inostroza-Ponta, 2008]. 6 | } 7 | \description{ 8 | Performs the MST-kNN clustering algorithm which generates a clustering solution with automatic k determination using two proximity graphs: Minimal Spanning Tree (MST) and k-Nearest Neighbor (kNN), which are recursively intersected. 9 | } 10 | \usage{ 11 | MSTclustering(DataOrDistances, DistanceMethod = "euclidean",PlotIt=FALSE, \dots) 12 | } 13 | %- maybe also 'usage' for other objects documented here. 14 | \arguments{ 15 | \item{DataOrDistances}{ 16 | Either [1:n,1:n] symmetric distance matrix or [1:n,1:d] not symmetric data matrix of n cases and d variables 17 | } 18 | 19 | \item{DistanceMethod}{ 20 | Optional distance method of data, default is 'euclidean', see \code{\link[parallelDist]{parDist}} for details 21 | } 22 | \item{PlotIt}{Default: FALSE, if TRUE plots the first three dimensions of the dataset with colored three-dimensional data points defined by the clustering stored in \code{Cls}} 23 | \item{\dots}{ 24 | Optional, further arguments for \code{\link[mstknnclust]{mst.knn}} 25 | } 26 | } 27 | \details{ 28 | Does not work on Hepta with euclidean distances. 29 | } 30 | \value{ 31 | List of 32 | \item{Cls}{[1:n] numerical vector with n numbers defining the classification as the main output of the clustering algorithm.
It has k unique numbers representing the arbitrary labels of the clustering.} 33 | \item{Object}{Object defined by clustering algorithm as the other output of this algorithm} 34 | } 35 | \references{ 36 | [Inostroza-Ponta, 2008] Inostroza-Ponta, M.: An integrated and scalable approach based on combinatorial optimization techniques for the analysis of microarray data, University of Newcastle, ISBN, 2008} 37 | \author{ 38 | Michael Thrun 39 | } 40 | 41 | 42 | \seealso{ 43 | \code{\link[mstknnclust]{mst.knn}} 44 | } 45 | \examples{ 46 | data(Hepta) 47 | \donttest{ 48 | MSTclustering(Hepta$Data) 49 | } 50 | } 51 | 52 | \keyword{MSTclustering} 53 | \keyword{mst} 54 | -------------------------------------------------------------------------------- /R/FannyClustering.R: -------------------------------------------------------------------------------- 1 | FannyClustering=function(DataOrDistances,ClusterNo,PlotIt=FALSE,Standardization=TRUE,...){ 2 | # 3 | # INPUT 4 | # DataOrDistances[1:n,1:d] Dataset with n observations and d features or distance matrix with size n 5 | # ClusterNo Number of clusters to search for 6 | # PlotIt Boolean. Decision to plot or not 7 | # Standardization Boolean. Decision of use of standardization or not. 8 | # 9 | # OUTPUT 10 | # Cls[1:n] Clustering of data 11 | # fanObject Object of fanny algorithm 12 | # 13 | # Author: MT, 04/2018 14 | if (!requireNamespace('cluster',quietly = TRUE)) { 15 | message( 16 | 'Subordinate clustering package (cluster) is missing. No computations are performed. 17 | Please install the package which is defined in "Suggests".' 18 | ) 19 | return( 20 | list( 21 | Cls = rep(1, nrow(DataOrDistances)), 22 | Object = "Subordinate clustering package (cluster) is missing. 23 | Please install the package which is defined in 'Suggests'." 24 | ) 25 | ) 26 | } 27 | 28 | # if(missing(DataOrDistances)){ 29 | # DataOrDistances=Data 30 | # } 31 | if(Standardization==1) Standardization=TRUE 32 | if(Standardization==0) Standardization=FALSE 33 | 34 | if (unname(isSymmetric(DataOrDistances))) { 35 | Input = as.dist(DataOrDistances) 36 | AnzVar = ncol(DataOrDistances) 37 | AnzData = nrow(DataOrDistances) 38 | diss =TRUE 39 | }else{ 40 | Input=DataOrDistances 41 | diss =FALSE 42 | } 43 | 44 | fan=cluster::fanny(Input,k=ClusterNo,diss=diss,stand=Standardization,...) 
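# cluster::fanny returns a fuzzy membership matrix ($membership) as well as the closest crisp partition ($clustering); only the crisp assignment is extracted as Cls below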
45 | Cls=fan$clustering 46 | if(!is.null(rownames(DataOrDistances))) 47 | names(Cls)=rownames(DataOrDistances) 48 | else 49 | names(Cls)=1:nrow(DataOrDistances) 50 | 51 | Cls[!is.finite(Cls)]=0 52 | if(isTRUE(PlotIt)){ 53 | Cls2=Cls 54 | Cls2[Cls2==0]=999 55 | ClusterPlotMDS(DataOrDistances,Cls2) 56 | } 57 | Cls=ClusterRename(Cls,DataOrDistances) 58 | return(list(Cls=Cls,Object=fan)) 59 | } -------------------------------------------------------------------------------- /R/cluster_analysis_fun.R: -------------------------------------------------------------------------------- 1 | cluster_analysis_fun=function(i,fun,DataOrDistances,ClusterNo,SetSeed=TRUE,...){ 2 | #example 3 | # data(Hepta) 4 | # Distance=as.matrix(parallelDist::parallelDist(Hepta$Data)) 5 | # out=cluster_analysis_fun(i = 1,fun = APclustering,DataOrDistances = Distance,ClusterNo = 7) 6 | if(isTRUE(SetSeed)){ 7 | seedno=1000+i 8 | set.seed(seed = seedno) 9 | nndelta=paste0('Seed_',seedno) 10 | }else{ 11 | nndelta=paste0(i) 12 | set.seed(seed = NULL) 13 | } 14 | prior=Sys.time() 15 | string=names(formals(fun)) 16 | 17 | if(is.null(ClusterNo)){ 18 | if (isSymmetric(unname(DataOrDistances))) { 19 | object=R.utils::doCall(fun, args=list(DataOrDistances=DataOrDistances,...),.ignoreUnusedArgs=TRUE) 20 | }else{ 21 | if(string[1]=="Data") 22 | object=R.utils::doCall(fun, args=list(Data=DataOrDistances,...),.ignoreUnusedArgs=TRUE) 23 | else 24 | object=R.utils::doCall(fun, args=list(DataOrDistances=DataOrDistances,...),.ignoreUnusedArgs=TRUE) 25 | } 26 | #object=fun(DataOrDistances,...) 27 | }else{ 28 | if(isSymmetric(unname(DataOrDistances))) { 29 | object=R.utils::doCall(fun, args=list(DataOrDistancess=DataOrDistances,ClusterNo=ClusterNo,...),.ignoreUnusedArgs=TRUE) 30 | }else{ 31 | if(string[1]=="Data") 32 | object=R.utils::doCall(fun, args=list(Data=DataOrDistances,ClusterNo=ClusterNo,...),.ignoreUnusedArgs=TRUE) 33 | else 34 | object=R.utils::doCall(fun, args=list(DataOrDistances=DataOrDistances,ClusterNo=ClusterNo,...),.ignoreUnusedArgs=TRUE) 35 | } 36 | #object=fun(DataOrDistances,ClusterNo,...) 37 | } 38 | past=Sys.time() 39 | delta=as.vector(as.numeric(difftime(past,prior,units = 'secs'))) 40 | names(delta)=nndelta 41 | nn=names(object) 42 | ind=which(nn=='Cls') 43 | if(length(ind)==1){ 44 | Liste=list(Cls=object[[ind]],ComputationTime=delta,Seed=seedno) 45 | }else{ 46 | warning('"Cls" object could not be found. Everything available is returned.') 47 | Liste=list(Cls=object,ComputationTime=delta,Seed=seedno) 48 | } 49 | return(Liste) 50 | }#end help_fun -------------------------------------------------------------------------------- /R/MSTclustering.R: -------------------------------------------------------------------------------- 1 | MSTclustering=function(DataOrDistances,DistanceMethod="euclidean",PlotIt=FALSE,...){ 2 | # INPUT 3 | # DataOrDistances[1:n,1:d] Dataset with n observations and d features or distance matrix with size n 4 | # OPTIONAL 5 | # DistanceMethod Choose distance metric. 6 | # PlotIt Boolean. Decision to plot or not 7 | # 8 | # OUTPUT 9 | # Cls[1:n] Clustering of data 10 | # Object Object of mstknnclust::mst.knn algorithm 11 | # 12 | # Author: MT 13 | if (!requireNamespace('mstknnclust',quietly = TRUE)) { 14 | message( 15 | 'Subordinate clustering package (mstknnclust) is missing. No computations are performed. 16 | Please install the package which is defined in "Suggests".' 
17 | ) 18 | return( 19 | list( 20 | Cls = rep(1, nrow(DataOrDistances)), 21 | Object = "Subordinate clustering package (mstknnclust) is missing. 22 | Please install the package which is defined in 'Suggests'." 23 | ) 24 | ) 25 | } 26 | 27 | if(!is.matrix(DataOrDistances)){ 28 | warning('DataOrDistances is not a matrix. Calling as.matrix()') 29 | DataOrDistances=as.matrix(DataOrDistances) 30 | } 31 | if(!mode(DataOrDistances)=='numeric'){ 32 | warning('Data is not a numeric matrix. Calling mode(DataOrDistances)="numeric"') 33 | mode(DataOrDistances)='numeric' 34 | } 35 | AnzData = nrow(DataOrDistances) 36 | 37 | if (!isSymmetric(unname(DataOrDistances))) { 38 | if(requireNamespace("parallelDist",quietly = TRUE)){ 39 | Distances=as.matrix(parallelDist::parDist(DataOrDistances,method=DistanceMethod)) 40 | }else{ 41 | warning("Please install the parallelDist package, using dist()") 42 | Distances=as.matrix(dist(DataOrDistances,method=DistanceMethod)) 43 | } 44 | }else{ 45 | Distances=DataOrDistances 46 | } 47 | results <- mstknnclust::mst.knn(distance.matrix = Distances,...) 48 | Cls=results$cluster 49 | if(isTRUE(PlotIt)){ 50 | ClusterPlotMDS(DataOrDistances,Cls) 51 | } 52 | Cls=ClusterRename(Cls,DataOrDistances) 53 | return(list(Cls=Cls,Object=results)) 54 | } -------------------------------------------------------------------------------- /R/LargeApplicationClustering.R: -------------------------------------------------------------------------------- 1 | LargeApplicationClustering <-function(Data,ClusterNo,PlotIt=FALSE,Standardization=TRUE,Samples=50,Random=TRUE,...){ 2 | # Cls=LargeApplicationClustering(Data,ClusterNo=2) 3 | # Clustering Large Applications (clara) 4 | # 5 | # INPUT 6 | # Data[1:n,1:d] Data set with n observations and d features 7 | # ClusterNo Number of clusters to search for 8 | # 9 | # OPTIONAL 10 | # PlotIt Boolean. Decision to plot or not 11 | # Standardization Data is standardized before calculating the dissimilarities. 12 | # Measurements are standardized for each variable (column), by subtracting the 13 | # variable's mean value and dividing by the variable's mean absolute deviation. 14 | # Samples integer, say N, the number of samples to be drawn from the dataset. Default value 15 | # set as recommended by documentation of cluster::clara. 16 | # Random logical indicating if R's random number generator should be used instead of the primitive clara()-builtin one. 17 | # 18 | # OUTPUT 19 | # Cls[1:n] Clustering of data 20 | # Object Object of cluster::clara algorithm 21 | # 22 | # Author: MT 04/2018 23 | 24 | if (!requireNamespace('cluster',quietly = TRUE)) { 25 | message( 26 | 'Subordinate clustering package (cluster) is missing. No computations are performed. 27 | Please install the package which is defined in "Suggests".' 28 | ) 29 | return( 30 | list( 31 | Cls = rep(1, nrow(Data)), 32 | Object = "Subordinate clustering package (cluster) is missing. 33 | Please install the package which is defined in 'Suggests'." 34 | ) 35 | ) 36 | } 37 | 38 | if(Standardization==1) Standardization=TRUE 39 | if(Standardization==0) Standardization=FALSE 40 | 41 | res=cluster::clara(x=Data,k = ClusterNo,samples=Samples,rngR=Random,stand=Standardization,...) 
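# cluster::clara applies PAM to 'Samples' randomly drawn subsets, keeps the medoid set with the best objective, and assigns every observation to its nearest medoid; the resulting labels are stored in $clustering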
42 | Cls=res$clustering 43 | 44 | if(PlotIt){ 45 | ClusterPlotMDS(Data,Cls) 46 | } 47 | Cls=ClusterRename(Cls,Data) 48 | return(list(Cls=Cls,Object=res)) 49 | } -------------------------------------------------------------------------------- /man/RandomForestClustering.Rd: -------------------------------------------------------------------------------- 1 | \name{RandomForestClustering} 2 | \alias{RandomForestClustering} 3 | \alias{RandomForestClustering} 4 | \title{ 5 | Random Forest Clustering 6 | } 7 | \description{ 8 | Clustering using the proximity matrix of random forest with either PAM or hierarchical clustering algorithms. 9 | } 10 | \usage{ 11 | RandomForestClustering(Data,ClusterNo, 12 | 13 | Type="ward.D2",NoTrees = 2000, 14 | 15 | PlotIt=FALSE,PlotForest=FALSE,\dots) 16 | } 17 | \arguments{ 18 | \item{Data}{[1:n,1:d] matrix of dataset to be clustered. It consists of n cases of d-dimensional data points. Every case has d attributes, variables or features} 19 | \item{ClusterNo}{A number k which defines k different clusters to be built by the algorithm.} 20 | \item{Type}{Method of cluster analysis: "PAM", "ward.D", "ward.D2", "single", "complete", "average", "mcquitty", "median" or "centroid".} 21 | 22 | \item{NoTrees}{A number of trees used in the forest} 23 | 24 | \item{PlotIt}{Default: FALSE, If TRUE plots the first three dimensions of the dataset with colored three-dimensional data points defined by the clustering stored in \code{Cls}} 25 | 26 | \item{PlotForest}{Default: FALSE, If TRUE plots the forest} 27 | 28 | \item{\dots}{Further arguments to be set for the random forest algorithm, if not set, default arguments are used.} 29 | } 30 | \details{ 31 | Inspired by [Alhusain/Hafez, 2017]. 32 | } 33 | \value{ 34 | List of 35 | \item{Cls}{[1:n] numerical vector with n numbers defining the classification as the main output of the clustering algorithm. It has k unique numbers representing the arbitrary labels of the clustering.} 36 | \item{Object}{Object defined by clustering algorithm as the other output of this algorithm} 37 | } 38 | \references{ 39 | [Alhusain/Hafez, 2017] Alhusain, L., & Hafez, A. M.: Cluster ensemble based on Random Forests for genetic data, BioData mining, Vol. 10(1), pp. 37. 2017. 40 | 41 | } 42 | \author{ 43 | Michael Thrun 44 | } 45 | \examples{ 46 | data('Hepta') 47 | #out=RandomForestClustering(Hepta$Data,ClusterNo=7,PlotIt=FALSE) 48 | } 49 | \keyword{RandomForestClustering} 50 | \concept{Random Forest Clustering} 51 | \keyword{clustering} -------------------------------------------------------------------------------- /man/CrossEntropyClustering.Rd: -------------------------------------------------------------------------------- 1 | \name{CrossEntropyClustering} 2 | \alias{CrossEntropyClustering} 3 | \title{Cross-Entropy Clustering} 4 | 5 | \usage{ 6 | CrossEntropyClustering(Data, ClusterNo,PlotIt=FALSE,...) 7 | } 8 | 9 | \description{ 10 | Cross-entropy clustering published by [Tabor/Spurek, 2014] and implemented by [Spurek et al., 2017]. 11 | 12 | } 13 | \arguments{ 14 | \item{Data}{[1:n,1:d] matrix of dataset to be clustered. It consists of n cases of d-dimensional data points. 
Every case has d attributes, variables or features.} 15 | 16 | \item{ClusterNo}{A number k which defines k different clusters to be built by the algorithm.} 17 | \item{PlotIt}{Default: FALSE, If TRUE plots the first three dimensions of the dataset with colored three-dimensional data points defined by the clustering stored in \code{Cls}} 18 | \item{\dots}{Further arguments to be set for the clustering algorithm, if not set, default arguments are used.} 19 | } 20 | \value{ 21 | List of 22 | \item{Cls}{[1:n] numerical vector with n numbers defining the classification as the main output of the clustering algorithm. It has k unique numbers representing the arbitrary labels of the clustering.} 23 | \item{Object}{Object defined by clustering algorithm as the other output of this algorithm} 24 | } 25 | 26 | \details{ 27 | Contrary to most of the other implemented algorithms in this package, the results on the easiest clustering challenge of Hepta are unstable for cross-entropy clustering 28 | in the sense that the clustering is not always correct. Reproducibilty experiments should be performed (see [Tabor/Spurek, 2014]). 29 | } 30 | \examples{ 31 | data('Hepta') 32 | out=CrossEntropyClustering(Hepta$Data,ClusterNo=7,PlotIt=FALSE) 33 | } 34 | \author{Michael Thrun} 35 | 36 | 37 | 38 | \references{ 39 | [Spurek et al., 2017] Spurek, P., Kamieniecki, K., Tabor, J., Misztal, K., & Śmieja, M.: R package cec, Neurocomputing, Vol. 237, pp. 410-413. 2017. 40 | 41 | [Tabor/Spurek, 2014] Tabor, J., & Spurek, P.: Cross-entropy clustering, Pattern Recognition, Vol. 47(9), pp. 3046-3059. 2014. 42 | 43 | } 44 | 45 | \keyword{CrossEntropyClustering} 46 | \concept{Cross-Entropy Clustering} 47 | \concept{Cross-Entropy} -------------------------------------------------------------------------------- /R/HDDClustering.R: -------------------------------------------------------------------------------- 1 | HDDClustering=function(Data, ClusterNo, PlotIt=F,...){ 2 | # INPUT 3 | # Data[1:n,1:d] Data set with n observations and d features 4 | # 5 | # OPTIONAL 6 | # ClusterNo Numeric vector of integers specifying the number of 7 | # clusters for which the BIC and the parameters are to be 8 | # calculated; the function keeps the parameters which 9 | # maximises the BIC. Note that the length of the vector K 10 | # can't be larger than 20. Default is 1:10. 11 | # PlotIt Boolean. Default = FALSE = No plotting performed. 12 | # ... See HDclassif for more parameters. 13 | # 14 | # OUTPUT 15 | # Cls[1:n] Clustering of data 16 | # Object Object of hddc. 17 | # 18 | # 19 | # 20 | # Author: QS, 06/2021 21 | if (!requireNamespace('HDclassif', quietly = TRUE)) { 22 | message( 23 | 'Subordinate clustering package (HDclassif) is missing. No computations are performed. 24 | Please install the package which is defined in "Suggests".' 25 | ) 26 | return( 27 | list( 28 | Cls = rep(1, nrow(Data)), 29 | Object = "Subordinate clustering package (HDclassif) is missing. 30 | Please install the package which is defined in 'Suggests'." 31 | ) 32 | ) 33 | } 34 | if(missing(Data)){ 35 | message('Variable Data is not given. Returning.') 36 | return() 37 | } 38 | if(is.null(Data)){ 39 | message('Variable Data is not given. Returning.') 40 | return() 41 | } 42 | if(!missing(ClusterNo)) 43 | Object = HDclassif::hddc(data=Data, K=ClusterNo, ...) 44 | else 45 | Object = HDclassif::hddc(data=Data, ...) 
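# HDclassif::hddc fits a Gaussian mixture in cluster-specific subspaces; the crisp labels are stored in $class, while the commented line below would derive them from the posterior probabilities instead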
46 | 47 | #Cls = apply(Object$posterior, 1, which.max) 48 | Cls=Object$class 49 | Cls=ClusterRename(Cls,Data) 50 | 51 | if(PlotIt == TRUE){ 52 | FCPS::ClusterPlotMDS(Data, Cls, main = "Clustering", 53 | DistanceMethod = "euclidean", OutputDimension = 3, 54 | PointSize=1,Plotter3D="rgl", ...) 55 | } 56 | return(list("Cls"=Cls, "Object"=Object)) 57 | } -------------------------------------------------------------------------------- /man/GenieClustering.Rd: -------------------------------------------------------------------------------- 1 | \name{GenieClustering} 2 | \alias{GenieClustering} 3 | \title{ 4 | Genie Clustering by Gini Index 5 | } 6 | \description{ 7 | Outlier Resistant Hierarchical Clustering Algorithm of [Gagolewski/Bartoszuk, 2016]. 8 | } 9 | \usage{ 10 | GenieClustering(DataOrDistances, ClusterNo = 0, 11 | DistanceMethod="euclidean", ColorTreshold = 0,\dots) 12 | } 13 | \arguments{ 14 | \item{DataOrDistances}{[1:n,1:d] matrix of dataset to be clustered. It consists of n cases of d-dimensional data points. Every case has d attributes, variables or features. Alternatively, symmetric [1:n,1:n] distance matrix} 15 | 16 | \item{ClusterNo}{A number k which defines k different clusters to be build by the algorithm.} 17 | 18 | \item{DistanceMethod}{See \code{\link[parallelDist]{parDist}}, for example 'euclidean','mahalanobis','manhatten' (cityblock),'fJaccard','binary', 'canberra', 'maximum'. Any unambiguous substring can be given.} 19 | 20 | \item{ColorTreshold}{Draws cutline w.r.t. dendogram y-axis (height), height of line as scalar should be given} 21 | \item{\dots}{furter argument to genie like: 22 | 23 | \code{thresholdGini} Single numeric value in [0,1], threshold for the Gini index, 1 gives the standard single linkage algorithm 24 | } 25 | } 26 | \details{ 27 | Wrapper for Genie algorithm. 28 | } 29 | \value{ 30 | List of 31 | \item{Cls}{If, ClusterNo>0: [1:n] numerical vector with n numbers defining the classification as the main output of the clustering algorithm. It has k unique numbers representing the arbitrary labels of the clustering. Otherwise for ClusterNo=0: NULL} 32 | \item{Dendrogram}{Dendrogram of hierarchical clustering algorithm} 33 | \item{Object}{Ultrametric tree of hierarchical clustering algorithm} 34 | } 35 | \references{ 36 | [Gagolewski/Bartoszuk, 2016] Gagolewski M., Bartoszuk M., Cena A., Genie: A new, fast, and outlier-resistant hierarchical clustering 37 | algorithm, Information Sciences, Vol. 363, pp. 8-23, 2016. 38 | } 39 | \author{ 40 | Michael Thrun 41 | } 42 | 43 | \seealso{ 44 | \code{\link{HierarchicalClustering}} 45 | } 46 | \examples{ 47 | data('Hepta') 48 | Clust=GenieClustering(Hepta$Data,ClusterNo=7) 49 | } 50 | \keyword{Hierarchical} 51 | \keyword{Clustering} 52 | \keyword{Agglomerative} 53 | -------------------------------------------------------------------------------- /man/ClusterDaviesBouldinIndex.Rd: -------------------------------------------------------------------------------- 1 | \name{ClusterDaviesBouldinIndex} 2 | \alias{ClusterDaviesBouldinIndex} 3 | 4 | \title{ 5 | Davies Bouldin Index 6 | } 7 | \description{ 8 | Internal (i.e. without prior classification) cluster quality measure called Davies Bouldin index for a given clustering published in [Davies/Bouldin, 1979]. 9 | } 10 | \usage{ 11 | ClusterDaviesBouldinIndex(Cls, Data,\dots) 12 | } 13 | %- maybe also 'usage' for other objects documented here. 
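A hedged usage sketch for the GenieClustering wrapper documented above: the Gini index threshold is handed through the dots argument to genie::hclust2 (the value 0.3 below is purely illustrative, not a default stated in this documentation).
data('Hepta')
GenieOut = GenieClustering(Hepta$Data, ClusterNo = 7, thresholdGini = 0.3)
table(Hepta$Cls, GenieOut$Cls)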
14 | \arguments{ 15 | \item{Cls}{ 16 | [1:n] numerical vector of numbers defining the classification as the main output of the clustering algorithm for the n cases of data. It has k unique numbers representing the arbitrary labels of the clustering. 17 | } 18 | \item{Data}{ 19 | matrix, [1:d,1:n] dataset of d variables and n cases 20 | } 21 | 22 | \item{\dots}{Further arguments passed on to the \code{\link[clusterSim]{index.DB}} function of \code{clusterSim}} 23 | } 24 | \details{ 25 | Wrapper for \code{\link[clusterSim]{index.DB}}. Davies Bouldin index is defined in [Davies/Bouldin, 1979]. Best clustering scheme essentially minimizes the Davies-Bouldin index because it is defined as the function of the ratio of the within cluster scatter, to the between cluster separation.[Davies/Bouldin, 1979]. 26 | } 27 | \value{ 28 | List of 29 | \item{DaviesBouldinIndex}{ 30 | scalar,Davies Bouldin index 31 | } 32 | \item{Object}{ 33 | further information stored in \code{\link[clusterSim]{index.DB}} 34 | } 35 | } 36 | \references{ 37 | [Davies/Bouldin, 1979] Davies, D. L., & Bouldin, D. W.: A cluster separation measure, IEEE Transactions on Pattern Analysis and Machine Intelligence, Vol. 1(2), pp. 224-227. doi 10.1109/TPAMI.1979.4766909, 1979. 38 | } 39 | \author{ 40 | Michael Thrun 41 | } 42 | 43 | \examples{ 44 | data("Hepta") 45 | Cls=kmeansClustering(Hepta$Data,ClusterNo = 7,Type="Hartigan")$Cls 46 | ClusterDaviesBouldinIndex(Cls,Hepta$Data)[1] 47 | 48 | \donttest{ 49 | data("Hepta") 50 | ClsWellSeperated=kmeansClustering(Hepta$Data,ClusterNo = 7,Type="Steinley")$Cls 51 | ClusterDaviesBouldinIndex(ClsWellSeperated,Hepta$Data)[1] 52 | } 53 | } 54 | \keyword{DaviesBouldinIndex} 55 | \keyword{Davies} 56 | \keyword{Bouldin} 57 | \keyword{Clustering} 58 | \concept{Davies Bouldin Index} 59 | 60 | -------------------------------------------------------------------------------- /man/FannyClustering.Rd: -------------------------------------------------------------------------------- 1 | \name{FannyClustering} 2 | \alias{FannyClustering} 3 | 4 | \title{ 5 | Fuzzy Analysis Clustering [Rousseeuw/Kaufman, 1990, p. 253-279] 6 | } 7 | \description{ 8 | ... 9 | } 10 | \usage{ 11 | FannyClustering(DataOrDistances,ClusterNo, 12 | 13 | PlotIt=FALSE,Standardization=TRUE,...) 14 | } 15 | %- maybe also 'usage' for other objects documented here. 16 | \arguments{ 17 | \item{DataOrDistances}{[1:n,1:d] matrix of dataset to be clustered. It consists of n cases or d-dimensional data points. Every case has d attributes, variables or features. Alternatively, symmetric [1:n,1:n] distance matrix} 18 | 19 | \item{ClusterNo}{A number k which defines k different clusters to be build by the algorithm.} 20 | \item{PlotIt}{Default: FALSE, If TRUE plots the first three dimensions of the dataset with colored three-dimensional data points defined by the clustering stored in \code{Cls}} 21 | \item{Standardization}{ 22 | \code{DataOrDistances} is standardized before calculating the dissimilarities. Measurements are standardized for each variable (column), by subtracting the variable's mean value and dividing by the variable's mean absolute deviation. If \code{DataOrDistances} is already a distance matrix, then this argument will be ignored. 23 | } 24 | 25 | \item{\dots}{Further arguments to be set for the clustering algorithm, if not set, default arguments are used.} 26 | } 27 | \details{ 28 | ... 
29 | } 30 | \value{ 31 | List of 32 | \item{Cls}{[1:n] numerical vector with n numbers defining the classification as the main output of the clustering algorithm. It has k unique numbers representing the arbitrary labels of the clustering. 33 | Points which cannot be assigned to a cluster will be reported with 0. 34 | } 35 | \item{Object}{Object defined by clustering algorithm as the second output of this algorithm} 36 | } 37 | \references{ 38 | [Rousseeuw/Kaufman, 1990] Rousseeuw, P. J., & Kaufman, L.: Finding groups in data, Belgium, John Wiley & Sons Inc., ISBN: 0471735787, doi: 10.1002/9780470316801, Online ISBN: 9780470316801, 1990. 39 | 40 | } 41 | \author{ 42 | Michael Thrun 43 | } 44 | 45 | \examples{ 46 | data('Hepta') 47 | out=FannyClustering(Hepta$Data,ClusterNo=7,PlotIt=FALSE) 48 | } 49 | 50 | \keyword{fanny} 51 | \concept{fuzzy clustering} 52 | -------------------------------------------------------------------------------- /R/ClusterEqualWeighting.R: -------------------------------------------------------------------------------- 1 | ClusterEqualWeighting=function(Cls,Data,MinClusterSize){ 2 | # V = ClusterEqualWeighting(Cls) 3 | # V = ClusterEqualWeighting(Cls,Data) 4 | # balance clusters such that their sizes are the same by subsampling the larger cluster 5 | # 6 | # INPUTC 7 | # Cls vector of cluster identifiers can be integers or 8 | # NaN's, need not be consecutive nor positive 9 | # OPTIONAL 10 | # Data 11 | # 12 | # OUTPUT 13 | # BalancedCls Vector of Cls such that all classes have the same sizes 14 | # BalancedInd index such that BalancedCls = Cls(BalancedInd); 15 | # 16 | # BalancedData = Data(BalancedInd,:) 17 | # author: ALU, reimplemented from matlab by mct 18 | if(length(Cls)!=nrow(Data)){ 19 | stop("ClusterEqualWeighting: length of Cls does not equal the number of cases in data.") 20 | } 21 | V= ClusterCount(Cls) 22 | UniqueClusters=V$UniqueClusters 23 | CountPerCluster=V$CountPerCluster 24 | NumberOfClusters=V$NumberOfClusters 25 | 26 | if(missing(MinClusterSize)) 27 | MinClusterSize = min(CountPerCluster,na.rm = T) 28 | 29 | BalancedInd= c() 30 | for(i in 1:NumberOfClusters){ 31 | Current = UniqueClusters[i] # die fragliche klasse 32 | ClusterInd = which(Cls==Current) # index der Mitgieder in der Klasse 33 | if(CountPerCluster[i] > MinClusterSize){ # Kuerzung notwendig 34 | Ind = sample(1:CountPerCluster[i],size = MinClusterSize,replace = F) # subsample 35 | ClusterInd = ClusterInd[Ind] # Kuerzung auf MinClsAnz 36 | } else if(CountPerCluster[i] == MinClusterSize){ 37 | Ind = 1:CountPerCluster[i] # no sample 38 | ClusterInd = ClusterInd[Ind] # Kuerzung auf MinClsAnz 39 | }else{ 40 | Ind = sample(1:CountPerCluster[i],size = MinClusterSize - CountPerCluster[i],replace = T) # subsample 41 | Ind=c(1:CountPerCluster[i],Ind) 42 | ClusterInd = ClusterInd[Ind] # Kuerzung auf MinClsAnz 43 | } 44 | 45 | BalancedInd= c(BalancedInd,ClusterInd)# Aufsammeln des index 46 | }; # for i 47 | BalancedCls = Cls[BalancedInd] 48 | 49 | if(!missing(Data)) BalancedData = Data[BalancedInd,] else BalancedData=NULL 50 | 51 | 52 | return(list(BalancedCls=BalancedCls,BalancedInd=BalancedInd,BalancedData=BalancedData)) 53 | } -------------------------------------------------------------------------------- /man/MinimaxLinkageClustering.Rd: -------------------------------------------------------------------------------- 1 | \name{MinimaxLinkageClustering} 2 | \alias{MinimaxLinkageClustering} 3 | \title{ 4 | Minimax Linkage Hierarchical Clustering 5 | } 6 | \description{ 7 | In the minimax 
linkage hierarchical clustering every cluster has an associated prototype element that represents that cluster [Bien/Tibshirani, 2011]. 8 | } 9 | \usage{ 10 | MinimaxLinkageClustering(DataOrDistances, ClusterNo = 0, 11 | DistanceMethod="euclidean", ColorTreshold = 0,\dots) 12 | } 13 | \arguments{ 14 | \item{DataOrDistances}{[1:n,1:d] matrix of dataset to be clustered. It consists of n cases or d-dimensional data points. Every case has d attributes, variables or features. Alternatively, symmetric [1:n,1:n] distance matrix} 15 | 16 | \item{ClusterNo}{A number k which defines k different clusters to be build by the algorithm.} 17 | 18 | \item{DistanceMethod}{See \code{\link[parallelDist]{parDist}}, for example 'euclidean','mahalanobis','manhatten' (cityblock),'fJaccard','binary', 'canberra', 'maximum'. Any unambiguous substring can be given.} 19 | 20 | \item{ColorTreshold}{Draws cutline w.r.t. dendogram y-axis (height), height of line as scalar should be given} 21 | \item{\dots}{In case of plotting further argument for \code{plot}, see \code{\link{as.dendrogram}} 22 | } 23 | } 24 | 25 | \value{ 26 | List of 27 | \item{Cls}{If, ClusterNo>0: [1:n] numerical vector with n numbers defining the classification as the main output of the clustering algorithm. It has k unique numbers representing the arbitrary labels of the clustering. Otherwise for ClusterNo=0: NULL} 28 | \item{Dendrogram}{Dendrogram of hierarchical clustering algorithm} 29 | \item{Object}{Ultrametric tree of hierarchical clustering algorithm} 30 | } 31 | \references{ 32 | [Bien/Tibshirani, 2011] Bien, J., and Tibshirani, R.: Hierarchical Clustering with Prototypes via Minimax Linkage, The Journal of the American Statistical Association, Vol. 106(495), pp. 1075-1084, 2011. 33 | } 34 | \author{ 35 | Michael Thrun 36 | } 37 | 38 | \seealso{ 39 | \code{\link{HierarchicalClustering}} 40 | } 41 | \examples{ 42 | data('Hepta') 43 | out=MinimaxLinkageClustering(Hepta$Data,ClusterNo=7) 44 | } 45 | \keyword{Hierarchical}% use one of RShowDoc("KEYWORDS") 46 | \keyword{Clustering}% __ONLY ONE__ keyword per line 47 | \keyword{Agglomerative}% __ONLY ONE__ keyword per line 48 | \concept{Minimax Linkage} 49 | \keyword{Minimax} -------------------------------------------------------------------------------- /man/sotaClustering.Rd: -------------------------------------------------------------------------------- 1 | \name{SOTAclustering} 2 | \alias{SOTAclustering} 3 | \alias{sotaClustering} 4 | \title{SOTA Clustering} 5 | \description{ 6 | Self-organizing Tree Algorithm (SOTA) introduced by [Herrero et al., 2001]. 7 | } 8 | \usage{ 9 | SOTAclustering(Data, ClusterNo,PlotIt=FALSE,UnrestGrowth,...) 10 | } 11 | 12 | \arguments{ 13 | \item{Data}{[1:n,1:d] matrix of dataset to be clustered. It consists of n cases of d-dimensional data points. Every case has d attributes, variables or features.} 14 | 15 | \item{ClusterNo}{A number k which defines k different clusters to be built by the algorithm.} 16 | \item{PlotIt}{Default: FALSE, if TRUE plots the first three dimensions of the dataset with colored three-dimensional data points defined by the clustering stored in \code{Cls}} 17 | \item{UnrestGrowth}{ 18 | TRUE: forces the \code{ClusterNo} option to uphold. 19 | FALSE: enables the algorithm to find its own number of clusters, in this cases ClusterNo should contain a high number because it is internally set as the number of iterations which is either reached or the max diversity criteria is satisfied priorly. 
20 | } 21 | \item{\dots}{Further arguments to be set for the clustering algorithm, if not set, default arguments are used.} 22 | } 23 | \value{ 24 | List of 25 | \item{Cls}{[1:n] numerical vector with n numbers defining the classification as the main output of the clustering algorithm. It has k unique numbers representing the arbitrary labels of the clustering.} 26 | \item{sotaObject}{Object defined by clustering algorithm as the other output of this algorithm} 27 | } 28 | 29 | \examples{ 30 | # Does work 31 | data('Hepta') 32 | out=SOTAclustering(Hepta$Data,ClusterNo=7) 33 | table(Hepta$Cls,out$Cls) 34 | 35 | \donttest{ 36 | # Does not work well 37 | data('Lsun3D') 38 | out=SOTAclustering(Lsun3D$Data,ClusterNo=100,PlotIt=FALSE,UnrestGrowth=FALSE) 39 | } 40 | } 41 | \author{Luis Winckelmann*, Vasyl Pihur, Guy Brock, Susmita Datta, Somnath Datta} 42 | 43 | \note{ 44 | *Luis Winckelmann integrated several functions from clValid because it is orphaned. 45 | } 46 | \references{ 47 | [Herrero et al., 2001] Herrero, J., Valencia, A., & Dopazo, J.: A hierarchical unsupervised growing neural network for clustering gene expression patterns, Bioinformatics, Vol. 17(2), pp. 126-136. 2001. 48 | 49 | } 50 | \keyword{SOTAclustering} 51 | \concept{Self-organizing Tree Algorithm} -------------------------------------------------------------------------------- /R/ModelBasedClustering.R: -------------------------------------------------------------------------------- 1 | ModelBasedClustering <-function(Data,ClusterNo=2,PlotIt=FALSE,...){ 2 | # Cls <- MoGclustering(Data,ClusterNo); 3 | # Calls R's model-based clustering, i.e., Mixture of Gaussians (MoG) clustering 4 | # 5 | # INPUT 6 | # Data[1:n,1:d] Data set with n observations and d features 7 | # ClusterNo Number of clusters to search for 8 | # 9 | # OPTIONAL 10 | # PlotIt Boolean. Decision to plot or not 11 | # 12 | # OUTPUT 13 | # Cls[1:n] Clustering of data 14 | # Object Object of mclust::Mclust algorithm 15 | # 16 | # MT 2017 17 | # Overview/short summary in [Thrun, 2017, p. 23] 18 | # 19 | # [Thrun, 2017] Thrun, M. C.: A System for Projection Based Clustering through Self-Organization and Swarm Intelligence, (Doctoral dissertation), Philipps-Universitaet Marburg, Marburg, 2017. 20 | # Algorithm from: 21 | # [Fraley/Raftery, 2002] Fraley, C., & Raftery, A. E.: Model-based clustering, discriminant analysis, and density estimation, Journal of the American Statistical Association, Vol. 97(458), pp. 611-631. 2002. 22 | # [Fraley/Raftery, 2006] Fraley, C., & Raftery, A. E.: MCLUST version 3: an R package for normal mixture modeling and model-based clustering, DTIC Document, 2006. 23 | 24 | if (!requireNamespace('mclust',quietly = TRUE)) { 25 | message( 26 | 'Subordinate clustering package (mclust) is missing. No computations are performed. 27 | Please install the package which is defined in "Suggests".' 28 | ) 29 | return( 30 | list( 31 | Cls = rep(1, nrow(Data)), 32 | Object = "Subordinate clustering package (mclust) is missing. 33 | Please install the package which is defined in 'Suggests'." 34 | ) 35 | ) 36 | } 37 | 38 | if (ClusterNo<2){ 39 | warning("ClusterNo should be an integer >= 2. Now, all of your data is in one cluster.") 40 | if(is.null(nrow(Data))){# then we have a vector 41 | return(cls <- rep(1,length(Data))) 42 | }else{ # matrix 43 | return(cls <- rep(1, nrow(Data))) 44 | } 45 | } 46 | 47 | res=mclust::Mclust(Data,G=ClusterNo,modelNames=mclust::mclust.options("emModelNames"),...)
48 | Cls=res$classification 49 | if(PlotIt){ 50 | ClusterPlotMDS(Data,Cls) 51 | } 52 | Cls=ClusterRename(Cls,Data) 53 | return(list(Cls=Cls,Object=res)) 54 | } -------------------------------------------------------------------------------- /man/MinimalEnergyClustering.Rd: -------------------------------------------------------------------------------- 1 | \name{MinimalEnergyClustering} 2 | \alias{MinimalEnergyClustering} 3 | \title{ 4 | Minimal Energy Clustering 5 | } 6 | \description{ 7 | Hierchical Clustering using the minimal energy approach of [Szekely/Rizzo, 2005]. 8 | } 9 | \usage{ 10 | MinimalEnergyClustering(DataOrDistances, ClusterNo = 0, 11 | DistanceMethod="euclidean", ColorTreshold = 0,Data,\dots) 12 | } 13 | \arguments{ 14 | \item{DataOrDistances}{[1:n,1:d] matrix of dataset to be clustered. It consists of n cases of d-dimensional data points. Every case has d attributes, variables or features. Alternatively, symmetric [1:n,1:n] distance matrix} 15 | 16 | \item{ClusterNo}{A number k which defines k different clusters to be build by the algorithm.} 17 | 18 | \item{DistanceMethod}{See \code{\link[parallelDist]{parDist}}, for example 'euclidean','mahalanobis','manhatten' (cityblock),'fJaccard','binary', 'canberra', 'maximum'. Any unambiguous substring can be given.} 19 | 20 | \item{ColorTreshold}{Draws cutline w.r.t. dendogram y-axis (height), height of line as scalar should be given} 21 | \item{Data}{[1:n,1:d] data matrix in the case that \code{DataOrDistances} is missing and partial matching does not work.} 22 | \item{\dots}{In case of plotting further argument for \code{plot}, see \code{\link{as.dendrogram}} 23 | } 24 | } 25 | \value{ 26 | List of 27 | \item{Cls}{If ClusterNo>0: [1:n] numerical vector with n numbers defining the classification as the main output of the clustering algorithm. It has k unique numbers representing the arbitrary labels of the clustering. Otherwise ClusterNo=0: NULL} 28 | \item{Dendrogram}{Dendrogram of hierarchical clustering algorithm} 29 | \item{Object}{Ultrametric tree of hierarchical clustering algorithm} 30 | } 31 | \references{ 32 | [Szekely/Rizzo, 2005] Szekely, G. J. and Rizzo, M. L.: Hierarchical Clustering via Joint Between-Within Distances: Extending Ward's Minimum Variance Method, Journal of Classification, 22(2) 151-183.http://dx.doi.org/10.1007/s00357-005-0012-9, 2005. 
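A hedged usage sketch of the value section above: with ClusterNo = 0 only the dendrogram and the ultrametric tree are returned (Cls stays NULL), while ClusterNo > 0 additionally cuts the tree into k clusters.
data('Hepta')
TreeOnly = MinimalEnergyClustering(Hepta$Data, ClusterNo = 0)  # Cls is NULL
Cut7 = MinimalEnergyClustering(Hepta$Data, ClusterNo = 7)
table(Hepta$Cls, Cut7$Cls)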
33 | } 34 | \author{ 35 | Michael Thrun 36 | } 37 | 38 | \seealso{ 39 | \code{\link{HierarchicalClustering}} 40 | } 41 | \examples{ 42 | data('Hepta') 43 | out=MinimalEnergyClustering(Hepta$Data,ClusterNo=7) 44 | } 45 | \keyword{Hierarchical}% use one of RShowDoc("KEYWORDS") 46 | \keyword{Clustering}% __ONLY ONE__ keyword per line 47 | 48 | \concept{Minimal Energy} 49 | \keyword{MinimalEnergy} -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: FCPS 2 | Type: Package 3 | Title: Fundamental Clustering Problems Suite 4 | Version: 1.3.3 5 | Date: 2023-05-28 6 | Authors@R: c(person("Michael", "Thrun", email= "m.thrun@gmx.net",role=c("aut","cre","cph"), comment = c(ORCID = "0000-0001-9542-5543")),person("Peter", "Nahrgang",role=c("ctr","ctb")),person("Felix", "Pape",role=c("ctr","ctb")),person("Vasyl","Pihur", role=c("ctb")),person("Guy","Brock", role=c("ctb")),person("Susmita","Datta", role=c("ctb")),person("Somnath","Datta", role=c("ctb")),person("Luis","Winckelmann", role=c("com")),person("Alfred", "Ultsch",role=c("dtc","ctb")),person("Quirin", "Stier",role=c("ctb","rev"))) 7 | Maintainer: Michael Thrun 8 | Description: Over sixty clustering algorithms are provided in this package with consistent input and output, which enables the user to try out algorithms swiftly. Additionally, 26 statistical approaches for the estimation of the number of clusters as well as the mirrored density plot (MD-plot) of clusterability are implemented. The packages is published in Thrun, M.C., Stier Q.: "Fundamental Clustering Algorithms Suite" (2021), SoftwareX, . Moreover, the fundamental clustering problems suite (FCPS) offers a variety of clustering challenges any algorithm should handle when facing real world data, see Thrun, M.C., Ultsch A.: "Clustering Benchmark Datasets Exploiting the Fundamental Clustering Problems" (2020), Data in Brief, . 
9 | Imports: mclust, ggplot2, DataVisualizations 10 | Suggests: mlpack, kernlab, cclust, dbscan, kohonen, MCL, ADPclust, cluster, DatabionicSwarm, orclus, subspace, flexclust, ABCanalysis, apcluster, pracma, EMCluster, pdfCluster, parallelDist, plotly, ProjectionBasedClustering, GeneralizedUmatrix, mstknnclust, densityClust, parallel, energy, R.utils, tclust, Spectrum, genie, protoclust, fastcluster, clusterability, signal, reshape2, PPCI, clustrd, smacof, rgl, prclust, CEC, dendextend, moments, prabclus, VarSelLCM, sparcl, mixtools, HDclassif, clustvarsel, yardstick, knitr, rmarkdown, igraph, leiden, clustMixType, clusterSim, NetworkToolbox, randomForest, ConsensusClusterPlus, RWeka 11 | Depends: R (>= 3.5.0) 12 | License: GPL-3 13 | LazyData: TRUE 14 | LazyLoad: yes 15 | URL: https://www.deepbionics.org/ 16 | BugReports: https://github.com/Mthrun/FCPS/issues 17 | Encoding: UTF-8 18 | VignetteBuilder: knitr 19 | SystemRequirements: Pandoc (>= 1.12.3) 20 | -------------------------------------------------------------------------------- /man/HierarchicalClusterData.Rd: -------------------------------------------------------------------------------- 1 | \name{HierarchicalClusterData} 2 | \alias{HierarchicalClusterData} 3 | \alias{HierarchicalCluster} 4 | 5 | \title{ 6 | Internal function of Hierarchical Clustering of Data 7 | } 8 | \usage{ 9 | HierarchicalClusterData(Data,ClusterNo=0, 10 | 11 | Type="ward.D2",DistanceMethod="euclidean", 12 | 13 | ColorTreshold=0,Fast=FALSE,Cls=NULL,\dots) 14 | } 15 | 16 | \description{ 17 | Please use \code{\link{HierarchicalClustering}}. Hierarchical cluster analysis on a set of dissimilarities and methods for analyzing it. Uses the stats package function 'hclust'. 18 | } 19 | \arguments{ 20 | \item{Data}{[1:n,1:d] matrix of dataset to be clustered. It consists of n cases of d-dimensional data points. Every case has d attributes, variables or features.} 21 | \item{ClusterNo}{A number k which defines k different clusters to be built by the algorithm.} 22 | \item{Type}{Method of clustering: "ward.D", "ward.D2", "single", "complete", "average", "mcquitty", "median" or "centroid".} 23 | \item{DistanceMethod}{See \code{\link[parallelDist]{parDist}}, for example 'euclidean','mahalanobis','manhattan' (cityblock),'fJaccard','binary', 'canberra', 'maximum'. Any unambiguous substring can be given.} 24 | \item{ColorTreshold}{Draws cutline w.r.t. dendrogram y-axis (height), height of line as scalar should be given} 25 | \item{Fast}{If TRUE and fastcluster is installed, a faster implementation of the methods above is used} 26 | \item{Cls}{[1:n] classification vector for coloring of the dendrogram in the plot} 27 | \item{\dots}{In case of plotting, further arguments for \code{plot}, see \code{\link{as.dendrogram}} 28 | } 29 | } 30 | \value{ 31 | List of 32 | \item{Cls}{If ClusterNo>0: [1:n] numerical vector with n numbers defining the classification as the main output of the clustering algorithm. It has k unique numbers representing the arbitrary labels of the clustering.
Otherwise for ClusterNo=0: NULL} 33 | \item{Dendrogram}{Dendrogram of hierarchical clustering algorithm} 34 | \item{Object}{Ultrametric tree of hierarchical clustering algorithm} 35 | } 36 | \author{ 37 | Michael Thrun 38 | } 39 | \seealso{ 40 | \code{\link{HierarchicalClusterData}} 41 | 42 | \code{\link{HierarchicalClusterDists}} 43 | 44 | \code{\link{HierarchicalClustering}} 45 | } 46 | \examples{ 47 | data('Hepta') 48 | #out=HierarchicalClusterData(Hepta$Data,ClusterNo=7) 49 | } 50 | \keyword{Hierarchical} 51 | \keyword{Clustering} 52 | \keyword{Agglomerative} -------------------------------------------------------------------------------- /R/GenieClustering.R: -------------------------------------------------------------------------------- 1 | GenieClustering=function(DataOrDistances,ClusterNo=0,DistanceMethod="euclidean",ColorTreshold=0,...){ 2 | # INPUT 3 | # DataOrDistances[1:n,1:d] Dataset with n observations and d features or distance matrix with size n 4 | # 5 | # OPTIONAL 6 | # ClusterNo Number of clusters to search for 7 | # DistanceMethod String. 'euclidean','mahalanobis','manhatten' (cityblock),'fJaccard','binary', 'canberra', 'maximum' 8 | # ColorTreshold Number. Draws cutline w.r.t. dendogram y-axis (height), height of line as scalar should be given 9 | # 10 | # OUTPUT 11 | # Cls[1:n] Clustering of data 12 | # hc Object of hclust2 algorithm 13 | # 14 | # Author: MT 15 | if (!requireNamespace('genie',quietly = TRUE)) { 16 | message( 17 | 'Subordinate clustering package (genie) is missing. No computations are performed. 18 | Please install the package which is defined in "Suggests".' 19 | ) 20 | return( 21 | list( 22 | Cls = rep(1, nrow(DataOrDistances)), 23 | Object = "Subordinate clustering package (genie) is missing. 24 | Please install the package which is defined in 'Suggests'." 25 | ) 26 | ) 27 | } 28 | 29 | if (!isSymmetric(unname(DataOrDistances))) { 30 | if(requireNamespace("parallelDist",quietly = TRUE)){ 31 | pDist=as.dist(parallelDist::parDist(DataOrDistances,method=DistanceMethod)) 32 | } 33 | else{ 34 | stop('parallelDist package not loaded or installed.') 35 | } 36 | 37 | }else if(!inherits(DataOrDistances,'dist')){ 38 | pDist=as.dist(DataOrDistances) 39 | }else{ 40 | pDist=DataOrDistances 41 | } 42 | 43 | #requireNamespace('genie') 44 | hc <- genie::hclust2(pDist,...) 45 | 46 | m=paste("Genie Clustering/ "," N=",nrow(as.matrix(pDist))) 47 | 48 | # Classification or dendrogram 49 | if (ClusterNo>0){ 50 | Cls=cutree(hc,ClusterNo) 51 | Cls=ClusterRename(Cls,DataOrDistances) 52 | return(list(Cls=Cls,Dendrogram=as.dendrogram(hc),Object=hc)) 53 | } 54 | else{ 55 | x=as.dendrogram(hc);plot(x, main=m,xlab="Number of data points N", ylab="Distance",sub=" ",leaflab ="none") 56 | axis(1,col="black",las=1) 57 | if (ColorTreshold!=0){ 58 | rect.hclust(hc, h=ColorTreshold,border="red")} 59 | else{ 60 | } 61 | return(list(Cls=NULL,Dendrogram=x,Object=hc)) 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /man/ClusterUpsamplingMinority.Rd: -------------------------------------------------------------------------------- 1 | \name{ClusterUpsamplingMinority} 2 | \alias{ClusterUpsamplingMinority} 3 | %- Also NEED an '\alias' for EACH other topic documented here. 4 | \title{ 5 | Cluster Up Sampling using SMOTE for minority cluster 6 | } 7 | \description{ 8 | Wrapper for one specific internal function of L. Torgo who implemented there the relevant part of the SMOTE algorithm [Chawla et al., 2002]. 
9 | } 10 | \usage{ 11 | ClusterUpsamplingMinority(Cls, Data, MinorityCluster, 12 | 13 | Percentage = 200, knn = 5, PlotIt = FALSE) 14 | } 15 | %- maybe also 'usage' for other objects documented here. 16 | \arguments{ 17 | \item{Cls}{ 18 | [1:n] numerical vector of numbers defining the classification as the main output of the clustering algorithm for the n cases of data. It has k unique numbers representing the arbitrary labels of the clustering. 19 | } 20 | \item{Data}{ 21 | [1:n,1:d] data matrix of n cases and d features 22 | } 23 | \item{MinorityCluster}{ 24 | scalar defining the number of the cluster to be upsampled 25 | } 26 | \item{Percentage}{ 27 | percentage above 100 defining how many samples should be taken 28 | } 29 | \item{knn}{ 30 | k nearest neighbors of the SMOTE algorithm} 31 | \item{PlotIt}{ 32 | TRUE: plots the result using \code{\link{ClusterPlotMDS}} 33 | } 34 | } 35 | \details{ 36 | The number of items \code{m} is defined by the scalar \code{Percentage}, and the up sampling is combined with the \code{Data} and the \code{Cls} to \code{DataExt} and \code{ClsExt} such that the sample is placed thereafter. 37 | } 38 | \value{ 39 | List with 40 | \item{ClsExt }{[1:(n+m)] numerical vector of numbers defining the classification as the main output of the clustering algorithm for the n+m cases of data. It has k unique numbers representing the arbitrary labels of the clustering.} 41 | \item{DataExt }{[1:(n+m),1:d] data matrix of n+m cases and d features} 42 | 43 | } 44 | \references{ 45 | [Chawla et al., 2002] Chawla, N. V., Bowyer, K. W., Hall, L. O., & Kegelmeyer, W. P.: SMOTE: synthetic minority over-sampling technique, Journal of artificial intelligence research, Vol. 16, pp. 321-357. 2002. 46 | } 47 | \author{ 48 | L. Torgo 49 | } 50 | 51 | \examples{ 52 | data(Lsun3D) 53 | Data=Lsun3D$Data 54 | Cls=Lsun3D$Cls 55 | table(Cls) 56 | 57 | V=ClusterUpsamplingMinority(Cls,Data,4,1000) 58 | table(V$ClsExt) 59 | } 60 | 61 | \keyword{SMOTE} 62 | \keyword{upsampling} 63 | \keyword{over-sampling} 64 | \concept{up sampling} 65 | \concept{over sampling} 66 | -------------------------------------------------------------------------------- /man/ClusterDunnIndex.Rd: -------------------------------------------------------------------------------- 1 | \name{ClusterDunnIndex} 2 | \alias{ClusterDunnIndex} 3 | 4 | \title{ 5 | Dunn Index 6 | } 7 | \description{ 8 | Internal (i.e. without prior classification) cluster quality measure called Dunn index for a given clustering published in [Dunn, 1974]. 9 | } 10 | \usage{ 11 | ClusterDunnIndex(Cls,DataOrDistances, 12 | 13 | DistanceMethod="euclidean",Silent=TRUE,Force=FALSE,\dots) 14 | } 15 | 16 | \arguments{ 17 | \item{Cls}{ 18 | [1:n] numerical vector of numbers defining the classification as the main output of the clustering algorithm for the n cases of data. It has k unique numbers representing the arbitrary labels of the clustering.
19 | } 20 | \item{DataOrDistances}{ 21 | matrix, DataOrDistance[1:n,1:n] symmetric matrix of dissimilarities, if variable unsymmetric 22 | DataOrDistance[1:d,1:n] is assumed as a dataset and the euclidean distances are calculated of d variables and n cases 23 | } 24 | 25 | \item{DistanceMethod}{ 26 | Optional, one of 39 distance methods of \code{parDist} of package parallelDist, if Data matrix is chosen above 27 | } 28 | \item{Silent}{ 29 | TRUE: Warnings are shown 30 | } 31 | \item{Force}{ 32 | TRUE: force computing in case of numerical instability 33 | } 34 | \item{\dots}{Further arguments passed on to the \code{parDist} function, e.g. user_defined distance functions} 35 | } 36 | \details{ 37 | Dunn index is defined as \code{Dunn=min(InterDist)/max(IntraDist)}. Well seperated clusters have usually a dunn index above 1, for details please see [Dunn, 1974]. 38 | } 39 | \value{ 40 | List of 41 | \item{Dunn}{ 42 | scalar, Dunn Index 43 | } 44 | \item{IntraDist}{ 45 | [1:k] numerical vector of minimal intra cluster distances per given cluster 46 | } 47 | \item{InterDist}{ 48 | [1:k] numerical vector of minimal inter cluster distances per given cluster 49 | } 50 | } 51 | \references{ 52 | [Dunn, 1974] Dunn, J. C.: Well_separated clusters and optimal fuzzy partitions, Journal of cybernetics, Vol. 4(1), pp. 95-104. 1974. 53 | } 54 | \author{ 55 | Michael Thrun 56 | } 57 | 58 | \examples{ 59 | data("Hepta") 60 | Cls=kmeansClustering(Hepta$Data,ClusterNo = 7,Type="Hartigan")$Cls 61 | ClusterDunnIndex(Cls,Hepta$Data) 62 | 63 | \donttest{ 64 | data("Hepta") 65 | ClsWellSeperated=kmeansClustering(Hepta$Data,ClusterNo = 7,Type="Steinley")$Cls 66 | ClusterDunnIndex(ClsWellSeperated,Hepta$Data) 67 | } 68 | } 69 | \keyword{DunnIndex} 70 | \keyword{dunn} 71 | \keyword{Clustering} 72 | \concept{Dunn Index} 73 | 74 | -------------------------------------------------------------------------------- /R/ClusterCount.R: -------------------------------------------------------------------------------- 1 | ClusterCount <- function(Cls,Ordered=TRUE,NonFinite=9999) { 2 | # Calculates statistics for clustering 3 | # C <-ClusterCount(Cls) 4 | # UniqueClusters <-C$UniqueClusters 5 | # CountPerCluster <-C$CountPerCluster 6 | # NrOfClusters <-C$NumberOfClusters 7 | # ClusterPercentages <-C$ClusterPercentages 8 | # 9 | # INPUT 10 | # Cls[d] numeric vector such as Cls(i) == ClusterNumber of Data[i,] of point i 11 | # 12 | # OUTPUT list with: 13 | # UniqueClusters[1:NrOfClusters] NrOfClusters unique Clusters in Cls 14 | # CountPerCluster(NrOfClusters,n) CountPerCluster(i) is the Count of the data points in UniqueClusters(i) 15 | # NumberOfClusters Number of Clusters 16 | # ClusterPercentages Percentages of the Clusters 17 | # 18 | # Author MT 19 | 20 | if(!is.vector(Cls)){ 21 | warning('ClusterCount: Cls is not a vector. Calling as.numeric(as.character(Cls))') 22 | Cls=as.numeric(as.character(Cls)) 23 | } 24 | if(is.numeric(Cls)){ 25 | Cls[!is.finite(Cls)]=NonFinite 26 | }else{ 27 | warning('ClusterCount: Cls is not numeric Calling as.character(Cls)') 28 | Cls=as.vector(Cls) 29 | } 30 | if(isFALSE(Ordered)){ 31 | countPerCluster=table(Cls) 32 | u= unique(Cls,fromLast = FALSE) 33 | uniqueClusters = as.numeric(names(countPerCluster)) #order ist not as is! 
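# Hedged sketch of the counting performed here (toy input, not from the original
# sources); guarded by if (FALSE) so it is never executed inside this function.
if (FALSE) {
  library(FCPS)
  ToyCls = c(1, 1, 2, 2, 2, 3)
  Counts = ClusterCount(ToyCls)
  Counts$CountPerCluster    # 2, 3 and 1 observations in clusters 1, 2 and 3
  Counts$ClusterPercentages # approximately 33.3, 50 and 16.7 percent
}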
34 | ind=match(u,table = uniqueClusters) 35 | uniqueClusters=uniqueClusters[ind] 36 | countPerCluster=as.numeric(countPerCluster)[ind] 37 | }else{ 38 | #radix: fasted sort of numeric 39 | #rle, run length encoding, counts number of consecutive values 40 | V=rle(sort(Cls,method="radix")) 41 | countPerCluster=V$lengths 42 | uniqueClusters=V$values 43 | } 44 | 45 | numberOfClusters = length(uniqueClusters) 46 | ClusterPercentages = as.numeric(prop.table(countPerCluster)*100) 47 | 48 | # Overview=cbind( 49 | # uniqueClusters[ind], 50 | # as.numeric(countPerCluster)[ind], 51 | # ClusterPercentages[ind] 52 | # ) 53 | # if(isTRUE(Ordered)){ 54 | # ind=order(Overview[,1],decreasing = FALSE,na.last = T) 55 | # Overview=Overview[ind,,drop=FALSE] 56 | # } 57 | names(countPerCluster)=uniqueClusters 58 | return( 59 | list( 60 | UniqueClusters = uniqueClusters, 61 | CountPerCluster = countPerCluster, 62 | NumberOfClusters = numberOfClusters, 63 | ClusterPercentages = ClusterPercentages 64 | ) 65 | ) 66 | } 67 | -------------------------------------------------------------------------------- /R/MinimaxLinkageClustering.R: -------------------------------------------------------------------------------- 1 | MinimaxLinkageClustering=function(DataOrDistances,ClusterNo=0,DistanceMethod="euclidean",ColorTreshold=0,...){ 2 | # INPUT 3 | # DataOrDistances[1:n,1:d] Dataset with n observations and d features or distance matrix with size n 4 | # ClusterNo Number of clusters to search for 5 | # 6 | # OPTIONAL 7 | # DistanceMethod Choose distance metric. 8 | # ColorTreshold draws cutline w.r.t. dendogram y-axis (height), height of line as scalar should be given 9 | # 10 | # OUTPUT 11 | # Cls[1:n] Clustering of data 12 | # Dendrogram 13 | # Object Object of protoclust::protoclust algorithm 14 | # 15 | # Author: MT 16 | if (!requireNamespace('protoclust',quietly = TRUE)) { 17 | message( 18 | 'Subordinate clustering package (protoclust) is missing. No computations are performed. 19 | Please install the package which is defined in "Suggests".' 20 | ) 21 | return( 22 | list( 23 | Cls = rep(1, nrow(DataOrDistances)), 24 | Object = "Subordinate clustering package (protoclust) is missing. 25 | Please install the package which is defined in 'Suggests'." 26 | ) 27 | ) 28 | } 29 | 30 | if (!isSymmetric(unname(DataOrDistances))) { 31 | if(requireNamespace("parallelDist",quietly = TRUE)){ 32 | pDist=as.dist(parallelDist::parDist(DataOrDistances,method=DistanceMethod)) 33 | } 34 | else{ 35 | warning("Please install the parallelDist package, using dist()") 36 | pDist=dist(DataOrDistances,method=DistanceMethod) 37 | } 38 | 39 | }else if(!inherits(DataOrDistances,'dist')){ 40 | pDist=as.dist(DataOrDistances) 41 | }else{ 42 | pDist=DataOrDistances 43 | } 44 | 45 | hc <- protoclust::protoclust(pDist,...) 46 | 47 | m=paste("Minimax Linkage Clustering/ "," N=",nrow(as.matrix(pDist))) 48 | 49 | # Classification or Dendrogram 50 | if(ClusterNo>0){ 51 | out=protoclust::protocut(hc,ClusterNo) 52 | Cls=out$cl 53 | Cls=ClusterRename(Cls,DataOrDistances) 54 | return(list(Cls=Cls,Dendrogram=as.dendrogram(hc),Object=out)) 55 | } 56 | else{ 57 | x=as.dendrogram(hc);plot(x, main=m,xlab="Number of data points N", ylab="Distance",sub=" ",leaflab ="none",...) 
58 | axis(1,col="black",las=1) 59 | if (ColorTreshold!=0){ 60 | rect.hclust(hc, h=ColorTreshold,border="red")} 61 | else{ 62 | } 63 | return(list(Cls=NULL,Dendrogram=x,Object=hc)) 64 | } 65 | } -------------------------------------------------------------------------------- /man/ModelBasedClustering.Rd: -------------------------------------------------------------------------------- 1 | \name{ModelBasedClustering} 2 | \alias{ModelBasedClustering} 3 | %- Also NEED an '\alias' for EACH other topic documented here. 4 | \title{ 5 | Model Based Clustering 6 | } 7 | \description{ 8 | Calls Model based clustering of [Fraley/Raftery, 2006] which models a Mixture Of Gaussians (MoG). 9 | } 10 | \usage{ 11 | ModelBasedClustering(Data,ClusterNo=2,PlotIt=FALSE,...) 12 | } 13 | %- maybe also 'usage' for other objects documented here. 14 | \arguments{ 15 | \item{Data}{[1:n,1:d] matrix of dataset to be clustered. It consists of n cases of d-dimensional data points. Every case has d attributes, variables or features.} 16 | 17 | \item{ClusterNo}{A number k which defines k different clusters to be built by the algorithm.} 18 | \item{PlotIt}{Default: FALSE, If TRUE plots the first three dimensions of the dataset with colored three-dimensional data points defined by the clustering stored in \code{Cls}} 19 | \item{\dots}{Further arguments to be set for the clustering algorithm, if not set, default arguments are used.} 20 | 21 | } 22 | \details{ 23 | see [Thrun, 2017, p. 23] or [Fraley/Raftery, 2002] and [Fraley/Raftery, 2006]. 24 | } 25 | \value{ 26 | List of 27 | \item{Cls}{[1:n] numerical vector with n numbers defining the classification as the main output of the clustering algorithm. It has k unique numbers representing the arbitrary labels of the clustering.} 28 | \item{Object}{Object defined by clustering algorithm as the other output of this algorithm} 29 | } 30 | \references{ 31 | [Thrun, 2017] Thrun, M. C.:A System for Projection Based Clustering through Self-Organization and Swarm Intelligence, (Doctoral dissertation), Philipps-Universitaet Marburg, Marburg, 2017. 32 | 33 | [Fraley/Raftery, 2002] Fraley, C., and Raftery, A. E.: Model-based clustering, discriminant analysis, and density estimation, Journal of the American Statistical Association, Vol. 97(458), pp. 611-631. 2002. 34 | 35 | [Fraley/Raftery, 2006] Fraley, C., and Raftery, A. E.MCLUST version 3: an R package for normal mixture modeling and model-based clustering,DTIC Document, 2006. 36 | } 37 | \author{ 38 | Michael Thrun 39 | } 40 | \examples{ 41 | data('Hepta') 42 | out=ModelBasedClustering(Hepta$Data,PlotIt=FALSE) 43 | } 44 | \note{MoGclustering used in [Thrun, 2017] was renamed to \code{\link{ModelBasedClustering}} in this package.} 45 | \seealso{ 46 | \code{\link{MoGclustering}} 47 | } 48 | 49 | \keyword{MixtureOfGaussians} 50 | \keyword{MoG} 51 | \concept{Model based clustering} 52 | \concept{Mixture Of Gaussians} -------------------------------------------------------------------------------- /R/ClusterApply.R: -------------------------------------------------------------------------------- 1 | ClusterApply <- function(DataOrDistances, FUN, Cls,Simple=FALSE,...){ 2 | # 3 | # Applies a given function to each dimension for each cluster in Cls for all observations in the data. 
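# Hedged usage sketch (assumes the FCPS Hepta data); guarded by if (FALSE) so it
# is never executed inside this function.
if (FALSE) {
  library(FCPS)
  data("Hepta")
  PerClusterMeans = ClusterApply(Hepta$Data, mean, Hepta$Cls)
  str(PerClusterMeans) # per-cluster column results plus the unique cluster labels
}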
4 | # 5 | # INPUT 6 | # DataOrDistances(n,d) n cases, d variables 7 | # FUN Function to be applied 8 | # Cls(n) Cls(i) == ClusterNumber of Data(i,:) 9 | # 10 | # OUTPUT 11 | # UniqueClasses The AnzCluster unique clusters in Cls 12 | # FUNPerCluster FUNPerCluster[i] is the result of FUN for the data points in Cluster UniqueClusters[i] 13 | # 14 | # 15 | if(!is.matrix(DataOrDistances)){ 16 | warning('ClusterApply: DataOrDistances is not a matrix. Calling as.matrix') 17 | DataOrDistances=as.matrix(DataOrDistances) 18 | } 19 | if(mode(DataOrDistances)!="numeric"){ 20 | warning('ClusterApply: DataOrDistances is not numeric, setting mode to numeric.') 21 | mode(DataOrDistances)="numeric" 22 | } 23 | if (isSymmetric(unname(DataOrDistances))) { 24 | Data=internalMDSestimate(DataOrDistances) 25 | }else{ 26 | Data=DataOrDistances 27 | } 28 | if(missing(Cls)){ 29 | Cls=rep(1,nrow(Data)) 30 | } 31 | if(is.list(Cls)){ 32 | warning('ClusterApply: Cls is a list. Calling unlist') 33 | Cls=unlist(Cls) 34 | } 35 | if(!is.vector(Cls)){ 36 | warning('ClusterApply: Cls is not a vector. Calling as.vector') 37 | Cls=as.vector(Cls) 38 | } 39 | Names=colnames(Data) 40 | 41 | 42 | #Option 2 43 | if(isFALSE(Simple)){ 44 | Liste=split(x = as.data.frame(Data),f = Cls) 45 | uniqueClusters=names(Liste) 46 | PerClusterV=lapply(Liste, function(x,FUN) apply(x,FUN=FUN,MARGIN = 2),FUN) 47 | resultPerCluster=do.call(rbind,PerClusterV) 48 | 49 | if(!is.null(Names)){ 50 | try({ 51 | colnames(resultPerCluster)=Names 52 | }) 53 | } 54 | try({ 55 | 56 | if(length(uniqueClusters)==nrow(resultPerCluster)) 57 | rownames(resultPerCluster)=uniqueClusters 58 | else 59 | rownames(resultPerCluster)=NULL 60 | }) 61 | 62 | V=list(ResultPerCluster = resultPerCluster,UniqueClusters = uniqueClusters) 63 | 64 | tryCatch({ 65 | string=as.character(substitute(FUN)) 66 | names(V)=c('UniqueClusters',paste0(string,'PerCluster')) 67 | },error=function(e){ 68 | message('ClusterApply: FUN could not be extracted because:') 69 | message(e) 70 | }) 71 | }else{ 72 | V=apply(Data,2,function(X,...) tapply(X, Cls, FUN = FUN,...)) 73 | } 74 | return(V) 75 | } -------------------------------------------------------------------------------- /R/DivisiveAnalysisClustering.R: -------------------------------------------------------------------------------- 1 | DivisiveAnalysisClustering <-function(DataOrDistances,ClusterNo,PlotIt=FALSE,Standardization=TRUE,PlotTree=FALSE,Data,...){ 2 | # Cls=DivisiveAnalysisClustering(Data,ClusterNo=2) 3 | # DivisiveAnalysisClustering (diana) 4 | # 5 | # INPUT 6 | # DataOrDistances[1:n,1:d] Dataset with n observations and d features or distance matrix with size n 7 | # ClusterNo Number of clusters to search for 8 | # PlotIt Boolean. Decision to plot or not 9 | # Standardization Boolean. Decision of use of standardization. 10 | # 11 | # OUTPUT 12 | # Cls[1:n] Clustering of data 13 | # dianaObject Object of sota Alorithm 14 | # 15 | # Author: MT 04/2018 16 | if (!requireNamespace('cluster',quietly = TRUE)) { 17 | message( 18 | 'Subordinate clustering package (cluster) is missing. No computations are performed. 19 | Please install the package which is defined in "Suggests".' 20 | ) 21 | return( 22 | list( 23 | Cls = rep(1, nrow(DataOrDistances)), 24 | Object = "Subordinate clustering package (cluster) is missing. 25 | Please install the package which is defined in 'Suggests'." 
26 | ) 27 | ) 28 | } 29 | 30 | if(missing(DataOrDistances)){ 31 | DataOrDistances=Data 32 | } 33 | if(Standardization==1) Standardization=TRUE 34 | if(Standardization==0) Standardization=FALSE 35 | 36 | if (isSymmetric(unname(DataOrDistances))) { 37 | Input = as.dist(DataOrDistances) 38 | AnzVar = ncol(DataOrDistances) 39 | AnzData = nrow(DataOrDistances) 40 | diss =TRUE 41 | }else{ 42 | Input=DataOrDistances 43 | diss =FALSE 44 | } 45 | 46 | res=cluster::diana(x=Input,diss =diss,stand=Standardization,...) 47 | Dendrogram=as.dendrogram(as.hclust(res)) 48 | if(length(ClusterNo)!=1){ 49 | stop('ClusterNo has to be a numerical number not a vector of length higher than 1 or another object.') 50 | } 51 | if(ClusterNo>0){ 52 | Cls=cutree(as.hclust(res), k = ClusterNo) 53 | 54 | if(PlotIt){ 55 | ClusterPlotMDS(DataOrDistances,Cls) 56 | } 57 | } 58 | if(isTRUE(PlotTree)) 59 | ClusterDendrogram(Dendrogram,ClusterNo = ClusterNo,main='DIANA') 60 | 61 | if(ClusterNo<=0){ 62 | Cls=NULL 63 | if(ClusterNo<0){ 64 | warning(('ClusterNo cannot be a negativ number')) 65 | } 66 | } 67 | Cls=ClusterRename(Cls,DataOrDistances) 68 | return(list(Cls=Cls,Dendrogram=Dendrogram,Object=res)) 69 | } -------------------------------------------------------------------------------- /R/ProjectionPursuitClustering.R: -------------------------------------------------------------------------------- 1 | ProjectionPursuitClustering=function(Data,ClusterNo,Type="MinimumDensity",PlotIt=FALSE,PlotSolution=FALSE,...){ 2 | # INPUT 3 | # Data[1:n,1:d] Data set with n observations and d features 4 | # ClusterNo Number of clusters to search for 5 | # 6 | # OPTIONAL 7 | # Type Either MinimumDensity, MaximumClusterbility or NormalisedCut. 8 | # PlotIt Boolean. Decision to plot or not 9 | # PlotSolution Plots the partioning solution as a tree as described in 10 | # 11 | # OUTPUT 12 | # Cls[1:n] Clustering of data 13 | # Object Object of PPCI::ncuth algorithm 14 | # 15 | # Author: MT, 04/2020 16 | if (!requireNamespace('PPCI',quietly = TRUE)) { 17 | message( 18 | 'Subordinate clustering package (PPCI) is missing. No computations are performed. 19 | Please install the package which is defined in "Suggests".' 20 | ) 21 | return( 22 | list( 23 | Cls = rep(1, nrow(Data)), 24 | Object = "Subordinate clustering package (PPCI) is missing. 25 | Please install the package which is defined in 'Suggests'." 26 | ) 27 | ) 28 | } 29 | switch(Type, 30 | 'MinimumDensity'={ 31 | out=PPCI::mddc(X=Data,K=ClusterNo,...) 32 | }, 33 | 'MaximumClusterbility'={out=PPCI::mcdc(X=Data,K=ClusterNo,...)}, 34 | 'NormalisedCut'={out=PPCI::ncutdc(X=Data,K=ClusterNo,...)}, 35 | 'KernelPCA'={ 36 | if(!missing(ClusterNo)){ 37 | message('ProjectionPursuitClustering of type KernelPCA does not require "ClusterNo" and will determine the number of clusters automatically.') 38 | } 39 | 40 | if(requireNamespace("kernlab",quietly = TRUE)){ 41 | x2=kernlab::kpca(Data,kernel="rbfdot",kpar=list(sigma=3))@rotated 42 | } 43 | else{ 44 | stop('kernlab package not loaded or installed.') 45 | } 46 | out=PPCI::ncuth(x2,...) 
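# Hedged sketch comparing two of the projection pursuit variants wrapped in this
# switch (assumes the suggested PPCI package is installed); guarded by if (FALSE)
# so it is never executed inside this branch.
if (FALSE) {
  library(FCPS)
  data("Hepta")
  MinDens = ProjectionPursuitClustering(Hepta$Data, ClusterNo = 7, Type = "MinimumDensity")
  NCut = ProjectionPursuitClustering(Hepta$Data, ClusterNo = 7, Type = "NormalisedCut")
  table(MinDens$Cls, NCut$Cls)
}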
47 | },{ 48 | warning('Incorrect option selected') 49 | return('Incorrect option selected') 50 | } 51 | ) 52 | # out=out 53 | 54 | Cls=out$cluster 55 | 56 | if(!is.null(rownames(Data))) 57 | names(Cls)=rownames(Data) 58 | else 59 | names(Cls)=1:nrow(Data) 60 | 61 | if(isTRUE(PlotIt)){ 62 | ClusterPlotMDS(Data,Cls) 63 | } 64 | if(isTRUE(PlotSolution)){ 65 | plot(out) 66 | } 67 | Cls=ClusterRename(Cls,Data) 68 | return(list(Cls=Cls,Object=out)) 69 | } -------------------------------------------------------------------------------- /man/ADPclustering.Rd: -------------------------------------------------------------------------------- 1 | \name{ADPclustering} 2 | \alias{ADPclustering} 3 | %- Also NEED an '\alias' for EACH other topic documented here. 4 | \title{ 5 | (Adaptive) Density Peak Clustering algorithm using automatic parameter selection 6 | } 7 | \description{ 8 | The algorithm was introduced in [Rodriguez/Laio, 2014] and here implemented by [Wang/Xu, 2017]. The algorithm is adaptive in the sense that only \code{ClusterNo} has to be set instead of the paramters of [Rodriguez/Laio, 2014] implemented in \code{\link{ADPclustering}}. 9 | } 10 | \usage{ 11 | ADPclustering(Data,ClusterNo=NULL,PlotIt=FALSE,...) 12 | } 13 | %- maybe also 'usage' for other objects documented here. 14 | \arguments{ 15 | \item{Data}{[1:n,1:d] matrix of dataset to be clustered. It consists of n cases of d-dimensional data points. Every case has d attributes, variables or features.} 16 | \item{ClusterNo}{Optional, either: A number k which defines k different Clusters to be build by the algorithm, or a range of \code{ClusterNo} to let the algorithm choose from.} 17 | 18 | \item{PlotIt}{default: FALSE, If TRUE plots the first three dimensions of the dataset with colored three-dimensional data points defined by the clustering stored in \code{Cls}} 19 | \item{\dots}{Further arguments to be set for the clustering algorithm, if not set, default arguments are used.} 20 | } 21 | \details{ 22 | The ADP algorithm decides the k number of clusters. This is contrary to the other version of the algorithm from another package which can be called with \code{\link{DensityPeakClustering}}. 23 | } 24 | \value{ 25 | List of 26 | \item{Cls}{[1:n] numerical vector with n numbers defining the classification as the main output of the clustering algorithm. It has k unique numbers representing the arbitrary labels of the clustering.} 27 | \item{Object}{Object defined by clustering algorithm as the other output of this algorithm} 28 | } 29 | \references{ 30 | [Rodriguez/Laio, 2014] Rodriguez, A., & Laio, A.: Clustering by fast search and find of density peaks, Science, Vol. 344(6191), pp. 1492-1496. 2014. 31 | 32 | [Wang/Xu, 2017] Wang, X.-F., & Xu, Y.: Fast clustering using adaptive density peak detection, Statistical methods in medical research, Vol. 26(6), pp. 2800-2811. 
2017.} 33 | \author{ 34 | Michael Thrun 35 | } 36 | \seealso{ 37 | \code{\link{DensityPeakClustering}} 38 | 39 | \code{\link[ADPclust]{adpclust}} 40 | } 41 | \examples{ 42 | data('Hepta') 43 | out=ADPclustering(Hepta$Data,PlotIt=FALSE) 44 | } 45 | 46 | \keyword{ADPclustering} 47 | \concept{fast search and find of density peaks} 48 | -------------------------------------------------------------------------------- /man/SpectralClustering.Rd: -------------------------------------------------------------------------------- 1 | \name{SpectralClustering} 2 | \alias{SpectralClustering} 3 | \title{ Spectral Clustering } 4 | 5 | \usage{ 6 | SpectralClustering(Data, ClusterNo,PlotIt=FALSE,...) 7 | } 8 | \arguments{ 9 | \item{Data}{[1:n,1:d] matrix of dataset to be clustered. It consists of n cases of d-dimensional data points. Every case has d attributes, variables or features.} 10 | 11 | \item{ClusterNo}{A number k which defines k different clusters to be built by the algorithm.} 12 | \item{PlotIt}{default: FALSE, if TRUE plots the first three dimensions of the dataset with colored three-dimensional data points defined by the clustering stored in \code{Cls}} 13 | \item{\dots}{Further arguments to be set for the clustering algorithm, if not set, default arguments are used. 14 | e.g.: 15 | 16 | \code{kernel} : Kernelmethod, possible options: rbfdot Radial Basis kernel function "Gaussian" polydot Polynomial kernel function vanilladot Linear kernel function tanhdot Hyperbolic tangent kernel function laplacedot Laplacian kernel function besseldot Bessel kernel function anovadot ANOVA RBF kernel function splinedot Spline kernel stringdot String kernel 17 | 18 | \code{kpar} : Kernelparameter: a character string or the list of hyper-parameters (kernel parameters). The default character string "automatic" uses a heuristic to determine a suitable value for the width parameter of the RBF kernel. "local" (local scaling) uses a more advanced heuristic and sets a width parameter for every point in the data set. A list can also be used containing the parameters to be used with the kernel function. 19 | } 20 | } 21 | \value{ 22 | List of 23 | \item{Cls}{[1:n] numerical vector with n numbers defining the classification as the main output of the clustering algorithm. It has k unique numbers representing the arbitrary labels of the clustering.} 24 | \item{Object}{Object defined by clustering algorithm as the other output of this algorithm} 25 | } 26 | 27 | 28 | 29 | \description{ 30 | Clusters the Data into "ClusterNo" different clusters using the Spectral Clustering method} 31 | 32 | \examples{ 33 | data('Hepta') 34 | out=SpectralClustering(Hepta$Data,ClusterNo=7,PlotIt=FALSE) 35 | } 36 | 37 | \author{Michael Thrun} 38 | 39 | \references{ 40 | [Ng et al., 2002] Ng, A. Y., Jordan, M. I., & Weiss, Y.: On spectral clustering: Analysis and an algorithm, Advances in neural information processing systems, Vol. 2, pp. 849-856. 2002. 
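A hedged usage sketch for the kernel arguments listed above (the kernel choice and sigma value are purely illustrative):
data('Hepta')
out = SpectralClustering(Hepta$Data, ClusterNo = 7,
                         kernel = "rbfdot", kpar = list(sigma = 2))
table(Hepta$Cls, out$Cls)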
41 | } 42 | \keyword{SpectralClustering} 43 | \concept{Spectral Clustering} -------------------------------------------------------------------------------- /R/ClusterShannonInfo.R: -------------------------------------------------------------------------------- 1 | ClusterShannonInfo=function(ClsMatrix){ 2 | # Info = ShannonInformation(ClsMatrix) 3 | 4 | # Shannon Information for each column in ClsMatrix, measured in Percent 5 | # INPUT 6 | # ClsMatrix(1:n,1:d) a matrix of Class vectors , a class vector containing integer values 7 | # 8 | # OUTPUT 9 | # Info(1:d) = sum(-p * log(p)/MaxInfo) for all unique cases with probability p in ClsMatrix(:,c) 10 | # for a column with k klasses MaxInfo = -(1/k)*log(1/k) 11 | # author: ALU, reimplemented from matlab by mct 12 | 13 | #Example 14 | # data <- as.matrix(iris[,1:4]) 15 | # 16 | # # Creating the clusterings for the data set 17 | # #(here with method complete) for the number of classes 2 to 8 18 | # hc <- hclust(dist(data), method = "complete") 19 | #numberOfClusters=100 20 | # clsm <- matrix(data = 0, nrow = dim(data)[1], ncol = numberOfClusters-1) 21 | # for (i in 2:numberOfClusters) { 22 | # clsm[,i-1] <- cutree(hc,i) 23 | # } 24 | # ClusterShannonInfo(clsm) 25 | 26 | V = dim(ClsMatrix) 27 | AnzCases=V[1] 28 | AnzVariablen=V[2] 29 | 30 | Info=vector(mode = "numeric",length = AnzVariablen) 31 | AnzValues=Info 32 | MaxInfo=Info 33 | MinInfo=Info 34 | MedianInfo=Info 35 | MeanInfo=Info # INIT 36 | 37 | for(c in 1: AnzVariablen){ 38 | V= ClusterCount(ClsMatrix[,c]) 39 | #$UniqueClusters 40 | #$CountPerCluster 41 | NumberOfClusters=V$NumberOfClusters 42 | ClusterPercentages=V$ClusterPercentages 43 | 44 | 45 | ProbOfClass = ClusterPercentages/100 46 | InfOfClass = -ProbOfClass*log(ProbOfClass) 47 | MaxLnInfo = -(1/NumberOfClusters)*log((1/NumberOfClusters)) # maximale Information bei dieser Anz Auspraegungen 48 | 49 | if(MaxLnInfo >0){ 50 | InfOfClass = InfOfClass/MaxLnInfo # Prozentuale Information 51 | }else{ 52 | InfOfClass = 0 53 | } 54 | InfOfVariable = InfOfClass 55 | # if(c==1) Info=list(InfOfVariable) 56 | # else Info[c] = list(InfOfVariable) 57 | 58 | if(c==1) Info=InfOfVariable 59 | else Info=DataVisualizations::CombineCols(Info,InfOfVariable) 60 | 61 | 62 | AnzValues[c] = NumberOfClusters 63 | MaxInfo[c] = max(InfOfClass) 64 | MinInfo[c] = min(InfOfClass) 65 | MedianInfo[c] = median(InfOfClass) 66 | MeanInfo[c] = mean(InfOfClass) 67 | } # fuer alle Variablen 68 | if(!is.null(colnames(ClsMatrix))) 69 | colnames(Info)=colnames(ClsMatrix) 70 | else 71 | colnames(Info)=paste0("ClusterNo ",AnzValues) 72 | 73 | return(list(Info=Info,ClusterNo=AnzValues,MaxInfo=MaxInfo,MinInfo=MinInfo,MedianInfo=MedianInfo,MeanInfo=MeanInfo)) 74 | } -------------------------------------------------------------------------------- /man/PenalizedRegressionBasedClustering.Rd: -------------------------------------------------------------------------------- 1 | \name{PenalizedRegressionBasedClustering} 2 | \alias{PenalizedRegressionBasedClustering} 3 | 4 | \title{ 5 | Penalized Regression-Based Clustering of [Wu et al., 2016]. 6 | } 7 | \description{ 8 | Clustering is performed through penalized regression with grouping pursuit 9 | } 10 | \usage{ 11 | PenalizedRegressionBasedClustering(Data, FirstLambda, 12 | 13 | SecondLambda, Tau, PlotIt = FALSE, \dots) 14 | } 15 | 16 | \arguments{ 17 | \item{Data}{ 18 | [1:n,1:d] matrix of dataset to be clustered. It consists of n cases of d-dimensional data points. Every case has d attributes, variables or features. 
19 | } 20 | \item{FirstLambda}{ 21 | Set 1 for quadratic penalty based algorithm, 0.4 for revised ADMM. 22 | } 23 | \item{SecondLambda}{ 24 | The magnitude of grouping penalty. 25 | } 26 | \item{Tau}{ 27 | Tuning parameter: tau, related to grouping penalty. 28 | } 29 | \item{PlotIt}{ 30 | Default: FALSE, if TRUE plots the first three dimensions of the dataset with colored three-dimensional data points defined by the clustering stored in \code{Cls} 31 | } 32 | \item{\dots}{ 33 | Further arguments for \code{\link[prclust]{PRclust}}, enables also usage of [Pan et al., 2013]. 34 | } 35 | } 36 | \details{ 37 | Parameters are rather challenging to choose. 38 | } 39 | \value{ 40 | List of 41 | \item{Cls}{[1:n] numerical vector with n numbers defining the classification as the main output of the clustering algorithm. It has k unique numbers representing the arbitrary labels of the clustering. 42 | } 43 | \item{Object}{Object defined by clustering algorithm as the other output of this algorithm} 44 | } 45 | \references{ 46 | [Pan et al., 2013] Pan, W., Shen, X., & Liu, B.: Cluster analysis: unsupervised learning via supervised learning with a non-convex penalty, The Journal of Machine Learning Research, Vol. 14(1), pp. 1865-1889. 2013. 47 | 48 | [Wu et al., 2016] Wu, C., Kwon, S., Shen, X., & Pan, W.: A new algorithm and theory for penalized regression-based clustering, The Journal of Machine Learning Research, Vol. 17(1), pp. 6479-6503. 2016. 49 | 50 | 51 | } 52 | \author{ 53 | Michael Thrun 54 | } 55 | \note{ 56 | Data matrix is internally transposed in order to fit the definition of the algorithm. 57 | } 58 | 59 | \examples{ 60 | data(Hepta) 61 | Data=Hepta$Data 62 | out=PenalizedRegressionBasedClustering(Data,0.4,1,2,PlotIt=FALSE) 63 | table(out$Cls,Hepta$Cls) 64 | } 65 | \keyword{ADMM} 66 | \keyword{PenalizedRegressionBasedClustering} 67 | \concept{DC-ADMM} 68 | \concept{Penalized Regression Based Clustering} -------------------------------------------------------------------------------- /R/ClusterDistances.R: -------------------------------------------------------------------------------- 1 | ClusterDistances=IntraClusterDistances=ClusterIntraDistances=function(FullDistanceMatrix,Cls,Names,PlotIt=FALSE){ 2 | # 3 | # INPUT 4 | # FullDistanceMatrix symmetric distance matrix 5 | # Cls numerical vector of k classes 6 | # 7 | # OPTIONAL 8 | # Names character vector naming k classes 9 | # PlotIt Boolean 10 | # 11 | # OUTPUT 12 | # matrix [1:m,1:(k+1)] of k clusters, each columns consists of the distances in a cluster, 13 | # filled up with NaN at the end to be of the same length as the complete distance matrix. 14 | # 15 | # 16 | if(missing(Cls)){ 17 | Cls=rep(1,nrow(FullDistanceMatrix)) 18 | } 19 | if(!is.vector(Cls)){ 20 | warning('ClusterDistances: Cls is not a vector. Calling as.numeric(as.character(Cls))') 21 | Cls=as.numeric(as.character(Cls)) 22 | } 23 | 24 | if(nrow(FullDistanceMatrix)!=length(Cls)){ 25 | stop('ClusterDistances: Dimensionality of distance matrix "FullDistanceMatrix" is not consistent with "Cls" classification vector') 26 | } 27 | 28 | if(!isSymmetric(unname(FullDistanceMatrix))){ 29 | stop('ClusterDistances: Distance matrix "FullDistanceMatrix" is not symmetric. Please check this, e.g. DataVisualizations::Pixelmatrix.') 30 | } 31 | u=sort(unique(Cls)) 32 | 33 | classdist=list(FullDistanceMatrix[upper.tri(FullDistanceMatrix,diag = F)]) 34 | if(length(u)==1) return(unlist(classdist)) 35 | # Does not work for clustersize==1! 
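# For every cluster label i, the loop below collects the upper triangle of the
# within-cluster distance submatrix; a cluster of size 1 yields an empty distance
# vector here, which is the singleton case flagged as unsupported above.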
36 | for(i in u){ 37 | classdistcur=FullDistanceMatrix[Cls==i,Cls==i] 38 | distvec=classdistcur[upper.tri(classdistcur,diag = F)] 39 | classdist=c(classdist,list(distvec)) 40 | } 41 | 42 | Intraclusterdistances=do.call(DataVisualizations::CombineCols,classdist) 43 | Intraclusterdistances=as.matrix(Intraclusterdistances) 44 | if(missing(Names)){ 45 | colnames(Intraclusterdistances)=c('Full',paste0('Cluster',u)) 46 | }else{ 47 | if(length(u)!=length(Names)){ 48 | warning('ClusterDistances: Length of Names has to be equal to the length of unique Cls.') 49 | colnames(Intraclusterdistances)=c('Full',paste0('Cluster',Names)) 50 | }else{ 51 | colnames(Intraclusterdistances)=c('Full',Names) 52 | } 53 | } 54 | 55 | if(PlotIt){ 56 | ggobject=DataVisualizations::MDplot(Intraclusterdistances,OnlyPlotOutput = TRUE) 57 | print(ggobject) 58 | return(list(ClusterDists=as.matrix(Intraclusterdistances),ggobject=ggobject)) 59 | } 60 | 61 | return(Intraclusterdistances) 62 | } 63 | -------------------------------------------------------------------------------- /R/OPTICSclustering.R: -------------------------------------------------------------------------------- 1 | OPTICSclustering=function(Data, MaxRadius,RadiusThreshold, minPts = 5, PlotIt=FALSE,...){ 2 | # INPUT 3 | # Data[1:n,1:d] Data set with n observations and d features 4 | # MaxRadius upper limit neighborhood (in the R-ball graph/unit disk graph), size of the 5 | # epsilon neighborhood (eps) [Ester et al., 1996, p. 227]. If NULL, automatic 6 | # estimation is done using insights of [Ultsch, 2005]. 7 | # RadiusThreshold Threshold to identify clusters (RadiusThreshold <= MaxRadius), if NULL 0.9*MaxRadius is set. 8 | # 9 | # OPTIONAL 10 | # minPts Default = 5 11 | # PlotIt Boolean. Decision to plot or not 12 | # 13 | # OUTPUT 14 | # Cls[1:n] Clustering of data 15 | # Object Object of dbscan::optics algorithm 16 | # 17 | # Author: MT, 04/2018 18 | if (!requireNamespace('dbscan',quietly = TRUE)) { 19 | message( 20 | 'Subordinate clustering package (dbscan) is missing. No computations are performed. 21 | Please install the package which is defined in "Suggests".' 22 | ) 23 | return( 24 | list( 25 | Cls = rep(1, nrow(Data)), 26 | Object = "Subordinate clustering package (dbscan) is missing. 27 | Please install the package which is defined in 'Suggests'." 28 | ) 29 | ) 30 | } 31 | 32 | if(is.null(MaxRadius)){ 33 | warning('The MaxRadius (eps) parameter is missing but it is required in OPTICS. Trying to estimate..') 34 | if(requireNamespace("DataVisualizations",quietly = TRUE)){ 35 | MaxRadius=0.5*DataVisualizations::ParetoRadius(Data) 36 | }else{ 37 | stop('DataVisualizations package not loaded or installed.') 38 | } 39 | } 40 | if(is.null(RadiusThreshold)){ 41 | warning('The RadiusThreshold (eps_cl) parameter is missing but it is required in OPTICS. Trying to estimate..') 42 | RadiusThreshold=0.9*MaxRadius 43 | } 44 | if(is.null(minPts)){ 45 | minPts=round(0.025*nrow(Data),0) 46 | warning('The minPts parameter is missing but it is required in OPTICS. Trying to estimate..') 47 | } 48 | out=dbscan::optics(Data,eps=MaxRadius,minPts=minPts,...)
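# optics() only computes the cluster ordering and reachability distances;
# the flat clustering is obtained in the next step by extracting DBSCAN-like
# clusters at the reachability threshold RadiusThreshold (eps_cl).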
49 | OPTICScobject=dbscan::extractDBSCAN(out, eps_cl = RadiusThreshold) 50 | 51 | Cls=OPTICScobject$cluster 52 | Cls[!is.finite(Cls)]=0 53 | if(!is.null(rownames(Data))){ 54 | names(Cls)=rownames(Data) 55 | } 56 | 57 | if(PlotIt){ 58 | ClusterPlotMDS(Data,Cls) 59 | } 60 | Cls=ClusterRename(Cls,Data) 61 | return(list(Cls=Cls,Object=OPTICScobject)) 62 | } 63 | -------------------------------------------------------------------------------- /man/LargeApplicationClustering.Rd: -------------------------------------------------------------------------------- 1 | \name{LargeApplicationClustering} 2 | \alias{LargeApplicationClustering} 3 | \title{Large Application Clustering} 4 | \description{ 5 | Clustering Large Applications (clara) of [Rousseeuw/Kaufman, 1990, pp. 126-163] 6 | } 7 | \usage{ 8 | LargeApplicationClustering(Data, ClusterNo, 9 | 10 | PlotIt=FALSE,Standardization=TRUE,Samples=50,Random=TRUE,...) 11 | } 12 | 13 | \arguments{ 14 | \item{Data}{[1:n,1:d] matrix of dataset to be clustered. It consists of n cases of d-dimensional data points. Every case has d attributes, variables or features.} 15 | 16 | \item{ClusterNo}{A number k which defines k different clusters to be built by the algorithm.} 17 | \item{PlotIt}{Default: FALSE, If TRUE plots the first three dimensions of the dataset with colored three-dimensional data points defined by the clustering stored in \code{Cls}} 18 | \item{Standardization}{ 19 | \code{Data} is standardized before calculating the dissimilarities. Measurements are standardized for each variable (column), by subtracting the variable's mean value and dividing by the variable's mean absolute deviation. 20 | } 21 | \item{Samples}{Integer, say N, the number of samples to be drawn from the dataset. Default value set as recommended by documentation of \code{\link[cluster]{clara}}} 22 | \item{Random}{Logical indicating if R's random number generator should be used instead of the primitive clara()-builtin one.} 23 | 24 | 25 | \item{\dots}{Further arguments to be set for the clustering algorithm, if not set, default arguments are used.} 26 | } 27 | 28 | \details{ 29 | It is recommended to use \code{set.seed} if clustering output should be always the same instead of setting Random=FALSE in order to use the primitive clara()-builtin random number generator. 30 | } 31 | \value{ 32 | List of 33 | \item{Cls}{[1:n] numerical vector with n numbers defining the classification as the main output of the clustering algorithm. It has k unique numbers representing the arbitrary labels of the clustering.} 34 | \item{Object}{Object defined by clustering algorithm as the other output of this algorithm} 35 | } 36 | 37 | \examples{ 38 | data('Hepta') 39 | out=LargeApplicationClustering(Hepta$Data,ClusterNo=7,PlotIt=FALSE) 40 | } 41 | \author{Michael Thrun} 42 | 43 | \references{ 44 | [Rousseeuw/Kaufman, 1990] Rousseeuw, P. J., & Kaufman, L.: Finding groups in data, Belgium, John Wiley & Sons Inc., ISBN: 0471735787, doi 10.1002/9780470316801, Online ISBN: 9780470316801, 1990. 
45 | } 46 | \keyword{LargeApplicationClustering} 47 | \keyword{Clustering} 48 | \concept{Large Application Clusteringg} 49 | \keyword{clara} -------------------------------------------------------------------------------- /R/APclustering.R: -------------------------------------------------------------------------------- 1 | APclustering=function(DataOrDistances,InputPreference=NA,ExemplarPreferences=NA,DistanceMethod="euclidean",Seed=7568,PlotIt=FALSE,Data,...){ 2 | # Cls=APcluster(Data,Seed=7568)$Cls 3 | # Affinity Propagation clustering introduced by Frey and Dueck (2007) . 4 | # 5 | # INPUT 6 | # DataOrDistances[1:n,1:d] Dataset with n observations and d features or distance matrix with size n 7 | # 8 | # OPTIONAL 9 | # InputPreference see \code{apcluster} 10 | # ExemplarPreferences \code{apcluster} 11 | # DistanceMethod 12 | # Seed 13 | # PlotIt Boolean. Decision to plot or not 14 | # 15 | # OUTPUT 16 | # Cls[1:n] Clustering of data 17 | # Object Object of apcluster::apcluster algorithm 18 | # 19 | # Author MT: 04/2018 20 | 21 | # Note: # NoNoise see \code{apcluster} 22 | if (!requireNamespace('apcluster',quietly = TRUE)) { 23 | message( 24 | 'Subordinate clustering package (apcluster) is missing. No computations are performed. 25 | Please install the package which is defined in "Suggests".' 26 | ) 27 | return( 28 | list( 29 | Cls = rep(1, nrow(DataOrDistances)), 30 | Object = "Subordinate clustering package (apcluster) is missing. 31 | Please install the package which is defined in 'Suggests'." 32 | ) 33 | ) 34 | } 35 | 36 | if(missing(DataOrDistances)){ 37 | DataOrDistances=Data 38 | } 39 | 40 | if(!is.matrix(DataOrDistances)){ 41 | warning('DataOrDistances is not a matrix. Calling as.matrix()') 42 | DataOrDistances=as.matrix(DataOrDistances) 43 | } 44 | 45 | if(!mode(DataOrDistances)=='numeric'){ 46 | warning('Data is not a numeric matrix. Calling mode(DataOrDistances)="numeric"') 47 | mode(DataOrDistances)='numeric' 48 | } 49 | AnzData = nrow(DataOrDistances) 50 | 51 | if (isSymmetric(unname(DataOrDistances))) { 52 | s=-(DataOrDistances)^2 53 | apres <- apcluster::apcluster(s=s,p=InputPreference, details=TRUE,q=ExemplarPreferences,seed=Seed,...) 54 | 55 | } 56 | else{ 57 | s=DataOrDistances 58 | apres <- apcluster::apcluster(apcluster::negDistMat(method = DistanceMethod,r=2), x=DataOrDistances,p=InputPreference,q=ExemplarPreferences, details=TRUE,seed=Seed,...) 59 | } 60 | ClsIndList=apres@clusters 61 | Cls=rep(NaN,AnzData) 62 | for(i in 1:length(ClsIndList)){ 63 | Cls[ClsIndList[[i]]]=i 64 | } 65 | if(PlotIt){ 66 | ClusterPlotMDS(DataOrDistances,Cls) 67 | } 68 | Cls=ClusterRename(Cls,DataOrDistances) 69 | return(list(Cls=Cls,Object=apres)) 70 | } -------------------------------------------------------------------------------- /R/MinimalEnergyClustering.R: -------------------------------------------------------------------------------- 1 | MinimalEnergyClustering <-function(DataOrDistances,ClusterNo=0,DistanceMethod="euclidean",ColorTreshold=0,Data,...){ 2 | # HierarchicalClusterDists(pDist) 3 | # HierarchicalClusterDists(pDist,0,"ward.D2",100) 4 | # Cls=HierarchicalClusterDists(pDist,6,"ward.D2") 5 | # 6 | # Either draws dendrogram or returns class assignment 7 | # 8 | # INPUT 9 | # DataOrDistances[1:n,1:d] Dataset with n observations and d features or distance matrix with size n 10 | # 11 | # OPTIONAL 12 | # ClusterNo Number of clusters to search for. ClusterNo=0 means use of dendrogram 13 | # DistanceMethod Choose distance metric. 
14 | # ColorTreshold Draws intersection at appropriate dendrogram y-ax (height). Height of line is number. 15 | # 16 | # OUTPUT 17 | # Cls[1:n] Clustering of data 18 | # Dendrogram 19 | # Object Object of energy::energy.hclust algorithm 20 | # 21 | # Author: MT, 2019 22 | 23 | if (!requireNamespace('energy',quietly = TRUE)) { 24 | message( 25 | 'Subordinate clustering package (energy) is missing. No computations are performed. 26 | Please install the package which is defined in "Suggests".' 27 | ) 28 | return( 29 | list( 30 | Cls = rep(1, nrow(DataOrDistances)), 31 | Object = "Subordinate clustering package (energy) is missing. 32 | Please install the package which is defined in 'Suggests'." 33 | ) 34 | ) 35 | } 36 | 37 | # Clustering 38 | if(missing(DataOrDistances)){ 39 | DataOrDistances=Data 40 | } 41 | 42 | if (!isSymmetric(unname(DataOrDistances))) { 43 | requireNamespace('parallelDist') 44 | pDist=as.dist(parallelDist::parDist(DataOrDistances,method=DistanceMethod)) 45 | }else if(!inherits(DataOrDistances,'dist')){ 46 | pDist=as.dist(DataOrDistances) 47 | }else{ 48 | pDist=DataOrDistances 49 | } 50 | hc <- energy::energy.hclust(pDist) 51 | m=paste("Minimal Energy Clustering/ "," N=",nrow(as.matrix(pDist))) 52 | # Classification or Dendrogram 53 | if (ClusterNo>0){ 54 | Cls=cutree(hc,ClusterNo) 55 | Cls=ClusterRename(Cls,DataOrDistances) 56 | return(list(Cls=Cls,Dendrogram=as.dendrogram(hc),Object=hc)) 57 | } 58 | else{ 59 | x=as.dendrogram(hc) 60 | plot(x, main=m,xlab="Number of Data Points N", ylab="Distance",sub=" ",leaflab ="none",...) 61 | axis(1,col="black",las=1) 62 | if (ColorTreshold!=0){ 63 | rect.hclust(hc, h=ColorTreshold,border="red")} 64 | else{ 65 | } 66 | return(list(Cls=NULL,Dendrogram=x,Object=hc)) 67 | } 68 | } 69 | 70 | 71 | 72 | -------------------------------------------------------------------------------- /man/ClusterShannonInfo.Rd: -------------------------------------------------------------------------------- 1 | \name{ClusterShannonInfo} 2 | \alias{ClusterShannonInfo} 3 | %- Also NEED an '\alias' for EACH other topic documented here. 4 | \title{ 5 | Shannon Information 6 | } 7 | \description{ 8 | Shannon Information [Shannon, 1948] for each column in ClsMatrix. 9 | } 10 | \usage{ 11 | ClusterShannonInfo(ClsMatrix) 12 | } 13 | %- maybe also 'usage' for other objects documented here. 14 | \arguments{ 15 | \item{ClsMatrix}{ 16 | [1:n,1:C] matrix of C clusterings each columns is defined as: 17 | 18 | 1:n numerical vector of numbers defining the classification as the main output of the clustering algorithm for the n cases of data. It has k unique numbers representing the arbitrary labels of the clustering. 
19 | } 20 | } 21 | \details{ 22 | Info[1:d] = sum(-p * log(p)/MaxInfo) for all unique cases with probability p in ClsMatrix[,c] for a column with k clusters MaxInfo = -(1/k)*log(1/k) 23 | } 24 | \value{ 25 | \item{Info}{ 26 | [1:max.nc,1:C] matrix of Shannin informaton as defined in details, each column represents one \code{Cls} of \code{ClsMatrix},each row yields the information of one cluster up the \code{ClusterNo} k, if k2&ClusterNo<5){ 45 | LC=c(2,2) 46 | } 47 | if(ClusterNo>4&ClusterNo<10){ 48 | LC=c(3,3) 49 | } 50 | if(ClusterNo>9&ClusterNo<17){ 51 | LC=c(4,4) 52 | } 53 | if(ClusterNo>16&ClusterNo<26){ 54 | LC=c(5,5) 55 | } 56 | if(ClusterNo>26&ClusterNo<36){ 57 | LC=c(6,6) 58 | } 59 | if(ClusterNo>36){ 60 | LC=c(10,10) 61 | } 62 | } 63 | 64 | koh=kohonen::supersom(Data,grid = kohonen::somgrid(LC[1],LC[2],...),keep.data=TRUE,mode=Mode,rlen=rlen,alpha=alpha) 65 | Cls=koh$unit.classif 66 | if(!is.null(rownames(Data))) 67 | names(Cls)=rownames(Data) 68 | else 69 | names(Cls)=1:nrow(Data) 70 | 71 | if(PlotIt){ 72 | ClusterPlotMDS(Data,Cls) 73 | } 74 | Cls=ClusterRename(Cls,Data) 75 | return(list(Cls=Cls,Object=koh)) 76 | } -------------------------------------------------------------------------------- /man/ModelBasedVarSelClustering.Rd: -------------------------------------------------------------------------------- 1 | \name{ModelBasedVarSelClustering} 2 | \alias{ModelBasedVarSelClustering} 3 | %- Also NEED an '\alias' for EACH other topic documented here. 4 | \title{ 5 | Model Based Clustering with Variable Selection 6 | } 7 | \description{ 8 | Model-based clustering with variable selection and estimation of the number of 9 | clusters which is either based on [Marbac/Sedki, 2017],[Marbac et al., 2020], or on [Scrucca and Raftery, 2014]. 10 | } 11 | \usage{ 12 | ModelBasedVarSelClustering(Data,ClusterNo,Type,PlotIt=FALSE, ...) 13 | } 14 | \arguments{ 15 | \item{Data}{[1:n,1:d] matrix of dataset to be clustered. It consists of n cases 16 | of d-dimensional data points. Every case has d attributes, variables or 17 | features.} 18 | \item{ClusterNo}{Numeric which defines number of cluster to search for.} 19 | \item{Type}{String, either \code{VarSelLCM} [Marbac/Sedki, 2017],[Marbac et al., 2020], or \code{clustvarsel} [Scrucca and Raftery, 2014].} 20 | 21 | \item{PlotIt}{(optional) Boolean. Default = FALSE = No plotting performed.} 22 | \item{\dots}{Further arguments passed on to \link[VarSelLCM]{VarSelCluster} or \link[clustvarsel]{clustvarsel}.} 23 | } 24 | \value{ 25 | List of 26 | \item{Cls}{[1:n] numerical vector with n numbers defining the classification as 27 | the main output of the clustering algorithm. It has k unique numbers 28 | representing the arbitrary labels of the clustering.} 29 | \item{Object}{Object defined by clustering algorithm as the other output of this 30 | algorithm} 31 | } 32 | \references{ 33 | [Marbac/Sedki, 2017] Marbac, M. and Sedki, M.: Variable selection for 34 | model-based clustering using the integrated complete-data likelihood. Statistics 35 | and Computing, 27(4), pp. 1049-1063, 2017. 36 | 37 | [Marbac et al., 2020] Marbac, M., Sedki, M., & Patin, T.: Variable selection for mixed data clustering: application in human population genomics, Journal of Classification, Vol. 37(1), pp. 124-142. 2020. 
38 | 39 | 40 | } 41 | \author{ 42 | Quirin Stier, Michael Thrun 43 | } 44 | \examples{ 45 | # Hepta 46 | data("Hepta") 47 | Data = Hepta$Data 48 | V = ModelBasedVarSelClustering(Data, ClusterNo=7,Type="VarSelLCM") 49 | Cls = V$Cls 50 | ClusterAccuracy(Hepta$Cls, Cls, K = 7) 51 | 52 | V = ModelBasedVarSelClustering(Data, ClusterNo=7,Type="clustvarsel") 53 | Cls = V$Cls 54 | ClusterAccuracy(Hepta$Cls, Cls, K = 7) 55 | 56 | \dontrun{ 57 | # Hearts 58 | heart=VarSelLCM::heart 59 | ztrue <- heart[,"Class"] 60 | Data <- heart[,-13] 61 | V <- ModelBasedVarSelClustering(Data,2,Type="VarSelLCM") 62 | Cls = V$Cls 63 | ClusterAccuracy(ztrue, Cls, K = 2) 64 | } 65 | } 66 | \concept{Variable Selection} 67 | \concept{Model-based clustering} -------------------------------------------------------------------------------- /man/SharedNearestNeighborClustering.Rd: -------------------------------------------------------------------------------- 1 | \name{SharedNearestNeighborClustering} 2 | \alias{SharedNearestNeighborClustering} 3 | %- Also NEED an '\alias' for EACH other topic documented here. 4 | \title{ 5 | SNN clustering 6 | } 7 | \description{ 8 | Shared Nearest Neighbor Clustering of [Ertoz et al., 2003]. 9 | } 10 | \usage{ 11 | SharedNearestNeighborClustering(Data,Knn, 12 | 13 | Radius,minPts,PlotIt=FALSE, 14 | 15 | UpperLimitRadius,\dots) 16 | } 17 | %- maybe also 'usage' for other objects documented here. 18 | \arguments{ 19 | \item{Data}{[1:n,1:d] matrix of dataset to be clustered. It consists of n cases of d-dimensional data points. Every case has d attributes, variables or features.} 20 | \item{Knn}{ 21 | Number of neighbors to consider to calculate the shared nearest neighbors. 22 | } 23 | \item{Radius}{ 24 | Eps [Ester et al., 1996, p. 227] neighborhood in the R-ball graph/unit disk graph), size of the epsilon neighborhood. 25 | If NULL, automatic estimation is done using insights of [Ultsch, 2005]. 26 | } 27 | \item{minPts}{ 28 | Number of minimum points in the eps region (for core points). 29 | In principle minimum number of points in the unit disk, if the unit disk is within the cluster (core) [Ester et al., 1996, p. 228]. 30 | if NULL, its 2.5 percent of points. 31 | } 32 | \item{PlotIt}{Default: FALSE, if TRUE plots the first three dimensions of the dataset with colored three-dimensional data points defined by the clustering stored in \code{Cls}} 33 | \item{UpperLimitRadius}{Limit for radius search, experimental} 34 | \item{\dots}{Further arguments to be set for the clustering algorithm, if not set, default arguments are used.} 35 | 36 | } 37 | \details{ 38 | .. 39 | } 40 | \value{ 41 | List of 42 | \item{Cls}{[1:n] numerical vector defining the clustering; this classification is the main output of the algorithm. Points which cannot be assigned to a cluster will be reported as members of the noise cluster with 0.} 43 | \item{Object}{Object defined by clustering algorithm as the other output of this algorithm} 44 | } 45 | \references{ 46 | [Ertoz et al., 2003] Levent Ertoz, Michael Steinbach, Vipin Kumar: Finding Clusters of Different Sizes, Shapes, and Densities in Noisy, High Dimensional Data, SIAM International Conference on Data Mining, 47-59, 2003. 
47 | } 48 | \author{ 49 | Michael Thrun 50 | } 51 | 52 | \seealso{ 53 | \code{\link[dbscan]{sNNclust}} 54 | } 55 | \examples{ 56 | data('Hepta') 57 | out=SharedNearestNeighborClustering( 58 | Hepta$Data, Knn=7,Radius=NULL,minPts=NULL,PlotIt = FALSE) 59 | } 60 | 61 | \keyword{snn}% use one of RShowDoc("KEYWORDS") 62 | \concept{Shared Nearest Neighbor Clustering}% __ONLY ONE__ keyword per line 63 | -------------------------------------------------------------------------------- /R/AgglomerativeNestingClustering.R: -------------------------------------------------------------------------------- 1 | AgglomerativeNestingClustering <-function(DataOrDistances,ClusterNo,PlotIt=FALSE,Standardization=TRUE,...){ 2 | # Cls=AgglomerativeNestingClustering(Data,ClusterNo=2)$Cls 3 | # AgglomerativeNestingClustering (agnes) 4 | # Returns class assignment 5 | # 6 | # INPUT 7 | # DataOrDistances[1:n,1:d] Dataset with n observations and d features or distance matrix with size n 8 | # ClusterNo Number of clusters to search for 9 | # 10 | # OPTIONAL 11 | # PlotIt Boolean. Decision to plot or not. 12 | # Standardization Boolean. If TRUE, then data gets standardized before calculating dissimilarities. 13 | # If distances are given, this argument gets ignored. 14 | # 15 | # OUTPUT 16 | # Cls[1:n] Clustering of data 17 | # Object Object of agnes algorithm 18 | # Dendrogram 19 | # 20 | # Author: MT 04/2018 21 | # if(missing(DataOrDistances)){ 22 | # DataOrDistances=Data 23 | # } 24 | if(Standardization==1) Standardization=TRUE 25 | if(Standardization==0) Standardization=FALSE 26 | 27 | 28 | if (!requireNamespace('cluster',quietly = TRUE)) { 29 | message( 30 | 'Subordinate clustering package (cluster) is missing. No computations are performed. 31 | Please install the package which is defined in "Suggests".' 32 | ) 33 | return( 34 | list( 35 | Cls = rep(1, nrow(DataOrDistances)), 36 | Object = "Subordinate clustering package (cluster) is missing. 37 | Please install the package which is defined in 'Suggests'." 38 | ) 39 | ) 40 | } 41 | 42 | if (isSymmetric(unname(DataOrDistances))) { 43 | Input = as.dist(DataOrDistances) 44 | requireNamespace('ProjectionBasedClustering') 45 | AnzVar = ncol(DataOrDistances) 46 | AnzData = nrow(DataOrDistances) 47 | diss =TRUE 48 | }else{ 49 | Input=DataOrDistances 50 | diss =FALSE 51 | } 52 | 53 | res=cluster::agnes(x=Input,diss =diss,stand=Standardization,...) 54 | if(length(ClusterNo)!=1){ 55 | stop('ClusterNo has to be a single numerical value, not a vector of length higher than 1 or another object.') 56 | } 57 | if(ClusterNo>0){ 58 | Cls=cutree(as.hclust(res), k = ClusterNo) 59 | 60 | if(PlotIt){ 61 | ClusterPlotMDS(DataOrDistances,Cls) 62 | } 63 | Cls=ClusterRename(Cls,DataOrDistances) 64 | } 65 | if(ClusterNo<=0){ 66 | Cls=NULL 67 | plot(res) 68 | if(ClusterNo<0){ 69 | warning(('ClusterNo cannot be a negative number')) 70 | } 71 | } 72 | return(list(Cls=Cls, Object=res, Dendrogram=as.dendrogram(as.hclust(res)))) 73 | } -------------------------------------------------------------------------------- /man/PAMClustering.Rd: -------------------------------------------------------------------------------- 1 | \name{PAMclustering} 2 | \alias{PAMclustering} 3 | \alias{PAMClustering} 4 | \title{ 5 | Partitioning Around Medoids (PAM) 6 | } 7 | \description{ 8 | Partitioning (clustering) of the data into k clusters around medoids, a more robust version of k-means [Rousseeuw/Kaufman, 1990, p. 68-125].
9 | } 10 | \usage{ 11 | PAMclustering(DataOrDistances,ClusterNo, 12 | 13 | PlotIt=FALSE,Standardization=TRUE,Data,...) 14 | } 15 | \arguments{ 16 | \item{DataOrDistances}{[1:n,1:d] matrix of dataset to be clustered. It consists of n cases of d-dimensional data points. Every case has d attributes, variables or features. Alternatively, symmetric [1:n,1:n] distance matrix} 17 | \item{ClusterNo}{A number k which defines k different clusters to be built by the algorithm.} 18 | \item{PlotIt}{Default: FALSE, If TRUE plots the first three dimensions of the dataset with colored three-dimensional data points defined by the clustering stored in \code{Cls}} 19 | \item{Standardization}{ 20 | \code{DataOrDistances} is standardized before calculating the dissimilarities. Measurements are standardized for each variable (column), by subtracting the variable's mean value and dividing by the variable's mean absolute deviation.If \code{DataOrDistances} is already a distance matrix, then this argument will be ignored. 21 | } 22 | \item{Data}{[1:n,1:d] data matrix in the case that \code{DataOrDistances} is missing and partial matching does not work.} 23 | \item{\dots}{Further arguments to be set for the clustering algorithm, if not set, default arguments are used.} 24 | } 25 | \details{ 26 | [Rousseeuw/Kaufman, 1990, chapter 2] or [Reynolds et al., 1992]. 27 | } 28 | \value{ 29 | List of 30 | \item{Cls}{[1:n] numerical vector with n numbers defining the classification as the main output of the clustering algorithm. It has k unique numbers representing the arbitrary labels of the clustering.} 31 | \item{Object}{Object defined by clustering algorithm as the other output of this algorithm} 32 | } 33 | \references{ 34 | [Rousseeuw/Kaufman, 1990] Rousseeuw, P. J., & Kaufman, L.: Finding groups in data, Belgium, John Wiley & Sons Inc., ISBN: 0471735787, doi:10.1002/9780470316801, Online ISBN: 9780470316801, 1990. 35 | 36 | [Reynolds et al., 1992] Reynolds, A., Richards, G.,de la Iglesia, B. and Rayward-Smith, V.: Clustering rules: A comparison of partitioning and hierarchical clustering algorithms, Journal of Mathematical Modelling and Algorithms 5, 475-504, DOI:10.1007/s10852-005-9022-1, 1992. 37 | } 38 | \author{ 39 | Michael Thrun 40 | } 41 | \examples{ 42 | data('Hepta') 43 | out=PAMclustering(Hepta$Data,ClusterNo=7,PlotIt=FALSE) 44 | } 45 | \keyword{PAM} 46 | \concept{Partitioning Around Medoids} 47 | \keyword{clustering} -------------------------------------------------------------------------------- /man/SOMclustering.Rd: -------------------------------------------------------------------------------- 1 | \name{SOMclustering} 2 | \alias{SOMclustering} 3 | %- Also NEED an '\alias' for EACH other topic documented here. 4 | \title{ 5 | self-organizing maps based clustering implemented by [Wherens, Buydens, 2017]. 6 | } 7 | \description{ 8 | Either the variant k-batch or k-online is possible in which every unit can be seen approximately as an cluster. 9 | } 10 | \usage{ 11 | SOMclustering(Data,LC=c(1,2),ClusterNo=NULL, 12 | 13 | Mode="online",PlotIt=FALSE,rlen=100,alpha = c(0.05, 0.01),...) 14 | } 15 | %- maybe also 'usage' for other objects documented here. 16 | \arguments{ 17 | \item{Data}{[1:n,1:d] matrix of dataset to be clustered. It consists of n cases of d-dimensional data points. 
Every case has d attributes, variables or features.} 18 | \item{LC}{Lines and Columns of a very small SOM, usually every unit is a cluster; will be ignored if ClusterNo is not NULL.} 19 | \item{ClusterNo}{Optional, a number k which defines k different clusters to be built by the algorithm. LC will then be set accordingly.} 20 | \item{Mode}{Either "batch" or "online"} 21 | \item{PlotIt}{Default: FALSE, if TRUE plots the first three dimensions of the dataset with colored three-dimensional data points defined by the clustering stored in \code{Cls}} 22 | 23 | \item{rlen}{Please see \code{\link[kohonen]{supersom}}} 24 | \item{alpha}{Please see \code{\link[kohonen]{supersom}}} 25 | \item{\dots}{Further arguments to be set for the clustering algorithm in 26 | \code{\link[kohonen:unit.distances]{somgrid}}, if not set, default arguments are used.} 27 | } 28 | \details{ 29 | This clustering algorithm is based on very small maps and, hence, not emergent (c.f. [Thrun, 2018, p.37]). A 3x3 map means 9 units leading to 9 clusters. 30 | 31 | Batch is a deterministic clustering approach whereas online is a stochastic clustering approach and research indicates that online should be preferred (c.f. [Thrun, 2018, p.37]). 32 | } 33 | \value{ 34 | List of 35 | \item{Cls}{[1:n] numerical vector defining the classification as the main output of the clustering algorithm} 36 | \item{Object}{Object defined by clustering algorithm as the other output of this algorithm} 37 | } 38 | \references{ 39 | [Wherens, Buydens, 2017] R. Wehrens and L.M.C. Buydens, J. Stat. Softw. 21 (5), 2007; R. Wehrens and J. Kruisselbrink, submitted, 2017. 40 | 41 | [Thrun, 2018] Thrun, M.C., Projection Based Clustering through Self-Organization and Swarm Intelligence. 2018, Heidelberg: Springer. 42 | } 43 | \author{ 44 | Michael Thrun 45 | } 46 | 47 | \examples{ 48 | data('Hepta') 49 | out=SOMclustering(Hepta$Data,ClusterNo=7,PlotIt=FALSE) 50 | } 51 | 52 | \keyword{SOM} 53 | \keyword{k-batch} 54 | \concept{k-batch clustering} 55 | \concept{som clustering} -------------------------------------------------------------------------------- /man/DivisiveAnalysisClustering.Rd: -------------------------------------------------------------------------------- 1 | \name{DivisiveAnalysisClustering} 2 | \alias{DivisiveAnalysisClustering} 3 | \title{Divisive Analysis Clustering} 4 | \description{ 5 | Divisive Analysis Clustering (diana) of [Rousseeuw/Kaufman, 1990, pp. 253-279]. 6 | } 7 | \usage{ 8 | DivisiveAnalysisClustering(DataOrDistances, ClusterNo, 9 | 10 | PlotIt=FALSE,Standardization=TRUE,PlotTree=FALSE,Data,...) 11 | } 12 | 13 | \arguments{ 14 | \item{DataOrDistances}{[1:n,1:d] matrix of dataset to be clustered. It consists of n cases of d-dimensional data points. Every case has d attributes, variables or features. Alternatively, symmetric [1:n,1:n] distance matrix} 15 | 16 | \item{ClusterNo}{A number k which defines k different clusters to be built by the algorithm. 17 | If \code{ClusterNo=0} and \code{PlotTree=TRUE}, the dendrogram is generated instead of a clustering to estimate the number of clusters. 18 | } 19 | \item{PlotIt}{Default: FALSE, If TRUE plots the first three dimensions of the dataset with colored three-dimensional data points defined by the clustering stored in \code{Cls}} 20 | \item{Standardization}{ 21 | \code{DataOrDistances} is standardized before calculating the dissimilarities.
Measurements are standardized for each variable (column), by subtracting the variable's mean value and dividing by the variable's mean absolute deviation.If \code{DataOrDistances} Is already a distance matrix, then this argument will be ignored. 22 | } 23 | \item{PlotTree}{ 24 | TRUE: Plots the dendrogram, FALSE: no plot 25 | } 26 | \item{Data}{[1:n,1:d] data matrix in the case that \code{DataOrDistances} is missing and partial matching does not work.} 27 | 28 | \item{\dots}{Further arguments to be set for the clustering algorithm, if not set, default arguments are used.} 29 | } 30 | \value{ 31 | List of 32 | \item{Cls}{[1:n] numerical vector with n numbers defining the classification as the main output of the clustering algorithm. It has k unique numbers representing the arbitrary labels of the clustering.} 33 | \item{Dendrogram}{Dendrogram of hierarchical clustering algorithm} 34 | \item{Object}{Object defined by clustering algorithm as the other output of this algorithm} 35 | } 36 | 37 | \examples{ 38 | data('Hepta') 39 | CA=DivisiveAnalysisClustering(Hepta$Data,ClusterNo=7,PlotIt=FALSE) 40 | \donttest{ 41 | print(CA$Object) 42 | plot(CA$Object) 43 | ClusterDendrogram(CA$Dendrogram,7,main='DIANA') 44 | } 45 | } 46 | \author{Michael Thrun} 47 | 48 | \references{ 49 | [Rousseeuw/Kaufman, 1990] Rousseeuw, P. J., & Kaufman, L.: Finding groups in data, Belgium, John Wiley & Sons Inc., ISBN: 0471735787, doi: 10.1002/9780470316801, Online ISBN: 9780470316801, 1990. 50 | } 51 | \concept{Divisive Analysis Clustering} 52 | \keyword{diana} 53 | \keyword{DivisiveAnalysisClustering} -------------------------------------------------------------------------------- /man/QTclustering.Rd: -------------------------------------------------------------------------------- 1 | \name{QTclustering} 2 | \alias{QTclustering} 3 | \alias{QTClustering} 4 | %- Also NEED an '\alias' for EACH other topic documented here. 5 | \title{ 6 | Stochastic QT Clustering 7 | } 8 | \description{ 9 | Stochastic quality clustering of [Heyer et al., 1999] with an improved implementation by [Scharl/Leisch, 2006]. 10 | } 11 | \usage{ 12 | QTclustering(Data,Radius,PlotIt=FALSE,...) 13 | } 14 | %- maybe also 'usage' for other objects documented here. 15 | \arguments{ 16 | \item{Data}{[1:n,1:d] matrix of dataset to be clustered. It consists of n cases of d-dimensional data points. Every case has d attributes, variables or features.} 17 | \item{Radius}{Maximum radius of clusters. If NULL, automatic estimation can be done with [Thrun et al., 2016] if not otherwise set.} 18 | \item{PlotIt}{Default: FALSE, if TRUE plots the first three dimensions of the dataset with colored three-dimensional data points defined by the clustering stored in \code{Cls}} 19 | \item{\dots}{Further arguments to be set for the clustering algorithm, if not set, default arguments are used.} 20 | } 21 | 22 | \value{ 23 | List of 24 | \item{Cls}{[1:n] numerical vector with n numbers defining the classification as the main output of the clustering algorithm. It has k unique numbers representing the arbitrary labels of the clustering. 25 | Points which cannot be assigned to a cluster will be reported with 0. 26 | } 27 | \item{Object}{Object defined by clustering algorithm as the other output of this algorithm} 28 | } 29 | \references{ 30 | [Heyer et al., 1999] Heyer, L. J., Kruglyak, S., & Yooseph, S.: Exploring expression data: identification and analysis of coexpressed genes, Genome research, Vol. 9(11), pp. 1106-1115. 1999. 
31 | 32 | [Scharl/Leisch, 2006] Scharl, T., & Leisch, F.: The stochastic QT-clust algorithm: evaluation of stability and variance on time-course microarray data, in Rizzi , A. & Vichi, M. (eds.), Proc. Proceedings in Computational Statistics (Compstat), pp. 1015-1022, Physica Verlag, Heidelberg, Germany, 2006. 33 | 34 | [Thrun et al., 2016] Thrun, M. C., Lerch, F., Loetsch, J., & Ultsch, A. : Visualization and 3D Printing of Multivariate Data of Biomarkers, in Skala, V. (Ed.), International Conference in Central Europe on Computer Graphics, Visualization and Computer Vision (WSCG), Vol. 24, Plzen, 2016. 35 | 36 | [Ultsch, 2005] Ultsch, A.: Pareto density estimation: A density estimation for knowledge discovery, In Baier, D. & Werrnecke, K. D. (Eds.), Innovations in classification, data science, and information systems, (Vol. 27, pp. 91-100), Berlin, Germany, Springer, 2005. 37 | } 38 | \author{ 39 | Michael Thrun 40 | } 41 | 42 | \examples{ 43 | data('Hepta') 44 | out=QTclustering(Hepta$Data,Radius=NULL,PlotIt=FALSE) 45 | } 46 | 47 | \keyword{QTClustering} 48 | 49 | -------------------------------------------------------------------------------- /man/Spectrum.Rd: -------------------------------------------------------------------------------- 1 | \name{Spectrum} 2 | \alias{Spectrum} 3 | %- Also NEED an '\alias' for EACH other topic documented here. 4 | \title{ 5 | Fast Adaptive Spectral Clustering [John et al, 2020] 6 | } 7 | \description{ 8 | Spectrum is a self-tuning spectral clustering method for single or multi-view data. In this wrapper restricted to the standard use in other clustering algorithms. 9 | } 10 | \usage{ 11 | Spectrum(Data, Type = 2, ClusterNo = NULL, 12 | 13 | PlotIt = FALSE, Silent = TRUE,PlotResults = FALSE, \dots) 14 | } 15 | %- maybe also 'usage' for other objects documented here. 16 | \arguments{ 17 | \item{Data}{ 18 | 1:n,1:d] matrix of dataset to be clustered. It consists of n cases of d-dimensional data points. Every case has d attributes, variables or features. 19 | } 20 | \item{Type}{ 21 | Type=1: default eigengap method (Gaussian clusters) 22 | 23 | Type=2: multimodality gap method (Gaussian/ non-Gaussian clusters) 24 | 25 | Type=3: Allows to setClusterNo 26 | } 27 | \item{ClusterNo}{Optional, A number k which defines k different clusters to be built by the algorithm. 28 | For default \code{ClusterNo=NULL} please see details. 29 | } 30 | 31 | \item{PlotIt}{Default: FALSE, If TRUE plots the first three dimensions of the dataset with colored three-dimensional data points defined by the clustering stored in \code{Cls}} 32 | 33 | \item{Silent}{ 34 | Silent progress of algorithm=TRUE 35 | } 36 | \item{PlotResults}{ 37 | Plots result of spectrum with plot function 38 | } 39 | \item{\dots}{ 40 | Method: numerical value: 1 = default eigengap method (Gaussian clusters), 2 = multimodality 41 | gap method (Gaussian/ non-Gaussian clusters), 3 = no automatic 42 | method (see fixk param) 43 | 44 | Other parameters defined in Spectrum packages 45 | } 46 | } 47 | \details{ 48 | Spectrum is a partitioning algorithm and either uses the eigengap or multimodality gap heuristics to determine the number of clusters, please see Spectrum package for details 49 | } 50 | \value{ 51 | List of 52 | \item{Cls}{[1:n] numerical vector with n numbers defining the classification as the main output of the clustering algorithm. 
It has k unique numbers representing the arbitrary labels of the clustering.} 53 | \item{Object}{Object defined by clustering algorithm as the other output of this algorithm} 54 | } 55 | \references{ 56 | [John et al, 2020] John, C. R., Watson, D., Barnes, M. R., Pitzalis, C., & Lewis, M. J.: Spectrum: Fast density-aware spectral clustering for single and multi-omic data. Bioinformatics, Vol. 36(4), pp. 1159-1166, 2020. 57 | } 58 | \author{ 59 | Michael Thrun 60 | } 61 | 62 | 63 | \seealso{ 64 | \code{\link[Spectrum]{Spectrum}} 65 | } 66 | \examples{ 67 | data('Hepta') 68 | out=Spectrum(Hepta$Data,PlotIt=FALSE) 69 | \donttest{ 70 | out=Spectrum(Hepta$Data,PlotIt=TRUE) 71 | } 72 | } 73 | 74 | \keyword{Spectrum} 75 | \keyword{FCPS} 76 | \concept{Spectral Clustering} -------------------------------------------------------------------------------- /man/HierarchicalClustering.Rd: -------------------------------------------------------------------------------- 1 | \name{HierarchicalClustering} 2 | \alias{HierarchicalClustering} 3 | 4 | \title{ 5 | Hierarchical Clustering 6 | } 7 | \description{ 8 | Wrapper for various agglomerative hierarchical clustering algorithms. 9 | } 10 | \usage{ 11 | HierarchicalClustering(DataOrDistances,ClusterNo,Type='SingleL',Fast=TRUE,Data,\dots) 12 | } 13 | 14 | \arguments{ 15 | \item{DataOrDistances}{ 16 | Either nonsymmetric [1:n,1:d] numerical matrix of a dataset to be clustered. It consists of n cases of d-dimensional data points. Every case has d attributes, variables or features. 17 | 18 | or 19 | 20 | symmetric [1:n,1:n] distance matrix, e.g. \code{as.matrix(dist(Data,method))} 21 | 22 | } 23 | 24 | \item{ClusterNo}{A number k which defines k different clusters to be built by the algorithm.} 25 | \item{Type}{Method of cluster analysis: "Ward", "SingleL", "CompleteL", "AverageL" (UPGMA), "WPGMA" (mcquitty), "MedianL" (WPGMC), "CentroidL" (UPGMC), "Minimax", "MinEnergy", "Gini","HDBSCAN", or "Sparse"} 26 | \item{Fast}{If TRUE and fastcluster installed, then a faster implementation of the methods above can be used except for "Minimax", "MinEnergy", "Gini" or "HDBSCAN"} 27 | 28 | \item{Data}{[1:n,1:d] data matrix in the case that \code{DataOrDistances} is missing and partial matching does not work.} 29 | 30 | \item{\dots}{ 31 | Further arguments passed on to either \code{\link{HierarchicalClusterData}}, \code{\link{HierarchicalClusterDists}}, \code{\link{MinimalEnergyClustering}} or \code{\link{GenieClustering}} (for "Gini"), \code{\link{HierarchicalDBSCAN}} (for HDBSCAN) or \code{\link{SparseClustering}} (for Sparse). 32 | } 33 | } 34 | \details{ 35 | Please see \code{\link{HierarchicalClusterData}} and \code{\link{HierarchicalClusterDists}} or the other functions listed above. 36 | 37 | It should be noted that in case of "HDBSCAN" the number of clusters is manually selected by \code{cutree} to have the same convention as the other algorithms. Usually, "HDBSCAN" selects the number of clusters automatically. 38 | } 39 | \value{ 40 | List of 41 | \item{Cls}{If, ClusterNo>0: [1:n] numerical vector with n numbers defining the classification as the main output of the clustering algorithm. It has k unique numbers representing the arbitrary labels of the clustering. 
Otherwise for ClusterNo=0: NULL} 42 | \item{Dendrogram}{Dendrogram of hierarchical clustering algorithm} 43 | \item{Object}{Ultrametric tree of hierarchical clustering algorithm} 44 | } 45 | 46 | \author{ 47 | Michael Thrun 48 | } 49 | 50 | \seealso{ 51 | \code{\link{HierarchicalClusterData}} 52 | 53 | \code{\link{HierarchicalClusterDists}}, 54 | 55 | \code{\link{MinimalEnergyClustering}}. 56 | } 57 | \examples{ 58 | data('Hepta') 59 | out=HierarchicalClustering(Hepta$Data,ClusterNo=7) 60 | } 61 | 62 | \keyword{HierarchicalClustering} 63 | \keyword{Hierarchical} 64 | \keyword{Clustering} 65 | \concept{Hierarchical Clustering} -------------------------------------------------------------------------------- /man/FCPS-package.Rd: -------------------------------------------------------------------------------- 1 | \name{FCPS-package} 2 | \alias{FCPS-package} 3 | \alias{ClusteringAlgorithms} 4 | 5 | \docType{package} 6 | \title{ 7 | \packageTitle{FCPS} 8 | } 9 | \description{ 10 | 11 | \packageDescription{FCPS} 12 | 13 | The package consists of many algorithms and fundamental datasets for clustering published in [Thrun/Stier, 2021]. Originally, the 'Fundamental Clustering Problems Suite' (FCPS) offered a variety of clustering problems 14 | any algorithm shall be able to handle when facing real world data. Nine of the here presented artificial datasets were priorly named FCPS with a fixed sample size in Ultsch, A.: "Clustering with SOM: U*C", In Workshop on Self-Organizing Maps, 2005. FCPS often served in the paper as an elementary benchmark for clustering algorithms. The FCPS package extends datasets, enables variable sample sizes for these datasets, and provides a standardized and easy access to many clustering algorithms. 15 | 16 | \url{https://www.deepbionics.org/} 17 | } 18 | 19 | \details{ 20 | FCPS datasets consists of data sets with known a priori classification to be reproduced by the algorithms. 21 | All data sets are intentionally created to be simple and might be visualized in two or three dimensions. 22 | Each data sets represents a certain problem that is solved by known clustering algorithms with varying success. 23 | This is done in order to reveal benefits and shortcomings of algorithms in question. Standard clustering methods, 24 | e.g. single-linkage, ward and k-means, are not able to solve all FCPS problems satisfactorily. "Lsun3D and each of the nine artificial data sets of "Fundamental Clustering Problems Suite" (FCPS) 25 | were defined separately for a specific clustering problem as cited (in [Thrun/Ultsch, 2020]). The original 26 | sample size defined in the respective first publication mentioning the data was used in [Thrun/Ultsch, 2020], 27 | but using the R function "ClusterChallenge" (...) any sample size can be drawn for all artificial data sets. 28 | [Thrun/Ultsch, 2020] 29 | 30 | \packageIndices{DatabionicSwarm} 31 | } 32 | 33 | 34 | \author{ 35 | \packageAuthor{FCPS} 36 | 37 | Maintainer: \packageMaintainer{FCPS} 38 | } 39 | \references{ 40 | [Thrun/Ultsch, 2020] Thrun, M. C., & Ultsch, A.: Clustering Benchmark Datasets Exploiting the Fundamental Clustering Problems, Data in Brief, Vol. 30(C), pp. 105501, \doi{10.1016/j.dib.2020.105501}, 2020. 41 | 42 | [Thrun/Stier, 2021] Thrun, M. C., & Stier, Q.: Fundamental Clustering Algorithms Suite SoftwareX, Vol. 13(C), in press, pp. 100642. \doi{10.1016/j.softx.2020.100642}, 2021. 43 | 44 | [Ultsch, 2005] Ultsch, A.: Clustering with SOM: U*C, In Proc. Workshop on Self-Organizing Maps, pp. 75-82, Paris, France, 2005. 
45 | } 46 | 47 | \keyword{FCPS} 48 | \concept{data set} 49 | \keyword{benchmarking} 50 | \keyword{clustering} 51 | \keyword{cluster} 52 | \concept{Fundamental Clustering Problems Suite} 53 | -------------------------------------------------------------------------------- /man/OPTICSclustering.Rd: -------------------------------------------------------------------------------- 1 | \name{OPTICSclustering} 2 | \alias{OPTICSclustering} 3 | \title{ 4 | OPTICS Clustering 5 | } 6 | \description{ 7 | OPTICS (Ordering points to identify the clustering structure) clustering algorithm [Ankerst et al.,1999]. 8 | } 9 | \usage{ 10 | OPTICSclustering(Data, MaxRadius,RadiusThreshold, minPts = 5, PlotIt=FALSE,\dots) 11 | } 12 | \arguments{ 13 | \item{Data}{[1:n,1:d] matrix of dataset to be clustered. It consists of n cases of d-dimensional data points. Every case has d attributes, variables or features.} 14 | 15 | \item{MaxRadius}{ 16 | Upper limit neighborhood in the R-ball graph/unit disk graph), size of the epsilon neighborhood (eps) [Ester et al., 1996, p. 227]. 17 | If NULL, automatic estimation is done using insights of [Ultsch, 2005]. 18 | } 19 | \item{RadiusThreshold}{ 20 | Threshold to identify clusters (RadiusThreshold <= MaxRadius), if NULL \code{0.9*MaxRadius} is set. 21 | } 22 | \item{minPts}{ 23 | Number of minimum points in the eps region (for core points). 24 | In principle minimum number of points in the unit disk, if the unit disk is within the cluster (core) [Ester et al., 1996, p. 228]. 25 | If NULL, its 2.5 percent of points. 26 | } 27 | \item{PlotIt}{Default: FALSE, If TRUE plots the first three dimensions of the dataset with colored three-dimensional data points defined by the clustering stored in \code{Cls}} 28 | \item{\dots}{Further arguments to be set for the clustering algorithm, if not set, default arguments are used.} 29 | } 30 | \details{ 31 | ... 32 | } 33 | \value{ 34 | List of 35 | \item{Cls}{[1:n] numerical vector defining the clustering; this classification is the main output of the algorithm. Points which cannot be assigned to a cluster will be reported as members of the noise cluster with 0.} 36 | \item{Object}{Object defined by clustering algorithm as the other output of this algorithm} 37 | } 38 | \references{ 39 | [Ankerst et al.,1999] Mihael Ankerst, Markus M. Breunig, Hans-Peter Kriegel, Joerg Sander: OPTICS: Ordering Points To Identify the Clustering Structure, ACM SIGMOD international conference on Management of data, ACM Press, pp. 49-60, 1999. 40 | 41 | [Ester et al., 1996] Ester, M., Kriegel, H.-P., Sander, J., & Xu, X.: A density-based algorithm for discovering clusters in large spatial databases with noise, Proc. Kdd, Vol. 96, pp. 226-231, 1996. 42 | 43 | [Ultsch, 2005] Ultsch, A.: Pareto density estimation: A density estimation for knowledge discovery, In Baier, D. & Werrnecke, K. D. (Eds.), Innovations in classification, data science, and information systems, (Vol. 27, pp. 91-100), Berlin, Germany, Springer, 2005. 
44 | } 45 | \author{ 46 | Michael Thrun 47 | } 48 | \seealso{ 49 | \code{\link[dbscan]{optics}} 50 | } 51 | \examples{ 52 | data('Hepta') 53 | out=OPTICSclustering(Hepta$Data,MaxRadius=NULL,RadiusThreshold=NULL,minPts=NULL,PlotIt = FALSE) 54 | } 55 | \keyword{optics}% use one of RShowDoc("KEYWORDS") 56 | \keyword{Clustering} -------------------------------------------------------------------------------- /R/HierarchicalClustering.R: -------------------------------------------------------------------------------- 1 | HierarchicalClustering=function(DataOrDistances,ClusterNo,Type='SingleL',Fast=TRUE,Data,...){ 2 | # INPUT 3 | # Data[1:n,1:d] Data set with n observations and d features or distance matrix of size n 4 | # ClusterNo Number of clusters to search for 5 | # 6 | # OPTIONAL 7 | # Type Type of cluster analysis: "Ward", "SingleL", "CompleteL", "AverageL" (UPGMA), 8 | # "WPGMA" (mcquitty), "MedianL" (WPGMC), "CentroidL" (UPGMC), "Minimax", "MinEnergy", 9 | # "Gini" or "HDBSCAN". 10 | # Fast Boolean. If TRUE and fastcluster installed, then a faster implementation of the Types 11 | # above can be used except for "Minimax", "MinEnergy", "Gini" or "HDBSCAN" 12 | # 13 | # OUTPUT 14 | # Cls[1:n] Clustering of data 15 | # Object Object of adpclust 16 | # 17 | # Author: MT, 04/2018 18 | if(missing(DataOrDistances)){ 19 | DataOrDistances=Data 20 | } 21 | if(missing(ClusterNo)) ClusterNo=0 22 | # Unification for paper 23 | if(Type=='SingleL') Type="single" 24 | if(Type=="Ward") Type="ward.D2" 25 | if(Type=='CompleteL') Type="complete" 26 | if(Type=='AverageL') Type="average" 27 | if(Type=='WPGMA') Type="mcquitty" 28 | if(Type=='MedianL') Type="median" 29 | if(Type=='CentroidL') Type="centroid" 30 | 31 | # Backwards compatibility to matlab, otherwise could be programmed better :-( 32 | if(Type=='MinEnergy'){ 33 | return(MinimalEnergyClustering(DataOrDistances = DataOrDistances,ClusterNo = ClusterNo,...)) 34 | }else if(Type=="Gini"){ 35 | return(GenieClustering(DataOrDistances = DataOrDistances,ClusterNo = ClusterNo,...)) 36 | }else if(Type=="Minimax"){ 37 | return(MinimaxLinkageClustering(DataOrDistances = DataOrDistances,ClusterNo = ClusterNo,...)) 38 | }else if(Type=="Sparse"){ 39 | return(SparseClustering(DataOrDistances = DataOrDistances,ClusterNo = ClusterNo,Strategy = "Hierarchical",...)) 40 | }else if(Type=="HDBSCAN"){ 41 | V=HierarchicalDBSCAN(DataOrDistances = DataOrDistances,...) 
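# HDBSCAN branch: if a cluster number is requested, the resulting tree is cut with
# cutree() like any other dendrogram (see the 'details' of HierarchicalClustering.Rd);
# otherwise the cluster number selected automatically by HDBSCAN itself is kept.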
42 | if(ClusterNo>1){ 43 | Cls = cutree(V$Tree, ClusterNo) 44 | }else{ 45 | #ClusterDendrogram(V$Tree,1,Colorsequence = 'black',main = 'HDBSCAN Clustering') 46 | Cls=V$Cls #automatic number of clusters selection by Hierarchical_DBSCAN 47 | } 48 | return(list(Cls=Cls,Dendrogram=V$Dendrogram,Object=V$Tree,OriginalObject=V$Object)) 49 | }else if (isSymmetric(unname(DataOrDistances))) { 50 | if(!inherits(DataOrDistances,'dist')){ 51 | Input=as.dist(DataOrDistances) 52 | }else{ 53 | Input=DataOrDistances 54 | } 55 | return(HierarchicalClusterDists(pDist = Input,ClusterNo = ClusterNo,Type = Type,Fast=Fast,...)) 56 | }else{# Data given 57 | return(HierarchicalClusterData(Data = DataOrDistances,ClusterNo = ClusterNo,Type = Type,Fast=Fast,...)) 58 | }#endisSymmetric(DataOrDistances) 59 | 60 | } -------------------------------------------------------------------------------- /man/ClusterMCC.Rd: -------------------------------------------------------------------------------- 1 | \name{ClusterMCC} 2 | \alias{ClusterMCC} 3 | 4 | \title{ 5 | Matthews Correlation Coefficient (MCC) 6 | } 7 | \description{ 8 | Matthews correlation coefficient generalized to the multiclass case (a.k.a. R_K statistic). 9 | } 10 | \usage{ 11 | ClusterMCC(PriorCls, CurrentCls,Force=TRUE) 12 | } 13 | 14 | \arguments{ 15 | \item{PriorCls}{ 16 | Ground truth, [1:n] numerical vector with n numbers defining the classification. It has k unique numbers representing the labels of the clustering. 17 | } 18 | \item{CurrentCls}{ 19 | Main output of the clustering, [1:n] numerical vector with n numbers defining the classification. It has k unique numbers representing the labels of the clustering. 20 | } 21 | 22 | \item{Force}{ 23 | Boolean, if TRUE: forces the computation even if one or more than one of the k numbers given in \code{PriorCls} is missing in \code{CurrentCls} or vice versa. In this case, one label per missing number is added at the end of the vectors. 24 | 25 | } 26 | } 27 | \details{ 28 | Contrary to accuracy, the MCC is a balanced measure which can be used even if the classes are of very different sizes. When there are more than two labels the MCC will no longer range between -1 and +1. Instead the minimum value will be between -1 and 0 depending on the true distribution. The maximum value is always +1. 29 | Beware that in contrast to \code{\link{ClusterAccuracy}}, the labels cannot be arbitrary. Instead each label of \code{PriorCls} and \code{CurrentCls} has to be mapped to the same cluster of data points. Typically this has to be ensured manually; one possible alignment is sketched after this file. 30 | } 31 | \value{ 32 | Single scalar of MCC in a range described in details. 33 | } 34 | \references{ 35 | Matthews, B. W.: Comparison of the predicted and observed secondary structure of T4 phage lysozyme, Biochimica et Biophysica Acta (BBA), Protein Structure, Vol. 405(2), pp. 442-451, 1975. 36 | 37 | Boughorbel, S.B: Optimal classifier for imbalanced data using Matthews Correlation Coefficient metric, PLOS ONE, Vol. 12(6), pp. e0177678, 2017. 38 | 39 | Chicco, D.; Toetsch, N. and Jurman, G.: The Matthews correlation coefficient (MCC) is more reliable than balanced accuracy, bookmaker informedness, and markedness in two-class confusion matrix evaluation. BioData Mining. Vol. 14., 2021. 40 | } 41 | \author{ 42 | Michael Thrun 43 | } 44 | \note{ 45 | If the number of clusters is not equivalent, internally the numbers are aligned with zero data points belonging to the missing clusters.
46 | } 47 | 48 | \seealso{ 49 | \code{\link{ClusterAccuracy}} 50 | } 51 | \examples{ 52 | #Beware that the algorithm arbitrarily defines the labels 53 | data(Hepta) 54 | V=kmeansClustering(Hepta$Data,Type = "Hartigan",7) 55 | table(V$Cls,Hepta$Cls) 56 | #result is only valid if the above issue is resolved manually 57 | ClusterMCC(Hepta$Cls,V$Cls) 58 | } 59 | 60 | \keyword{MCC} 61 | \keyword{Matthews} 62 | 63 | \concept{Matthews Correlation Coefficient} 64 | \concept{Matthews Correlation} 65 | \concept{Rk statistic} 66 | --------------------------------------------------------------------------------
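The details and note of ClusterMCC.Rd above state that, unlike ClusterAccuracy, the labels of PriorCls and CurrentCls must refer to the same clusters and that this typically has to be ensured manually. The following lines are a minimal sketch of one possible way to do that with base R before calling ClusterMCC; the greedy majority mapping used here is an illustrative assumption, not part of the FCPS package, and may map two clusters to the same prior label when the clustering is poor.

library(FCPS)
data(Hepta)
V = kmeansClustering(Hepta$Data, Type = "Hartigan", 7)
# Contingency table: rows are the arbitrary k-means labels, columns the prior labels
Tab = table(V$Cls, Hepta$Cls)
# Greedy majority mapping: each current label is mapped to the prior label it overlaps most
Map = as.numeric(colnames(Tab))[apply(Tab, 1, which.max)]
AlignedCls = Map[match(V$Cls, as.numeric(rownames(Tab)))]
# Only after such an alignment is the MCC meaningful
ClusterMCC(Hepta$Cls, AlignedCls)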