├── .Rbuildignore ├── .gitignore ├── CHANGELOG ├── DESCRIPTION ├── LICENSE ├── NAMESPACE ├── NEWS ├── R ├── clusterfly.r ├── data.r ├── ellipsoid.r ├── extract.r ├── ggplot.r ├── hclust.r ├── hulls.r ├── model-based.r ├── som-iteration.r ├── som.r ├── sysdata.rda └── utils.r ├── clusterfly.Rproj └── man ├── addhull.Rd ├── as.data.frame.clusterfly.Rd ├── cfly_animate.Rd ├── cfly_clarify.Rd ├── cfly_cluster.Rd ├── cfly_dist.Rd ├── cfly_fluct.Rd ├── cfly_pcp.Rd ├── cfly_show.Rd ├── clarify.Rd ├── clusterfly.Rd ├── clusters.Rd ├── cut.hierfly.Rd ├── ellipse.Rd ├── ggobi.hierfly.Rd ├── ggobi.som.Rd ├── hierarchical.Rd ├── hierfly.Rd ├── mefly.Rd └── olive_example.Rd /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^\.travis\.yml$ 2 | ^.*\.Rproj$ 3 | ^\.Rproj\.user$ 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | -------------------------------------------------------------------------------- /CHANGELOG: -------------------------------------------------------------------------------- 1 | Version 0.3 2 | --------------------------------------- 3 | 4 | * Update documentation 5 | * Remove functions that are irrelevant to clusterfly 6 | * Add a NAMESPACE 7 | * 8 | 9 | Version 0.2.3 10 | ---------------------------------------- 11 | 12 | * Many tweaks to the display of SOMs 13 | * Modified SOM visualisation to also work with SOMs from the Kohonen package 14 | * added extra argument to clusterfly, to allow visualising variables that you don't use to cluster 15 | * added method to extract clustering from partition objects (including clara and daisy) object -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: clusterfly 2 | Type: Package 3 | Title: Explore clustering interactively using R and GGobi 4 | Version: 0.4.0.99 5 | Author: Hadley Wickham 6 | Maintainer: Hadley Wickham 7 | Description: Visualise clustering algorithms with GGobi. Contains both 8 | general code for visualising clustering results and specific 9 | visualisations for model-based, hierarchical and SOM clustering. 10 | URL: http://had.co.nz/clusterfly 11 | Depends: 12 | rggobi 13 | Imports: 14 | e1071, 15 | reshape2, 16 | plyr, 17 | RGtk2 18 | Suggests: 19 | som, 20 | mclust, 21 | kohonen, 22 | ggplot2 23 | License: MIT + file LICENSE 24 | LazyData: true 25 | Roxygen: list(wrap = FALSE) 26 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2008-2014 2 | COPYRIGHT HOLDER: Hadley Wickham 3 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2 (4.0.0): do not edit by hand 2 | 3 | S3method("[[<-",clusterfly) 4 | S3method(as.data.frame,clusterfly) 5 | S3method(close,clusterfly) 6 | S3method(clusters,default) 7 | S3method(clusters,kmeans) 8 | S3method(clusters,partition) 9 | S3method(cut,hierfly) 10 | S3method(ggobi,clusterfly) 11 | S3method(ggobi,hierfly) 12 | S3method(ggobi,kohonen) 13 | S3method(ggobi,som) 14 | S3method(ggobi,somiter) 15 | S3method(print,clusterfly) 16 | S3method(print,hierfly) 17 | S3method(summary,somiter) 18 | export(addhull) 19 | export(cfly_animate) 20 | export(cfly_clarify) 21 | export(cfly_cluster) 22 | export(cfly_dist) 23 | export(cfly_fluct) 24 | export(cfly_pcp) 25 | export(cfly_show) 26 | export(clusterfly) 27 | export(clusters) 28 | export(ellipse) 29 | export(hierfly) 30 | export(mefly) 31 | export(olive_example) 32 | import(rggobi) 33 | importFrom(RGtk2,"==.RGtkObject") 34 | importFrom(RGtk2,gSignalConnect) 35 | importFrom(e1071,matchClasses) 36 | importFrom(plyr,count) 37 | importFrom(plyr,rbind.fill) 38 | importFrom(reshape2,melt) 39 | -------------------------------------------------------------------------------- /NEWS: -------------------------------------------------------------------------------- 1 | # clusterfly 0.4.0.99 2 | 3 | # clusterfly 0.4 4 | 5 | * R CMD check fixes. 6 | -------------------------------------------------------------------------------- /R/clusterfly.r: -------------------------------------------------------------------------------- 1 | #' Creates a convenient data structure for dealing with a dataset and a number 2 | #' of alternative clusterings. 3 | #' 4 | #' Once you have created a clusterfly object, you can add 5 | #' clusterings to it with \code{\link{cfly_cluster}}, and 6 | #' visualise then in GGobi with \code{\link{cfly_show}} and 7 | #' \code{\link{cfly_animate}}. Static graphics are also 8 | #' available: \code{\link{cfly_pcp}} will produce a parallel 9 | #' coordinates plot, \code{\link{cfly_dist}} will show 10 | #' the distribution of each variable in each cluster, and 11 | #' \code{\link{cfly_fluct}} compares two clusterings with a 12 | #' fluctuation diagram. 13 | #' 14 | #' If you want to standardise the cluster labelling to one 15 | #' group, look at \code{\link{clarify}} and \code{\link{cfly_clarify}} 16 | #' 17 | #' @param df data frame to be clustered 18 | #' @param extra extra variables to be included in output, but not clustered 19 | #' @param rescale rescale, if true each variable will be scaled to have mean 0 20 | #' and variance 1. 21 | #' @seealso vignette("introduction") 22 | #' @export 23 | #' @aliases clusterfly package-clusterfly 24 | #' @import rggobi 25 | #' @keywords dynamic 26 | #' @examples 27 | #' ol <- olive_example() 28 | #' 29 | #' if (interactive()) { 30 | #' ggobi(ol) 31 | #' cfly_show(ol, "k4-1") 32 | #' cfly_animate(ol, max = 5) 33 | #' close(ol) 34 | #' } 35 | clusterfly <- function(df, extra = NULL, rescale=TRUE) { 36 | if (rescale) df <- rescaler(df) 37 | 38 | g <- NULL 39 | open_ggobi <- function() { 40 | if (is.null(g)) { 41 | clusters <- do.call("cbind", compact(list(df, extra))) 42 | g <<- ggobi(clusters) 43 | } 44 | invisible(g) 45 | } 46 | close_ggobi <- function() { 47 | if (is.null(g)) return() 48 | close(g) 49 | g <<- NULL 50 | } 51 | 52 | structure(list( 53 | df = df, 54 | extra = extra, 55 | clusters = list(), 56 | ggobi = open_ggobi, 57 | close = close_ggobi 58 | ), class="clusterfly") 59 | } 60 | 61 | 62 | #' Show in ggobi. 63 | #' Opens an instance ggobi for this dataset (if not already open), and colours 64 | #' the points according the cluster assignment. 65 | #' 66 | #' @param cf clusterfly object 67 | #' @param idx clustering to display 68 | #' @param hulls add convex hull? see \code{\link{addhull}} for details 69 | #' @keywords dynamic 70 | #' @export 71 | #' @examples 72 | #' o <- olive_example() 73 | #' cfly_show(o, 1) 74 | #' cfly_show(o, "Region") 75 | #' if (!interactive()) close(o) 76 | cfly_show <- function(cf, idx = "true", hulls = FALSE) { 77 | g <- cf$ggobi()[1] 78 | cl <- cf$clusters[[idx]] 79 | glyph_colour(g) <- cl 80 | if (hulls) { 81 | addhull(g[1], g, cl) 82 | glyph_colour(g['hulls']) <- g['hulls']$id 83 | } 84 | } 85 | 86 | #' @export 87 | ggobi.clusterfly <- function(data, ...) data$ggobi() 88 | #' @export 89 | close.clusterfly <- function(con, ...) con$close() 90 | 91 | #' @export 92 | "[[<-.clusterfly" <- function(x, i, value) { 93 | x$clusters[[i]] <- value 94 | x 95 | } 96 | 97 | #' @export 98 | print.clusterfly <- function(x, ...) { 99 | cat("Data: ", paste(names(x$df), collapse=", "), " [", nrow(x$df), "x", ncol(x$df), "]\n", sep="") 100 | cat("Extra: ", paste(names(x$extra), collapse=", "), " [", nrow(x$extra), "x", ncol(x$df), "]\n", sep="") 101 | cat("Clusters: ", paste(names(x$clusters), collapse=", "), "\n", sep="") 102 | } 103 | 104 | 105 | #' Convert clusterfly object to data.frame. 106 | #' Concatenates data and cluster assignments into one data.frame. 107 | #' Cluster assignments are prefixed with \code{cl_}. 108 | #' 109 | #' @export 110 | #' @method as.data.frame clusterfly 111 | #' @param x clusterfly object 112 | #' @param ... ignored 113 | #' @keywords manip 114 | as.data.frame.clusterfly <- function(x, ...) { 115 | cl <- as.data.frame(x$clusters) 116 | if (ncol(cl) > 0) { 117 | names(cl) <- paste("cl_", names(cl), sep="") 118 | } else { 119 | cl <- NULL 120 | } 121 | do.call("cbind", compact(list(x$df, x$extra, cl))) 122 | } 123 | 124 | #' Match all cluster indices to common reference. 125 | #' 126 | #' It's a good idea to run this before running any 127 | #' animation sequences so that unnecessary colour 128 | #' changes are minimised. 129 | #' 130 | #' @param cf clusterfly object 131 | #' @param reference index to reference clustering 132 | #' @param method method to use, see \code{\link{clarify}} 133 | #' @keywords manip 134 | #' @export 135 | #' @examples 136 | #' o <- olive_example() 137 | #' o <- cfly_clarify(o, "Region") 138 | cfly_clarify <- function(cf, reference=1, method="rowmax") { 139 | ref <- cf$clusters[[reference]] 140 | cf$clusters <- sapply(cf$cluster, function(x) clarify(x, ref, method=method), simplify=FALSE) 141 | cf 142 | } 143 | 144 | #' Add clustering. 145 | #' 146 | #' Clustering method needs to respond to \code{\link{clusters}}, 147 | #' if the default does not work, you will need to write 148 | #' your own to extract clusters. 149 | #' 150 | #' @param cf clusterfly object 151 | #' @param method clusterfing method (function) 152 | #' @param ... arguments passed to clustering method 153 | #' @param name name of clustering 154 | #' @keywords manip 155 | #' @export 156 | #' @examples 157 | #' o <- olive_example() 158 | #' cfly_cluster(o, kmeans, 4) 159 | #' cfly_cluster(o, kmeans, 4, name="blah") 160 | cfly_cluster <- function(cf, method, ..., name = deparse(substitute(method))) { 161 | cf[[name]] <- clusters(method(cf$df, ...)) 162 | cf 163 | } 164 | 165 | 166 | #' Dynamic plot: Animate glyph colours 167 | #' 168 | #' This function will animate until you manually break the loop 169 | #' using Ctrl-Break or Ctrl-C. 170 | #' 171 | #' @param cf list of cluster ids that you want to animate across 172 | #' @param clusters clusters to display 173 | #' @param pause clusters number of seconds to pause between each change 174 | #' @param print print current cluster to screen? 175 | #' @param max_iterations maximum number of interations 176 | #' @keywords dynamic 177 | #' @export 178 | #' @examples 179 | #' # Press Ctrl-Break or Ctrl-C to exit 180 | #' if (interactive()) { 181 | #' o <- olive_example() 182 | #' cfly_animate(cfly_clarify(o), max = 5) 183 | #' close(o) 184 | #' } 185 | cfly_animate <- function(cf, clusters = seq_along(cf$clusters), pause = 1, print=TRUE, max_iterations = 100) { 186 | g <- cf$ggobi() 187 | gd <- g[1] 188 | 189 | if (is.character(clusters)) clusters <- match(clusters, names(cf$clusters)) 190 | 191 | count <- 1 192 | while(TRUE) { 193 | for(i in clusters) { 194 | if (!valid_ggobi(g)) return() 195 | if (print) cat("Current cluster: ", names(cf$clusters)[i], "\n") 196 | glyph_colour(gd) <- cf$clusters[[i]] 197 | Sys.sleep(pause) 198 | 199 | count <- count + 1 200 | if (count > max_iterations) return() 201 | } 202 | } 203 | } 204 | 205 | -------------------------------------------------------------------------------- /R/data.r: -------------------------------------------------------------------------------- 1 | #' Example clusterfly object created with olives data 2 | #' 3 | #' @keywords dataset 4 | #' @export 5 | olive_example <- function() { 6 | ol <- clusterfly(olives[, -(1:3)], olives[, 2:3]) 7 | ol <- cfly_cluster(ol, kmeans, 3, name="kmeans") 8 | ol <- cfly_cluster(ol, kmeans, 4, name="k4-1") 9 | ol <- cfly_cluster(ol, kmeans, 4, name="k4-2") 10 | ol <- cfly_cluster(ol, kmeans, 4, name="k4-3") 11 | ol[["Region"]] <- olives$Region 12 | ol[["Area"]] <- olives$Area 13 | 14 | ol 15 | } 16 | -------------------------------------------------------------------------------- /R/ellipsoid.r: -------------------------------------------------------------------------------- 1 | #' Create multivariate ellipse. 2 | #' Randomly sample points from a probability contour of a multivariate normal. 3 | #' 4 | #' There are two ways to use this function. You can either supply 5 | #' a data set for which a multivariate normal ellipse will be drawn 6 | #' or you can supply the mean vector, covariance matrix and number 7 | #' of dimensions yourself. 8 | #' 9 | #' @param data data frame or matrix 10 | #' @param npoints number of points to sample 11 | #' @param cl proportion of density contained within ellipse 12 | #' @param mean mean vector 13 | #' @param cov variance-covariance matrix 14 | #' @param df degrees of freedom used for calculating F statistic 15 | #' @export 16 | #' @keywords internal 17 | ellipse <- function(data, npoints=1000, cl=0.95, mean=colMeans(data), cov=var(data), df=nrow(data)) 18 | { 19 | norm.vec <- function(x) x / sqrt(sum(x^2)) 20 | 21 | p <- length(mean) 22 | ev <- eigen(cov) 23 | 24 | sphere <- matrix(rnorm(npoints*p), ncol=p) 25 | cntr <- t(apply(sphere, 1, norm.vec)) 26 | 27 | cntr <- cntr %*% diag(sqrt(ev$values)) %*% t(ev$vectors) 28 | cntr <- cntr * sqrt(p * (df-1) * qf(cl, p, df-p) / (df * (df-p))) 29 | if (!missing(data)) colnames(cntr) <- colnames(data) 30 | 31 | cntr + rep(mean, each=npoints) 32 | } 33 | -------------------------------------------------------------------------------- /R/extract.r: -------------------------------------------------------------------------------- 1 | #' Extract clusters from clustering object. 2 | #' 3 | #' @param x object 4 | #' @export 5 | #' @keywords internal 6 | clusters <- function(x) UseMethod("clusters", x) 7 | #' @export 8 | clusters.kmeans <- function(x) as.vector(x$cluster) 9 | #' @export 10 | clusters.default <- function(x) as.vector(x) 11 | #' @export 12 | clusters.partition <- function(x) x$clustering 13 | -------------------------------------------------------------------------------- /R/ggplot.r: -------------------------------------------------------------------------------- 1 | #' Static plot: Parallel coordinates. 2 | #' Draw a parallel coordinates plot, facetted across clustering. 3 | #' 4 | #' This really only a proof of concept, a truly useful PCP 5 | #' needs interaction, especially to move the variables around. 6 | #' 7 | #' @param cfly clusterfly object 8 | #' @param index clustering to use 9 | #' @param ... other arguments passed to \code{\link[ggplot2]{geom_line}} 10 | #' @export 11 | #' @keywords hplot 12 | #' @examples 13 | #' if (require("ggplot2")) { 14 | #' o <- olive_example() 15 | #' cfly_pcp(o, "kmeans") 16 | #' cfly_pcp(o, "kmeans", alpha = 1/10) 17 | #' cfly_pcp(o, "kmeans", alpha = 1/10) + coord_flip() 18 | #' } 19 | cfly_pcp <- function(cfly, index, ...) { 20 | stopifnot(require("ggplot2")) 21 | 22 | df <- data.frame( 23 | rescaler(cfly$df), 24 | .cluster = cfly$clusters[[index]], 25 | .id = 1:nrow(cfly$df)) 26 | dfm <- melt(df, id = c(".cluster", ".id")) 27 | 28 | ggplot(dfm, aes_string(x = "variable", y = "value", group = ".id")) + 29 | geom_line(...) + 30 | facet_wrap(~ .cluster) 31 | } 32 | 33 | #' Static plot: Variable distribution. 34 | #' Draw a density plot for each continuous variable, facetted across clustering. 35 | #' 36 | #' This allows you to quickly visualise how the cluster 37 | #' vary in a univariate manner. Currently, it is a bit 38 | #' of a hack, because \code{\link[ggplot2]{ggplot}} does 39 | #' not support plots with different scales, so the variables 40 | #' are manually rescaled prior to plotting. 41 | #' 42 | #' This plot is inspired by Gaguin \url{http://www.rosuda.org/gaguin}. 43 | #' 44 | #' @param cfly clusterfly object 45 | #' @param index clustering to use 46 | #' @param scale scaling to use 47 | #' @keywords hplot 48 | #' @export 49 | #' @examples 50 | #' if (require("ggplot2")) { 51 | #' o <- olive_example() 52 | #' cfly_dist(o, "kmeans") 53 | #' cfly_dist(o, "kmeans") + scale_y_continuous(limit=c(0, 2)) 54 | #' } 55 | #' @importFrom reshape2 melt 56 | cfly_dist <- function(cfly, index, scale="range") { 57 | stopifnot(require("ggplot2")) 58 | 59 | df <- cbind(cfly$df, .cluster=factor(cfly$clusters[[index]])) 60 | dfm <- melt(rescaler(df, scale), id=".cluster") 61 | 62 | ggplot(dfm, aes_string(x = "value")) + 63 | geom_density() + 64 | facet_grid(.cluster ~ variable) 65 | } 66 | 67 | #' Static plot: Fluctuation diagram. 68 | #' Draw a fluctuation diagram comparing two clusterings. 69 | #' 70 | #' @param cfly clusterfly object 71 | #' @param a first clustering, will be reordered to match \code{b} if clarify=TRUE 72 | #' @param b second clustering 73 | #' @param clarify use \code{\link{clarify}} to rearranged cluster indices? 74 | #' @keywords hplot 75 | #' @export 76 | #' @importFrom plyr count 77 | #' @examples 78 | #' if (require("ggplot2")) { 79 | #' o <- olive_example() 80 | #' cfly_fluct(o, "kmeans", "Region") 81 | #' cfly_fluct(o, "kmeans", "Region", clarify = FALSE) 82 | #' } 83 | cfly_fluct <- function(cfly, a, b, clarify=TRUE) { 84 | stopifnot(require("ggplot2")) 85 | 86 | ca <- cfly$clusters[[a]] 87 | cb <- cfly$clusters[[b]] 88 | if (clarify) ca <- clarify(ca, cb) 89 | 90 | counts <- count(data.frame(x = factor(ca), y = factor(cb))) 91 | nx <- length(levels(counts$x)) 92 | ny <- length(levels(counts$y)) 93 | 94 | counts$freq <- sqrt(counts$freq / max(counts$freq)) 95 | 96 | ggplot(counts, aes_string(x = "x", y = "y", height = "freq", width = "freq")) + 97 | geom_tile(colour = "white") + 98 | scale_y_discrete(a) + 99 | scale_x_discrete(b) + 100 | theme(aspect.ratio = ny/nx) 101 | } 102 | -------------------------------------------------------------------------------- /R/hclust.r: -------------------------------------------------------------------------------- 1 | # Need a new type of linking to make this work 2 | # Brushing a node should highlight all nodes and leaves below it 3 | # (investigate nested set representation for efficient storage) 4 | # 5 | # Can this be done using rggobi and the old linking code? 6 | # Should it be added to ggobi as a new type of linking? 7 | # * If so, as the general case - defined by edges? 8 | # * Or for the particular nested set representation? 9 | 10 | 11 | #' Visualisig hierarchical clustering. 12 | #' This method supplements a data set with information needed to draw a 13 | #' dendrogram 14 | #' 15 | #' Intermediate cluster nodes are added as needed, and positioned at the 16 | #' centroid of the combined clusters. 17 | #' 18 | #' @param data data set 19 | #' @param metric distance metric to use, see \code{\link{dist}} for list of 20 | #' possibilities 21 | #' @param method cluster distance measure to use, see \code{\link{hclust}} for 22 | #' details 23 | #' @return object of type, hierfly 24 | #' @seealso \code{\link{cut.hierfly}}, \code{\link{ggobi.hierfly}} 25 | #' @keywords cluster 26 | #' @export 27 | #' @examples 28 | #' h <- hierfly(iris) 29 | #' ggobi(h) 30 | #' h <- hierfly(iris, method="single") 31 | hierfly <- function(data, metric="euclidean", method="average") { 32 | cat <- sapply(data, is.factor) 33 | h <- hclust(dist(data[,!cat], metric), method) 34 | 35 | data$ORDER <- order(h$order) 36 | data$HEIGHT <- 0 37 | data$LEVEL <- 0 38 | data$POINTS <- 1 39 | 40 | for (i in 1:nrow(h$merge)) { 41 | newr <- combinerows(data[as.character(-h$merge[i,]),], cat) 42 | newr$HEIGHT <- h$height[i] 43 | newr$LEVEL <- i 44 | rownames(newr) <- as.character(-i) 45 | 46 | data <- rbind(data, newr) 47 | } 48 | 49 | data$node <- (as.numeric(rownames(data)) < 0) + 0 50 | 51 | structure(list(data=data, hclust=h), class="hierfly") 52 | } 53 | 54 | combinerows <- function(df, cat) { 55 | same <- function(x) if (length(unique(x)) == 1) x[1] else NA 56 | points <- df$POINTS 57 | 58 | cont <- as.data.frame(lapply(df[, !cat, drop=FALSE] * points, sum)) / sum(points) 59 | cat <- as.data.frame(lapply(df[, cat, drop=FALSE], same)) 60 | 61 | df <- if (nrow(cont) > 0 && nrow(cat) > 0) { 62 | cbind(cont, cat) 63 | } else if (nrow(cont) > 0) { 64 | cont 65 | } else { 66 | cat 67 | } 68 | df$POINTS <- sum(points) 69 | df 70 | } 71 | 72 | #' @export 73 | print.hierfly <- function(x, ...) { 74 | print(str(x)) 75 | } 76 | 77 | #' Visualise hierarchical clustering with GGobi. 78 | #' Displays both data and dendrogram in original high-d space. 79 | #' 80 | #' This adds four new variables to the original data set: 81 | #' 82 | #' \itemize{ 83 | #' \item ORDER, the order in which the clusters are joined 84 | #' \item HEIGHT, the height of the branch, ie. the dissimilarity between the branches 85 | #' \item LEVEL, the level of the branch 86 | #' \item POINTS, the number of points in the branch 87 | #' } 88 | #' 89 | #' Make sure to select "attach edge set (edges)" in the in the edges menu on the 90 | #' plot window, when you create a new plot. 91 | #' 92 | #' A tour over the original variables will show how the clusters agglomerate 93 | #' in space. Plotting order vs height, level or points will give various 94 | #' types of dendograms. A correlation tour with height/level/points on the y 95 | #' axis and the original variables on the x axis will show a mobile blowing 96 | #' in the wind. 97 | #' 98 | #' @param data hierfly object to visualise in GGobi 99 | #' @param ... ignored 100 | #' @seealso \code{\link{cut.hierfly}} 101 | #' @keywords cluster dynamic 102 | #' @export 103 | #' @examples 104 | #' h <- hierfly(iris) 105 | #' ggobi(h) 106 | #' h <- hierfly(iris, method="single") 107 | ggobi.hierfly <- function(data, ...) { 108 | h <- data$hclust 109 | data <- data$data 110 | 111 | g <- ggobi(data) 112 | d <- g[1] 113 | glyph_type(d) <- ifelse(data$node != 0, 1, 6) 114 | 115 | e <- data.frame(level=1:length(h$height), height=h$height)[rep(1:length(h$height), 2), ] 116 | rownames(e) <- paste("e", 1:nrow(e), sep="") 117 | 118 | g$edges <- e 119 | edges(g$edges) <- cbind(as.character(-h$merge), -rep(1:nrow(h$merge), 2)) 120 | 121 | d <- displays(g)[[1]] 122 | edges(d) <- g[2] 123 | 124 | invisible(g) 125 | } 126 | 127 | #' Cut hierfly object into k clusters/colours. 128 | #' 129 | #' @param x hierfly object to colour 130 | #' @param k number of clusters 131 | #' @param g GGobi instance displaying x, will create new if not specified 132 | #' @param ... ignored 133 | #' @keywords cluster 134 | #' @export 135 | #' @examples 136 | #' h <- hierfly(iris) 137 | #' hfly <- ggobi(h) 138 | #' cut(h, 2, hfly) 139 | #' h <- hierfly(iris, method="ward") 140 | #' g <- ggobi(h) 141 | #' cut(h, 2, g) 142 | cut.hierfly <- function(x, k=2, g=ggobi(x), ...) { 143 | d <- g[1] 144 | glyph_colour(d) <- c(cutree(x$hclust, k=k) + 1, rep(1, length(x$hclust$height))) 145 | } 146 | -------------------------------------------------------------------------------- /R/hulls.r: -------------------------------------------------------------------------------- 1 | #' Add convex hulls 2 | #' Add conver hulls using the tool qconvex 3 | #' 4 | #' To use this command you must have qconvex installed and available 5 | #' on your path. I'm not sure if this will work on windows (probably not) 6 | #' but it's not a big loss, because the technique isn't very useful 7 | #' anyway. 8 | #' 9 | #' @export 10 | #' @param gd ggobi dataset 11 | #' @param g ggobi reference 12 | #' @param by grouping variable 13 | #' @keywords hplot 14 | addhull <- function(gd, g, by) { 15 | mat <- as.data.frame(gd) 16 | rownames(mat) <- rownames(gd) 17 | by <- rep(by, length=nrow(gd)) 18 | 19 | edges <- tapply(1:length(by), by, function(i) { 20 | qh <- qhull(as.data.frame(mat)[i,]) 21 | if(is.null(qh)) return() 22 | cbind(qh, by[i[1]]) 23 | }) 24 | edges <- do.call(rbind, compact(edges)) 25 | 26 | g['hulls'] <- data.frame(id=as.numeric(edges[,3])) 27 | edges(g['hulls']) <- edges 28 | 29 | invisible() 30 | } 31 | 32 | qhull <- function(mat) { 33 | if (nrow(mat) < 5) return() 34 | #if (is.null(rownames(mat))) rownames(mat) <- 1:nrow(mat) 35 | 36 | output <- system(paste("echo '", qhullout(mat), "' | qconvex QbB i"), TRUE)[-1] 37 | facets <- do.call(rbind, lapply(strsplit(output, " "), function(x) as.numeric(sort(x)))) + 1 38 | 39 | combs <- expand.grid(i = 1:ncol(facets), j = 1:ncol(facets)) 40 | combs <- combs[combs$i < combs$j, , drop = FALSE] 41 | 42 | edges <- unique(do.call(rbind, 43 | lapply(1:nrow(combs), function(x) facets[, unlist(combs[x,])]) 44 | )) 45 | 46 | t(apply(edges, 1, function(x) rownames(mat)[x])) 47 | #edges 48 | } 49 | 50 | qhullout <- function(mat) { 51 | paste( 52 | ncol(mat), "\n", 53 | nrow(mat), "\n", 54 | paste(apply(mat, 1, paste, collapse=" "), collapse="\n"), 55 | sep="" 56 | ) 57 | 58 | } 59 | 60 | -------------------------------------------------------------------------------- /R/model-based.r: -------------------------------------------------------------------------------- 1 | #' Display model based clustering with mvn ellipses. 2 | #' Displays the results of model based clustering with an ellipse drawn from 3 | #' the multivariate normal model for each group. 4 | #' 5 | #' @param model output from me function 6 | #' @param data input data frame to me 7 | #' @keywords cluster dynamic 8 | #' @export 9 | #' @examples 10 | #' if(require("mclust")) { 11 | #' eei <- me(modelName = "EEI", data = iris[,-5], z = unmap(iris[,5])) 12 | #' vvv <- me(modelName = "VVV", data = iris[,-5], z = unmap(iris[,5])) 13 | #' vvi <- me(modelName = "VVI", data = iris[,-5], z = unmap(iris[,5])) 14 | #' mefly(eei, iris[,-5]) 15 | #' mefly(vvi, iris[,-5]) 16 | #' mefly(vvv, iris[,-5]) 17 | #' } 18 | #' @importFrom plyr rbind.fill 19 | mefly <- function(model, data) { 20 | mean <- model$parameters$mean 21 | var <- model$parameters$variance$sigma 22 | 23 | ellipses <- do.call("rbind", lapply(1:ncol(mean), function(i) { 24 | data.frame(ellipse(mean = mean[,i], cov = var[,, i], df=10), cluster=i) 25 | })) 26 | colnames(ellipses) <- c(colnames(data), "cluster") 27 | ellipses$TYPE <- factor("ellipse") 28 | data$TYPE <- factor("data") 29 | 30 | all <- rbind.fill(ellipses, cbind(data, cluster=max.col(model$z))) 31 | 32 | g <- ggobi(all) 33 | glyph_type(g[1]) <- c(1,6)[all$TYPE] 34 | glyph_colour(g[1]) <- all$cluster 35 | invisible(g) 36 | } 37 | -------------------------------------------------------------------------------- /R/som-iteration.r: -------------------------------------------------------------------------------- 1 | som_iterate <- function(df, grid, nsteps = 100, stepsize = 10, alpha = 0.05, radius = NULL) { 2 | if (is.null(radius)) { 3 | radius <- c(quantile(unit.distances(grid, FALSE), 0.67), 0) 4 | } 5 | 6 | # Alpha decreases linearly 7 | alpha_step <- function(i) alpha - (alpha - 0.01) * (i + c(-1, 0)) / nsteps 8 | # Radius decraeses linearly to 1 by 1/3 of steps 9 | radius_step <- function(i) { 10 | r <- radius[1] - (radius[1] - radius[2]) * 3.0 * (i - 1) / nsteps; 11 | ifelse(r < radius[2], max(0.5, radius[2]), r) 12 | } 13 | 14 | print(radius_step(1:nsteps)) 15 | 16 | i <- 1 17 | fit <- kohonen::som(df, grid, rlen = stepsize, alpha = alpha_step(i), radius = radius_step(i)) 18 | 19 | step <- function() { 20 | i <<- i + 1 21 | fit <<- kohonen::som(df, grid, rlen = stepsize, init = fit$codes, 22 | alpha = alpha_step(i), radius=radius_step(i), keep.data = TRUE 23 | ) 24 | 25 | fit 26 | } 27 | 28 | structure(c(list(fit), replicate(nsteps - 1, step(), simplify=FALSE)), class="somiter") 29 | } 30 | 31 | #' @export 32 | summary.somiter <- function(object, ...) { 33 | interesting <- function(fit) { 34 | df <- data.frame( 35 | alpha_start = fit$alpha[1], alpha_end = fit$alpha[2], 36 | radius = fit$radius[1], 37 | change_mean = mean(fit$changes), 38 | dist_mean = mean(fit$distances), 39 | dist_sd = sd(fit$distances) 40 | ) 41 | df$codes <- list(fit$codes) 42 | df$map <- list(fit$grid$pts[fit$unit.classif, ]) 43 | df 44 | } 45 | df <- do.call("rbind", lapply(object, interesting)) 46 | df$step <- 1:nrow(df) 47 | class(df) <- c("somitersum", class(df)) 48 | rownames(df) <- paste("step", 1:nrow(df), sep="") 49 | df 50 | } 51 | 52 | #' @importFrom reshape2 melt 53 | #' @importFrom RGtk2 gSignalConnect ==.RGtkObject 54 | #' @export 55 | ggobi.somiter <- function(data, extra = NULL, ...) { 56 | 57 | g <- ggobi(data[[1]], extra=extra) 58 | all_fits <- summary(data) 59 | fits <- fits[setdiff(names(fits), c("codes", "map"))] 60 | 61 | jittering <- jitter(all_fits[[1, "map"]]) - all_fits[[1, "map"]] 62 | 63 | distances <- melt(sapply(data, function(x) x$distances)) 64 | names(distances) <- c("oid", "step", "value") 65 | distances$oid <- factor(distances$oid) 66 | oid <- NULL # stupid hack for R CMD check 67 | ggobi_longitudinal(distances, step, oid, g = g) 68 | 69 | ggobi_longitudinal(fits, step, g = g) 70 | d <- display(g["fits"], vars = list(X = "step", Y = "dist_mean")) 71 | edges(d) <- g["fits-edges"] 72 | 73 | gSignalConnect(g, "identify-point", function(gg, plot, id, data) { 74 | if (id == -1 || !"==.RGtkObject"(data, gg$fits)) return() 75 | id <- id + 1 76 | 77 | codes <- all_fits[[id, "codes"]] 78 | gg$df[gg$df$net == TRUE, 1:ncol(codes)] <- codes 79 | 80 | map <- all_fits[[id, "map"]] + jittering 81 | gg$df[gg$df$net == FALSE, c("map1", "map2")] <- map 82 | 83 | # dist <- all_fits[[id, "dist"]] 84 | # gg$df[gg$df$net == FALSE, c("distance")] <- as.matrix(dist) 85 | }) 86 | 87 | invisible(g) 88 | } 89 | -------------------------------------------------------------------------------- /R/som.r: -------------------------------------------------------------------------------- 1 | #' Visualise Kohonen self organising maps with GGobi 2 | #' Displays both data, and map in original high-d space. 3 | #' 4 | #' Map variables added as map1 and map2. Plot these to 5 | #' get traditional SOM plot. Tour over all other variables to 6 | #' see how well the map fits the original data. 7 | #' 8 | #' @param data SOM object 9 | #' @param ... ignored 10 | #' @method ggobi som 11 | #' @keywords cluster dynamic 12 | #' @export 13 | #' @examples 14 | #' \dontrun{ 15 | #' d.music <- read.csv("http://www.ggobi.org/book/data/music-all.csv") 16 | #' 17 | #' music <- rescaler(d.music)[complete.cases(d.music), 1:10] 18 | #' music.som <- som::som(music[,-(1:3)], 6, 6, neigh="bubble", rlen=1000) 19 | #' ggobi(music.som) 20 | #' } 21 | #' \dontrun{ 22 | #' d.music <- read.csv("http://www.ggobi.org/book/data/music-all.csv") 23 | #' 24 | #' music <- rescaler(d.music)[complete.cases(d.music), 1:10] 25 | #' music.hex <- kohonen::som(music[,-(1:3)], grid = somgrid(3, 3, "hexagonal"), rlen=1000) 26 | #' music.rect <- kohonen::som(music[,-(1:3)], grid = somgrid(6, 6, "rectangular"), rlen=1000) 27 | #' ggobi(music.rect) 28 | #' } 29 | ggobi.som <- function(data, ...) { 30 | som <- data 31 | original <- data.frame( 32 | som$data, 33 | map1 = jitter(som$visual$x) + 1, 34 | map2 = jitter(som$visual$y) + 1, 35 | net = factor(FALSE) 36 | ) 37 | 38 | xs <- som$xdim 39 | ys <- som$ydim 40 | 41 | net <- som$code 42 | colnames(net) <- colnames(som$data) 43 | net <- cbind(net, expand.grid(map1=1:xs, map2=1:ys), net=factor(TRUE)) 44 | rownames(net) <- paste("net", 1:nrow(net), sep="") 45 | names(net) <- names(original) 46 | 47 | df <- rbind(original, net) 48 | 49 | g <- ggobi(df) 50 | glyph_colour(g[1]) <- c(1,3)[df$net] 51 | shadowed(g[1]) <- c(FALSE,TRUE)[df$net] 52 | d <- displays(g)[[1]] 53 | variables(d) <- list(X = "map1", Y = "map2") 54 | 55 | # Add net edges 56 | netlines <- make_rect_net(xs, ys) 57 | edges(g) <- netlines 58 | glyph_colour(g[2]) <- 3 59 | edges(d) <- g[2] 60 | 61 | invisible(g) 62 | } 63 | 64 | #' @export 65 | #' @importFrom plyr rbind.fill 66 | ggobi.kohonen <- function(data, extra = NULL, ...) { 67 | 68 | som <- data 69 | 70 | original <- data.frame( 71 | som$data, 72 | map1 = jitter(som$grid$pts[som$unit.classif, 1]), 73 | map2 = jitter(som$grid$pts[som$unit.classif, 2]), 74 | distance = som$distance, 75 | net = factor(FALSE), 76 | oid = factor(1:nrow(som$data)) 77 | ) 78 | if (!is.null(extra)) original <- cbind(original, extra) 79 | 80 | net <- data.frame( 81 | map1 = som$grid$pts[, 1], 82 | map2 = som$grid$pts[, 2], 83 | som$codes, 84 | net = factor(TRUE), 85 | oid = factor(paste("net"), 1:nrow(som$grid$pts)) 86 | ) 87 | rownames(net) <- paste("net", 1:nrow(net), sep="") 88 | 89 | df <- rbind.fill(original, net) 90 | 91 | g <- ggobi(df) 92 | glyph_colour(g[1]) <- c(1,3)[df$net] 93 | shadowed(g[1]) <- c(FALSE,TRUE)[df$net] 94 | d <- displays(g)[[1]] 95 | variables(d) <- list(X = "map1", Y = "map2") 96 | 97 | if (som$grid$topo == "rectangular") { 98 | netlines <- make_rect_net(som$grid$xdim, som$grid$ydim) 99 | edges(g) <- netlines 100 | glyph_colour(g[2]) <- 3 101 | edges(d) <- g[2] 102 | } 103 | 104 | invisible(g) 105 | } 106 | 107 | make_rect_net <- function(xs, ys) { 108 | netlines <- with(expand.grid(y=1:(xs-1), x=1:(ys)), rbind( 109 | cbind((x - 1) * xs + y, (x - 1) * xs + y + 1), 110 | cbind((x - 1) * xs + y, x * xs + y) 111 | )) 112 | netlines <- rbind(netlines, cbind(1:(ys-1) * xs, 2:ys * xs)) 113 | netlines <- apply(netlines, 2, function(x) paste("net", x, sep="")) 114 | netlines 115 | } 116 | 117 | -------------------------------------------------------------------------------- /R/sysdata.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hadley/clusterfly/1f0e14c7e8348fb63ef155c0976277fb6448a799/R/sysdata.rda -------------------------------------------------------------------------------- /R/utils.r: -------------------------------------------------------------------------------- 1 | #' Hierachical clustering 2 | #' Convenient methods for hierachical clustering 3 | #' 4 | #' @param df data frame 5 | #' @param method method to use, see \code{\link{hclust}} 6 | #' @param metric distance metric to use, see \code{\link{dist}} 7 | #' @param n number of clusters to retrieve, see \code{\link{cut}} 8 | #' @keywords cluster 9 | hierarchical <- function(df, method="complete", metric="euclidean", n=5) { 10 | if (metric == 'correlation') { 11 | df <- scale(as.matrix(df)) 12 | metric <- "euclidean" 13 | } 14 | as.vector(cutree(hclust(dist(df, metric), method=method), n)) 15 | } 16 | 17 | #' Clarify matrix 18 | #' Clarify matrix ordering to minimize off diagonals 19 | #' 20 | #' @param a cluster assignments to reassign 21 | #' @param b matrix b 22 | #' @param method clarification method 23 | #' @return vector of reassigned cluster a 24 | #' @keywords manip 25 | #' @seealso \code{\link[e1071]{matchClasses}} 26 | #' @importFrom e1071 matchClasses 27 | clarify <- function(a, b, method="greedy") { 28 | m <- matchClasses(table(a,b), method=method, verbose=FALSE) 29 | as.vector(m[a]) 30 | } 31 | 32 | 33 | rescaler <- function(df, type = "sd") { 34 | f <- switch(type, 35 | rank = function(x, ...) rank(x, ...), 36 | var = , sd = function(x) (x - mean(x, na.rm = TRUE)) / sd(x, na.rm = TRUE), 37 | robust = function(x) (x - median(x, na.rm = TRUE)) / mad(x, na.rm = TRUE), 38 | I = function(x) x, 39 | range = function(x) (x - min(x, na.rm = TRUE)) / diff(range(x, na.rm = TRUE))) 40 | 41 | continuous <- vapply(df, is.numeric, logical(1)) 42 | df[continuous] <- lapply(df[continuous], f) 43 | df 44 | } 45 | 46 | compact <- function(x) Filter(Negate(is.null), x) 47 | -------------------------------------------------------------------------------- /clusterfly.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | PackageRoxygenize: rd,collate,namespace 22 | -------------------------------------------------------------------------------- /man/addhull.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.0): do not edit by hand 2 | \name{addhull} 3 | \alias{addhull} 4 | \title{Add convex hulls 5 | Add conver hulls using the tool qconvex} 6 | \usage{ 7 | addhull(gd, g, by) 8 | } 9 | \arguments{ 10 | \item{gd}{ggobi dataset} 11 | 12 | \item{g}{ggobi reference} 13 | 14 | \item{by}{grouping variable} 15 | } 16 | \description{ 17 | To use this command you must have qconvex installed and available 18 | on your path. I'm not sure if this will work on windows (probably not) 19 | but it's not a big loss, because the technique isn't very useful 20 | anyway. 21 | } 22 | \keyword{hplot} 23 | 24 | -------------------------------------------------------------------------------- /man/as.data.frame.clusterfly.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.0): do not edit by hand 2 | \name{as.data.frame.clusterfly} 3 | \alias{as.data.frame.clusterfly} 4 | \title{Convert clusterfly object to data.frame. 5 | Concatenates data and cluster assignments into one data.frame. 6 | Cluster assignments are prefixed with \code{cl_}.} 7 | \usage{ 8 | \method{as.data.frame}{clusterfly}(x, ...) 9 | } 10 | \arguments{ 11 | \item{x}{clusterfly object} 12 | 13 | \item{...}{ignored} 14 | } 15 | \description{ 16 | Convert clusterfly object to data.frame. 17 | Concatenates data and cluster assignments into one data.frame. 18 | Cluster assignments are prefixed with \code{cl_}. 19 | } 20 | \keyword{manip} 21 | 22 | -------------------------------------------------------------------------------- /man/cfly_animate.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.0): do not edit by hand 2 | \name{cfly_animate} 3 | \alias{cfly_animate} 4 | \title{Dynamic plot: Animate glyph colours} 5 | \usage{ 6 | cfly_animate(cf, clusters = seq_along(cf$clusters), pause = 1, 7 | print = TRUE, max_iterations = 100) 8 | } 9 | \arguments{ 10 | \item{cf}{list of cluster ids that you want to animate across} 11 | 12 | \item{clusters}{clusters to display} 13 | 14 | \item{pause}{clusters number of seconds to pause between each change} 15 | 16 | \item{print}{print current cluster to screen?} 17 | 18 | \item{max_iterations}{maximum number of interations} 19 | } 20 | \description{ 21 | This function will animate until you manually break the loop 22 | using Ctrl-Break or Ctrl-C. 23 | } 24 | \examples{ 25 | # Press Ctrl-Break or Ctrl-C to exit 26 | if (interactive()) { 27 | o <- olive_example() 28 | cfly_animate(cfly_clarify(o), max = 5) 29 | close(o) 30 | } 31 | } 32 | \keyword{dynamic} 33 | 34 | -------------------------------------------------------------------------------- /man/cfly_clarify.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.0): do not edit by hand 2 | \name{cfly_clarify} 3 | \alias{cfly_clarify} 4 | \title{Match all cluster indices to common reference.} 5 | \usage{ 6 | cfly_clarify(cf, reference = 1, method = "rowmax") 7 | } 8 | \arguments{ 9 | \item{cf}{clusterfly object} 10 | 11 | \item{reference}{index to reference clustering} 12 | 13 | \item{method}{method to use, see \code{\link{clarify}}} 14 | } 15 | \description{ 16 | It's a good idea to run this before running any 17 | animation sequences so that unnecessary colour 18 | changes are minimised. 19 | } 20 | \examples{ 21 | o <- olive_example() 22 | o <- cfly_clarify(o, "Region") 23 | } 24 | \keyword{manip} 25 | 26 | -------------------------------------------------------------------------------- /man/cfly_cluster.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.0): do not edit by hand 2 | \name{cfly_cluster} 3 | \alias{cfly_cluster} 4 | \title{Add clustering.} 5 | \usage{ 6 | cfly_cluster(cf, method, ..., name = deparse(substitute(method))) 7 | } 8 | \arguments{ 9 | \item{cf}{clusterfly object} 10 | 11 | \item{method}{clusterfing method (function)} 12 | 13 | \item{...}{arguments passed to clustering method} 14 | 15 | \item{name}{name of clustering} 16 | } 17 | \description{ 18 | Clustering method needs to respond to \code{\link{clusters}}, 19 | if the default does not work, you will need to write 20 | your own to extract clusters. 21 | } 22 | \examples{ 23 | o <- olive_example() 24 | cfly_cluster(o, kmeans, 4) 25 | cfly_cluster(o, kmeans, 4, name="blah") 26 | } 27 | \keyword{manip} 28 | 29 | -------------------------------------------------------------------------------- /man/cfly_dist.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.0): do not edit by hand 2 | \name{cfly_dist} 3 | \alias{cfly_dist} 4 | \title{Static plot: Variable distribution. 5 | Draw a density plot for each continuous variable, facetted across clustering.} 6 | \usage{ 7 | cfly_dist(cfly, index, scale = "range") 8 | } 9 | \arguments{ 10 | \item{cfly}{clusterfly object} 11 | 12 | \item{index}{clustering to use} 13 | 14 | \item{scale}{scaling to use} 15 | } 16 | \description{ 17 | This allows you to quickly visualise how the cluster 18 | vary in a univariate manner. Currently, it is a bit 19 | of a hack, because \code{\link[ggplot2]{ggplot}} does 20 | not support plots with different scales, so the variables 21 | are manually rescaled prior to plotting. 22 | } 23 | \details{ 24 | This plot is inspired by Gaguin \url{http://www.rosuda.org/gaguin}. 25 | } 26 | \examples{ 27 | if (require("ggplot2")) { 28 | o <- olive_example() 29 | cfly_dist(o, "kmeans") 30 | cfly_dist(o, "kmeans") + scale_y_continuous(limit=c(0, 2)) 31 | } 32 | } 33 | \keyword{hplot} 34 | 35 | -------------------------------------------------------------------------------- /man/cfly_fluct.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.0): do not edit by hand 2 | \name{cfly_fluct} 3 | \alias{cfly_fluct} 4 | \title{Static plot: Fluctuation diagram. 5 | Draw a fluctuation diagram comparing two clusterings.} 6 | \usage{ 7 | cfly_fluct(cfly, a, b, clarify = TRUE) 8 | } 9 | \arguments{ 10 | \item{cfly}{clusterfly object} 11 | 12 | \item{a}{first clustering, will be reordered to match \code{b} if clarify=TRUE} 13 | 14 | \item{b}{second clustering} 15 | 16 | \item{clarify}{use \code{\link{clarify}} to rearranged cluster indices?} 17 | } 18 | \description{ 19 | Static plot: Fluctuation diagram. 20 | Draw a fluctuation diagram comparing two clusterings. 21 | } 22 | \examples{ 23 | if (require("ggplot2")) { 24 | o <- olive_example() 25 | cfly_fluct(o, "kmeans", "Region") 26 | cfly_fluct(o, "kmeans", "Region", clarify = FALSE) 27 | } 28 | } 29 | \keyword{hplot} 30 | 31 | -------------------------------------------------------------------------------- /man/cfly_pcp.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.0): do not edit by hand 2 | \name{cfly_pcp} 3 | \alias{cfly_pcp} 4 | \title{Static plot: Parallel coordinates. 5 | Draw a parallel coordinates plot, facetted across clustering.} 6 | \usage{ 7 | cfly_pcp(cfly, index, ...) 8 | } 9 | \arguments{ 10 | \item{cfly}{clusterfly object} 11 | 12 | \item{index}{clustering to use} 13 | 14 | \item{...}{other arguments passed to \code{\link[ggplot2]{geom_line}}} 15 | } 16 | \description{ 17 | This really only a proof of concept, a truly useful PCP 18 | needs interaction, especially to move the variables around. 19 | } 20 | \examples{ 21 | if (require("ggplot2")) { 22 | o <- olive_example() 23 | cfly_pcp(o, "kmeans") 24 | cfly_pcp(o, "kmeans", alpha = 1/10) 25 | cfly_pcp(o, "kmeans", alpha = 1/10) + coord_flip() 26 | } 27 | } 28 | \keyword{hplot} 29 | 30 | -------------------------------------------------------------------------------- /man/cfly_show.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.0): do not edit by hand 2 | \name{cfly_show} 3 | \alias{cfly_show} 4 | \title{Show in ggobi. 5 | Opens an instance ggobi for this dataset (if not already open), and colours 6 | the points according the cluster assignment.} 7 | \usage{ 8 | cfly_show(cf, idx = "true", hulls = FALSE) 9 | } 10 | \arguments{ 11 | \item{cf}{clusterfly object} 12 | 13 | \item{idx}{clustering to display} 14 | 15 | \item{hulls}{add convex hull? see \code{\link{addhull}} for details} 16 | } 17 | \description{ 18 | Show in ggobi. 19 | Opens an instance ggobi for this dataset (if not already open), and colours 20 | the points according the cluster assignment. 21 | } 22 | \examples{ 23 | o <- olive_example() 24 | cfly_show(o, 1) 25 | cfly_show(o, "Region") 26 | if (!interactive()) close(o) 27 | } 28 | \keyword{dynamic} 29 | 30 | -------------------------------------------------------------------------------- /man/clarify.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.0): do not edit by hand 2 | \name{clarify} 3 | \alias{clarify} 4 | \title{Clarify matrix 5 | Clarify matrix ordering to minimize off diagonals} 6 | \usage{ 7 | clarify(a, b, method = "greedy") 8 | } 9 | \arguments{ 10 | \item{a}{cluster assignments to reassign} 11 | 12 | \item{b}{matrix b} 13 | 14 | \item{method}{clarification method} 15 | } 16 | \value{ 17 | vector of reassigned cluster a 18 | } 19 | \description{ 20 | Clarify matrix 21 | Clarify matrix ordering to minimize off diagonals 22 | } 23 | \seealso{ 24 | \code{\link[e1071]{matchClasses}} 25 | } 26 | \keyword{manip} 27 | 28 | -------------------------------------------------------------------------------- /man/clusterfly.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.0): do not edit by hand 2 | \name{clusterfly} 3 | \alias{clusterfly} 4 | \alias{package-clusterfly} 5 | \title{Creates a convenient data structure for dealing with a dataset and a number 6 | of alternative clusterings.} 7 | \usage{ 8 | clusterfly(df, extra = NULL, rescale = TRUE) 9 | } 10 | \arguments{ 11 | \item{df}{data frame to be clustered} 12 | 13 | \item{extra}{extra variables to be included in output, but not clustered} 14 | 15 | \item{rescale}{rescale, if true each variable will be scaled to have mean 0 16 | and variance 1.} 17 | } 18 | \description{ 19 | Once you have created a clusterfly object, you can add 20 | clusterings to it with \code{\link{cfly_cluster}}, and 21 | visualise then in GGobi with \code{\link{cfly_show}} and 22 | \code{\link{cfly_animate}}. Static graphics are also 23 | available: \code{\link{cfly_pcp}} will produce a parallel 24 | coordinates plot, \code{\link{cfly_dist}} will show 25 | the distribution of each variable in each cluster, and 26 | \code{\link{cfly_fluct}} compares two clusterings with a 27 | fluctuation diagram. 28 | } 29 | \details{ 30 | If you want to standardise the cluster labelling to one 31 | group, look at \code{\link{clarify}} and \code{\link{cfly_clarify}} 32 | } 33 | \examples{ 34 | ol <- olive_example() 35 | 36 | if (interactive()) { 37 | ggobi(ol) 38 | cfly_show(ol, "k4-1") 39 | cfly_animate(ol, max = 5) 40 | close(ol) 41 | } 42 | } 43 | \seealso{ 44 | vignette("introduction") 45 | } 46 | \keyword{dynamic} 47 | 48 | -------------------------------------------------------------------------------- /man/clusters.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.0): do not edit by hand 2 | \name{clusters} 3 | \alias{clusters} 4 | \title{Extract clusters from clustering object.} 5 | \usage{ 6 | clusters(x) 7 | } 8 | \arguments{ 9 | \item{x}{object} 10 | } 11 | \description{ 12 | Extract clusters from clustering object. 13 | } 14 | \keyword{internal} 15 | 16 | -------------------------------------------------------------------------------- /man/cut.hierfly.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.0): do not edit by hand 2 | \name{cut.hierfly} 3 | \alias{cut.hierfly} 4 | \title{Cut hierfly object into k clusters/colours.} 5 | \usage{ 6 | \method{cut}{hierfly}(x, k = 2, g = ggobi(x), ...) 7 | } 8 | \arguments{ 9 | \item{x}{hierfly object to colour} 10 | 11 | \item{k}{number of clusters} 12 | 13 | \item{g}{GGobi instance displaying x, will create new if not specified} 14 | 15 | \item{...}{ignored} 16 | } 17 | \description{ 18 | Cut hierfly object into k clusters/colours. 19 | } 20 | \examples{ 21 | h <- hierfly(iris) 22 | hfly <- ggobi(h) 23 | cut(h, 2, hfly) 24 | h <- hierfly(iris, method="ward") 25 | g <- ggobi(h) 26 | cut(h, 2, g) 27 | } 28 | \keyword{cluster} 29 | 30 | -------------------------------------------------------------------------------- /man/ellipse.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.0): do not edit by hand 2 | \name{ellipse} 3 | \alias{ellipse} 4 | \title{Create multivariate ellipse. 5 | Randomly sample points from a probability contour of a multivariate normal.} 6 | \usage{ 7 | ellipse(data, npoints = 1000, cl = 0.95, mean = colMeans(data), 8 | cov = var(data), df = nrow(data)) 9 | } 10 | \arguments{ 11 | \item{data}{data frame or matrix} 12 | 13 | \item{npoints}{number of points to sample} 14 | 15 | \item{cl}{proportion of density contained within ellipse} 16 | 17 | \item{mean}{mean vector} 18 | 19 | \item{cov}{variance-covariance matrix} 20 | 21 | \item{df}{degrees of freedom used for calculating F statistic} 22 | } 23 | \description{ 24 | There are two ways to use this function. You can either supply 25 | a data set for which a multivariate normal ellipse will be drawn 26 | or you can supply the mean vector, covariance matrix and number 27 | of dimensions yourself. 28 | } 29 | \keyword{internal} 30 | 31 | -------------------------------------------------------------------------------- /man/ggobi.hierfly.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.0): do not edit by hand 2 | \name{ggobi.hierfly} 3 | \alias{ggobi.hierfly} 4 | \title{Visualise hierarchical clustering with GGobi. 5 | Displays both data and dendrogram in original high-d space.} 6 | \usage{ 7 | \method{ggobi}{hierfly}(data, ...) 8 | } 9 | \arguments{ 10 | \item{data}{hierfly object to visualise in GGobi} 11 | 12 | \item{...}{ignored} 13 | } 14 | \description{ 15 | This adds four new variables to the original data set: 16 | } 17 | \details{ 18 | \itemize{ 19 | \item ORDER, the order in which the clusters are joined 20 | \item HEIGHT, the height of the branch, ie. the dissimilarity between the branches 21 | \item LEVEL, the level of the branch 22 | \item POINTS, the number of points in the branch 23 | } 24 | 25 | Make sure to select "attach edge set (edges)" in the in the edges menu on the 26 | plot window, when you create a new plot. 27 | 28 | A tour over the original variables will show how the clusters agglomerate 29 | in space. Plotting order vs height, level or points will give various 30 | types of dendograms. A correlation tour with height/level/points on the y 31 | axis and the original variables on the x axis will show a mobile blowing 32 | in the wind. 33 | } 34 | \examples{ 35 | h <- hierfly(iris) 36 | ggobi(h) 37 | h <- hierfly(iris, method="single") 38 | } 39 | \seealso{ 40 | \code{\link{cut.hierfly}} 41 | } 42 | \keyword{cluster} 43 | \keyword{dynamic} 44 | 45 | -------------------------------------------------------------------------------- /man/ggobi.som.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.0): do not edit by hand 2 | \name{ggobi.som} 3 | \alias{ggobi.som} 4 | \title{Visualise Kohonen self organising maps with GGobi 5 | Displays both data, and map in original high-d space.} 6 | \usage{ 7 | \method{ggobi}{som}(data, ...) 8 | } 9 | \arguments{ 10 | \item{data}{SOM object} 11 | 12 | \item{...}{ignored} 13 | } 14 | \description{ 15 | Map variables added as map1 and map2. Plot these to 16 | get traditional SOM plot. Tour over all other variables to 17 | see how well the map fits the original data. 18 | } 19 | \examples{ 20 | \dontrun{ 21 | d.music <- read.csv("http://www.ggobi.org/book/data/music-all.csv") 22 | 23 | music <- rescaler(d.music)[complete.cases(d.music), 1:10] 24 | music.som <- som::som(music[,-(1:3)], 6, 6, neigh="bubble", rlen=1000) 25 | ggobi(music.som) 26 | } 27 | \dontrun{ 28 | d.music <- read.csv("http://www.ggobi.org/book/data/music-all.csv") 29 | 30 | music <- rescaler(d.music)[complete.cases(d.music), 1:10] 31 | music.hex <- kohonen::som(music[,-(1:3)], grid = somgrid(3, 3, "hexagonal"), rlen=1000) 32 | music.rect <- kohonen::som(music[,-(1:3)], grid = somgrid(6, 6, "rectangular"), rlen=1000) 33 | ggobi(music.rect) 34 | } 35 | } 36 | \keyword{cluster} 37 | \keyword{dynamic} 38 | 39 | -------------------------------------------------------------------------------- /man/hierarchical.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.0): do not edit by hand 2 | \name{hierarchical} 3 | \alias{hierarchical} 4 | \title{Hierachical clustering 5 | Convenient methods for hierachical clustering} 6 | \usage{ 7 | hierarchical(df, method = "complete", metric = "euclidean", n = 5) 8 | } 9 | \arguments{ 10 | \item{df}{data frame} 11 | 12 | \item{method}{method to use, see \code{\link{hclust}}} 13 | 14 | \item{metric}{distance metric to use, see \code{\link{dist}}} 15 | 16 | \item{n}{number of clusters to retrieve, see \code{\link{cut}}} 17 | } 18 | \description{ 19 | Hierachical clustering 20 | Convenient methods for hierachical clustering 21 | } 22 | \keyword{cluster} 23 | 24 | -------------------------------------------------------------------------------- /man/hierfly.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.0): do not edit by hand 2 | \name{hierfly} 3 | \alias{hierfly} 4 | \title{Visualisig hierarchical clustering. 5 | This method supplements a data set with information needed to draw a 6 | dendrogram} 7 | \usage{ 8 | hierfly(data, metric = "euclidean", method = "average") 9 | } 10 | \arguments{ 11 | \item{data}{data set} 12 | 13 | \item{metric}{distance metric to use, see \code{\link{dist}} for list of 14 | possibilities} 15 | 16 | \item{method}{cluster distance measure to use, see \code{\link{hclust}} for 17 | details} 18 | } 19 | \value{ 20 | object of type, hierfly 21 | } 22 | \description{ 23 | Intermediate cluster nodes are added as needed, and positioned at the 24 | centroid of the combined clusters. 25 | } 26 | \examples{ 27 | h <- hierfly(iris) 28 | ggobi(h) 29 | h <- hierfly(iris, method="single") 30 | } 31 | \seealso{ 32 | \code{\link{cut.hierfly}}, \code{\link{ggobi.hierfly}} 33 | } 34 | \keyword{cluster} 35 | 36 | -------------------------------------------------------------------------------- /man/mefly.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.0): do not edit by hand 2 | \name{mefly} 3 | \alias{mefly} 4 | \title{Display model based clustering with mvn ellipses. 5 | Displays the results of model based clustering with an ellipse drawn from 6 | the multivariate normal model for each group.} 7 | \usage{ 8 | mefly(model, data) 9 | } 10 | \arguments{ 11 | \item{model}{output from me function} 12 | 13 | \item{data}{input data frame to me} 14 | } 15 | \description{ 16 | Display model based clustering with mvn ellipses. 17 | Displays the results of model based clustering with an ellipse drawn from 18 | the multivariate normal model for each group. 19 | } 20 | \examples{ 21 | if(require("mclust")) { 22 | eei <- me(modelName = "EEI", data = iris[,-5], z = unmap(iris[,5])) 23 | vvv <- me(modelName = "VVV", data = iris[,-5], z = unmap(iris[,5])) 24 | vvi <- me(modelName = "VVI", data = iris[,-5], z = unmap(iris[,5])) 25 | mefly(eei, iris[,-5]) 26 | mefly(vvi, iris[,-5]) 27 | mefly(vvv, iris[,-5]) 28 | } 29 | } 30 | \keyword{cluster} 31 | \keyword{dynamic} 32 | 33 | -------------------------------------------------------------------------------- /man/olive_example.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.0): do not edit by hand 2 | \name{olive_example} 3 | \alias{olive_example} 4 | \title{Example clusterfly object created with olives data} 5 | \usage{ 6 | olive_example() 7 | } 8 | \description{ 9 | Example clusterfly object created with olives data 10 | } 11 | \keyword{dataset} 12 | 13 | --------------------------------------------------------------------------------