├── .Rbuildignore ├── .gitignore ├── DESCRIPTION ├── NAMESPACE ├── R ├── ggbiplot.r └── ggscreeplot.r ├── README-wine-example-1.png ├── README.Rmd ├── README.md ├── data └── wine.rda ├── ggbiplot.Rproj └── man ├── ggbiplot.Rd ├── ggscreeplot.Rd └── wine.Rd /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^README\.Rmd$ 4 | ^README-.*\.png$ 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: ggbiplot 2 | Type: Package 3 | Title: A ggplot2 based biplot 4 | Version: 0.55 5 | Date: 2011-10-23 6 | Author: Vincent Q. Vu 7 | Maintainer: Vincent Q. Vu 8 | Description: A ggplot2 based biplot. It provides a drop-in 9 | replacement for biplot.princomp(). It implements a 10 | biplot and scree plot using ggplot2. 11 | Depends: 12 | ggplot2, plyr, scales, grid 13 | License: GPL-2 14 | URL: http://github.com/vqv/ggbiplot 15 | Collate: 16 | 'ggbiplot.r' 17 | 'ggscreeplot.r' 18 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | export(ggbiplot) 2 | export(ggscreeplot) 3 | -------------------------------------------------------------------------------- /R/ggbiplot.r: -------------------------------------------------------------------------------- 1 | # 2 | # ggbiplot.r 3 | # 4 | # Copyright 2011 Vincent Q. Vu. 5 | # 6 | # This program is free software; you can redistribute it and/or 7 | # modify it under the terms of the GNU General Public License 8 | # as published by the Free Software Foundation; either version 2 9 | # of the License, or (at your option) any later version. 10 | # 11 | # This program is distributed in the hope that it will be useful, 12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | # GNU General Public License for more details. 15 | # 16 | # You should have received a copy of the GNU General Public License 17 | # along with this program; if not, write to the Free Software 18 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 19 | # 20 | 21 | #' Biplot for Principal Components using ggplot2 22 | #' 23 | #' @param pcobj an object returned by prcomp() or princomp() 24 | #' @param choices which PCs to plot 25 | #' @param scale covariance biplot (scale = 1), form biplot (scale = 0). When scale = 1, the inner product between the variables approximates the covariance and the distance between the points approximates the Mahalanobis distance. 26 | #' @param obs.scale scale factor to apply to observations 27 | #' @param var.scale scale factor to apply to variables 28 | #' @param pc.biplot for compatibility with biplot.princomp() 29 | #' @param groups optional factor variable indicating the groups that the observations belong to. If provided the points will be colored according to groups 30 | #' @param ellipse draw a normal data ellipse for each group? 31 | #' @param ellipse.prob size of the ellipse in Normal probability 32 | #' @param labels optional vector of labels for the observations 33 | #' @param labels.size size of the text used for the labels 34 | #' @param alpha alpha transparency value for the points (0 = transparent, 1 = opaque) 35 | #' @param circle draw a correlation circle? (only applies when prcomp was called with scale = TRUE and when var.scale = 1) 36 | #' @param var.axes draw arrows for the variables? 37 | #' @param varname.size size of the text for variable names 38 | #' @param varname.adjust adjustment factor the placement of the variable names, >= 1 means farther from the arrow 39 | #' @param varname.abbrev whether or not to abbreviate the variable names 40 | #' 41 | #' @return a ggplot2 plot 42 | #' @export 43 | #' @examples 44 | #' data(wine) 45 | #' wine.pca <- prcomp(wine, scale. = TRUE) 46 | #' print(ggbiplot(wine.pca, obs.scale = 1, var.scale = 1, groups = wine.class, ellipse = TRUE, circle = TRUE)) 47 | #' 48 | ggbiplot <- function(pcobj, choices = 1:2, scale = 1, pc.biplot = TRUE, 49 | obs.scale = 1 - scale, var.scale = scale, 50 | groups = NULL, ellipse = FALSE, ellipse.prob = 0.68, 51 | labels = NULL, labels.size = 3, alpha = 1, 52 | var.axes = TRUE, 53 | circle = FALSE, circle.prob = 0.69, 54 | varname.size = 3, varname.adjust = 1.5, 55 | varname.abbrev = FALSE, ...) 56 | { 57 | library(ggplot2) 58 | library(plyr) 59 | library(scales) 60 | library(grid) 61 | 62 | stopifnot(length(choices) == 2) 63 | 64 | # Recover the SVD 65 | if(inherits(pcobj, 'prcomp')){ 66 | nobs.factor <- sqrt(nrow(pcobj$x) - 1) 67 | d <- pcobj$sdev 68 | u <- sweep(pcobj$x, 2, 1 / (d * nobs.factor), FUN = '*') 69 | v <- pcobj$rotation 70 | } else if(inherits(pcobj, 'princomp')) { 71 | nobs.factor <- sqrt(pcobj$n.obs) 72 | d <- pcobj$sdev 73 | u <- sweep(pcobj$scores, 2, 1 / (d * nobs.factor), FUN = '*') 74 | v <- pcobj$loadings 75 | } else if(inherits(pcobj, 'PCA')) { 76 | nobs.factor <- sqrt(nrow(pcobj$call$X)) 77 | d <- unlist(sqrt(pcobj$eig)[1]) 78 | u <- sweep(pcobj$ind$coord, 2, 1 / (d * nobs.factor), FUN = '*') 79 | v <- sweep(pcobj$var$coord,2,sqrt(pcobj$eig[1:ncol(pcobj$var$coord),1]),FUN="/") 80 | } else if(inherits(pcobj, "lda")) { 81 | nobs.factor <- sqrt(pcobj$N) 82 | d <- pcobj$svd 83 | u <- predict(pcobj)$x/nobs.factor 84 | v <- pcobj$scaling 85 | d.total <- sum(d^2) 86 | } else { 87 | stop('Expected a object of class prcomp, princomp, PCA, or lda') 88 | } 89 | 90 | # Scores 91 | choices <- pmin(choices, ncol(u)) 92 | df.u <- as.data.frame(sweep(u[,choices], 2, d[choices]^obs.scale, FUN='*')) 93 | 94 | # Directions 95 | v <- sweep(v, 2, d^var.scale, FUN='*') 96 | df.v <- as.data.frame(v[, choices]) 97 | 98 | names(df.u) <- c('xvar', 'yvar') 99 | names(df.v) <- names(df.u) 100 | 101 | if(pc.biplot) { 102 | df.u <- df.u * nobs.factor 103 | } 104 | 105 | # Scale the radius of the correlation circle so that it corresponds to 106 | # a data ellipse for the standardized PC scores 107 | r <- sqrt(qchisq(circle.prob, df = 2)) * prod(colMeans(df.u^2))^(1/4) 108 | 109 | # Scale directions 110 | v.scale <- rowSums(v^2) 111 | df.v <- r * df.v / sqrt(max(v.scale)) 112 | 113 | # Change the labels for the axes 114 | if(obs.scale == 0) { 115 | u.axis.labs <- paste('standardized PC', choices, sep='') 116 | } else { 117 | u.axis.labs <- paste('PC', choices, sep='') 118 | } 119 | 120 | # Append the proportion of explained variance to the axis labels 121 | u.axis.labs <- paste(u.axis.labs, 122 | sprintf('(%0.1f%% explained var.)', 123 | 100 * pcobj$sdev[choices]^2/sum(pcobj$sdev^2))) 124 | 125 | # Score Labels 126 | if(!is.null(labels)) { 127 | df.u$labels <- labels 128 | } 129 | 130 | # Grouping variable 131 | if(!is.null(groups)) { 132 | df.u$groups <- groups 133 | } 134 | 135 | # Variable Names 136 | if(varname.abbrev) { 137 | df.v$varname <- abbreviate(rownames(v)) 138 | } else { 139 | df.v$varname <- rownames(v) 140 | } 141 | 142 | # Variables for text label placement 143 | df.v$angle <- with(df.v, (180/pi) * atan(yvar / xvar)) 144 | df.v$hjust = with(df.v, (1 - varname.adjust * sign(xvar)) / 2) 145 | 146 | # Base plot 147 | g <- ggplot(data = df.u, aes(x = xvar, y = yvar)) + 148 | xlab(u.axis.labs[1]) + ylab(u.axis.labs[2]) + coord_equal() 149 | 150 | if(var.axes) { 151 | # Draw circle 152 | if(circle) 153 | { 154 | theta <- c(seq(-pi, pi, length = 50), seq(pi, -pi, length = 50)) 155 | circle <- data.frame(xvar = r * cos(theta), yvar = r * sin(theta)) 156 | g <- g + geom_path(data = circle, color = muted('white'), 157 | size = 1/2, alpha = 1/3) 158 | } 159 | 160 | # Draw directions 161 | g <- g + 162 | geom_segment(data = df.v, 163 | aes(x = 0, y = 0, xend = xvar, yend = yvar), 164 | arrow = arrow(length = unit(1/2, 'picas')), 165 | color = muted('red')) 166 | } 167 | 168 | # Draw either labels or points 169 | if(!is.null(df.u$labels)) { 170 | if(!is.null(df.u$groups)) { 171 | g <- g + geom_text(aes(label = labels, color = groups), 172 | size = labels.size) 173 | } else { 174 | g <- g + geom_text(aes(label = labels), size = labels.size) 175 | } 176 | } else { 177 | if(!is.null(df.u$groups)) { 178 | g <- g + geom_point(aes(color = groups), alpha = alpha) 179 | } else { 180 | g <- g + geom_point(alpha = alpha) 181 | } 182 | } 183 | 184 | # Overlay a concentration ellipse if there are groups 185 | if(!is.null(df.u$groups) && ellipse) { 186 | theta <- c(seq(-pi, pi, length = 50), seq(pi, -pi, length = 50)) 187 | circle <- cbind(cos(theta), sin(theta)) 188 | 189 | ell <- ddply(df.u, 'groups', function(x) { 190 | if(nrow(x) <= 2) { 191 | return(NULL) 192 | } 193 | sigma <- var(cbind(x$xvar, x$yvar)) 194 | mu <- c(mean(x$xvar), mean(x$yvar)) 195 | ed <- sqrt(qchisq(ellipse.prob, df = 2)) 196 | data.frame(sweep(circle %*% chol(sigma) * ed, 2, mu, FUN = '+'), 197 | groups = x$groups[1]) 198 | }) 199 | names(ell)[1:2] <- c('xvar', 'yvar') 200 | g <- g + geom_path(data = ell, aes(color = groups, group = groups)) 201 | } 202 | 203 | # Label the variable axes 204 | if(var.axes) { 205 | g <- g + 206 | geom_text(data = df.v, 207 | aes(label = varname, x = xvar, y = yvar, 208 | angle = angle, hjust = hjust), 209 | color = 'darkred', size = varname.size) 210 | } 211 | # Change the name of the legend for groups 212 | # if(!is.null(groups)) { 213 | # g <- g + scale_color_brewer(name = deparse(substitute(groups)), 214 | # palette = 'Dark2') 215 | # } 216 | 217 | # TODO: Add a second set of axes 218 | 219 | return(g) 220 | } 221 | -------------------------------------------------------------------------------- /R/ggscreeplot.r: -------------------------------------------------------------------------------- 1 | # 2 | # ggscreeplot.r 3 | # 4 | # Copyright 2011 Vincent Q. Vu. 5 | # 6 | # This program is free software; you can redistribute it and/or 7 | # modify it under the terms of the GNU General Public License 8 | # as published by the Free Software Foundation; either version 2 9 | # of the License, or (at your option) any later version. 10 | # 11 | # This program is distributed in the hope that it will be useful, 12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | # GNU General Public License for more details. 15 | # 16 | # You should have received a copy of the GNU General Public License 17 | # along with this program; if not, write to the Free Software 18 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 19 | # 20 | 21 | #' Screeplot for Principal Components 22 | #' 23 | #' @param pcobj an object returned by prcomp() or princomp() 24 | #' @param type the type of scree plot. 'pev' corresponds proportion of explained variance, i.e. the eigenvalues divided by the trace. 'cev' corresponds to the cumulative proportion of explained variance, i.e. the partial sum of the first k eigenvalues divided by the trace. 25 | #' @export 26 | #' @examples 27 | #' data(wine) 28 | #' wine.pca <- prcomp(wine, scale. = TRUE) 29 | #' print(ggscreeplot(wine.pca)) 30 | #' 31 | ggscreeplot <- function(pcobj, type = c('pev', 'cev')) 32 | { 33 | type <- match.arg(type) 34 | d <- pcobj$sdev^2 35 | yvar <- switch(type, 36 | pev = d / sum(d), 37 | cev = cumsum(d) / sum(d)) 38 | 39 | yvar.lab <- switch(type, 40 | pev = 'proportion of explained variance', 41 | cev = 'cumulative proportion of explained variance') 42 | 43 | df <- data.frame(PC = 1:length(d), yvar = yvar) 44 | 45 | ggplot(data = df, aes(x = PC, y = yvar)) + 46 | xlab('principal component number') + ylab(yvar.lab) + 47 | geom_point() + geom_path() 48 | } 49 | -------------------------------------------------------------------------------- /README-wine-example-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vqv/ggbiplot/f7ea76da44ee68cc0ab68c35202e1dab06aaa027/README-wine-example-1.png -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | --- 4 | 5 | 6 | 7 | **NEWS**: Active development of ggbiplot has moved to [github.com/friendly/ggbiplot](https://github.com/friendly/ggbiplot). 8 | 9 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | **NEWS**: Active development of ggbiplot has moved to 5 | [github.com/friendly/ggbiplot](https://github.com/friendly/ggbiplot). 6 | -------------------------------------------------------------------------------- /data/wine.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vqv/ggbiplot/f7ea76da44ee68cc0ab68c35202e1dab06aaa027/data/wine.rda -------------------------------------------------------------------------------- /ggbiplot.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: XeLaTeX 14 | 15 | BuildType: Package 16 | PackageUseDevtools: Yes 17 | PackageInstallArgs: --no-multiarch --with-keep.source 18 | -------------------------------------------------------------------------------- /man/ggbiplot.Rd: -------------------------------------------------------------------------------- 1 | \name{ggbiplot} 2 | \alias{ggbiplot} 3 | \title{Biplot for Principal Components using ggplot2} 4 | \usage{ 5 | ggbiplot(pcobj, choices = 1:2, scale = 1, pc.biplot = 6 | TRUE, obs.scale = 1 - scale, var.scale = scale, groups = 7 | NULL, ellipse = FALSE, ellipse.prob = 0.68, labels = 8 | NULL, labels.size = 3, alpha = 1, var.axes = TRUE, circle 9 | = FALSE, circle.prob = 0.69, varname.size = 3, 10 | varname.adjust = 1.5, varname.abbrev = FALSE, ...) 11 | } 12 | \arguments{ 13 | \item{pcobj}{an object returned by prcomp() or 14 | princomp()} 15 | 16 | \item{choices}{which PCs to plot} 17 | 18 | \item{scale}{covariance biplot (scale = 1), form biplot 19 | (scale = 0). When scale = 1, the inner product between 20 | the variables approximates the covariance and the 21 | distance between the points approximates the Mahalanobis 22 | distance.} 23 | 24 | \item{obs.scale}{scale factor to apply to observations} 25 | 26 | \item{var.scale}{scale factor to apply to variables} 27 | 28 | \item{pc.biplot}{for compatibility with 29 | biplot.princomp()} 30 | 31 | \item{groups}{optional factor variable indicating the 32 | groups that the observations belong to. If provided the 33 | points will be colored according to groups} 34 | 35 | \item{ellipse}{draw a normal data ellipse for each 36 | group?} 37 | 38 | \item{ellipse.prob}{size of the ellipse in Normal 39 | probability} 40 | 41 | \item{labels}{optional vector of labels for the 42 | observations} 43 | 44 | \item{labels.size}{size of the text used for the labels} 45 | 46 | \item{alpha}{alpha transparency value for the points (0 = 47 | TRUEransparent, 1 = opaque)} 48 | 49 | \item{circle}{draw a correlation circle? (only applies 50 | when prcomp was called with scale = TRUE and when 51 | var.scale = 1)} 52 | 53 | \item{var.axes}{draw arrows for the variables?} 54 | 55 | \item{varname.size}{size of the text for variable names} 56 | 57 | \item{varname.adjust}{adjustment factor the placement of 58 | the variable names, >= 1 means farther from the arrow} 59 | 60 | \item{varname.abbrev}{whether or not to abbreviate the 61 | variable names} 62 | } 63 | \value{ 64 | a ggplot2 plot 65 | } 66 | \description{ 67 | Biplot for Principal Components using ggplot2 68 | } 69 | \examples{ 70 | data(wine) 71 | wine.pca <- prcomp(wine, scale. = TRUE) 72 | print(ggbiplot(wine.pca, obs.scale = 1, var.scale = 1, groups = wine.class, ellipse = TRUE, circle = TRUE)) 73 | } 74 | 75 | -------------------------------------------------------------------------------- /man/ggscreeplot.Rd: -------------------------------------------------------------------------------- 1 | \name{ggscreeplot} 2 | \alias{ggscreeplot} 3 | \title{Screeplot for Principal Components} 4 | \usage{ 5 | ggscreeplot(pcobj, type = c("pev", "cev")) 6 | } 7 | \arguments{ 8 | \item{pcobj}{an object returned by prcomp() or 9 | princomp()} 10 | 11 | \item{type}{the type of scree plot. 'pev' corresponds 12 | proportion of explained variance, i.e. the eigenvalues 13 | divided by the trace. 'cev' corresponds to the cumulative 14 | proportion of explained variance, i.e. the partial sum of 15 | the first k eigenvalues divided by the trace.} 16 | } 17 | \description{ 18 | Screeplot for Principal Components 19 | } 20 | \examples{ 21 | data(wine) 22 | wine.pca <- prcomp(wine, scale. = TRUE) 23 | print(ggscreeplot(wine.pca)) 24 | } 25 | 26 | -------------------------------------------------------------------------------- /man/wine.Rd: -------------------------------------------------------------------------------- 1 | \name{wine} 2 | \alias{wine} 3 | \docType{data} 4 | \title{ 5 | Chemical composition of three cultivars of wine 6 | } 7 | \description{ 8 | Chemical constituents of wines from three different cultivars grown in the same region in Italy. The cultivars, 9 | 'barolo', 'barbera', and 'grignolino', are indicated in wine.class. 10 | } 11 | \usage{data(wine)} 12 | \format{ 13 | The format is: 14 | chr "wine" 15 | } 16 | \source{ 17 | http://archive.ics.uci.edu/ml/datasets/Wine 18 | } 19 | \examples{ 20 | data(wine) 21 | wine.pca <- prcomp(wine, scale. = TRUE) 22 | print(ggscreeplot(wine.pca)) 23 | print(ggbiplot(wine.pca, obs.scale = 1, var.scale = 1, groups = wine.class, ellipse = TRUE, circle = TRUE)) 24 | } 25 | \keyword{datasets} 26 | --------------------------------------------------------------------------------