├── DESCRIPTION
├── NAMESPACE
├── R
│   ├── PLSDA.R
│   ├── SPLSDA.R
│   ├── ggbiplot.R
│   ├── hello.R
│   ├── normalization.R
│   ├── okernelPLS.fit.R
│   ├── plsmda.R
│   ├── splitVar.R
│   ├── splsmda.R
│   └── stability.R
├── README.md
├── codexample_clrpca1.pdf
├── data
│   ├── ArcticLake.rda
│   └── Hydrochem.rda
├── man
│   ├── alr.Rd
│   ├── clr.Rd
│   ├── compPLS-package.Rd
│   ├── ggbiplot.Rd
│   ├── hello.Rd
│   ├── plsDA.Rd
│   ├── plsDA_main.Rd
│   ├── spls.stars.Rd
│   └── splsDA.Rd
└── tests
    ├── testthat.R
    └── testthat
        ├── test_norms_and_transforms.R
        └── test_plsda.R

/DESCRIPTION:
--------------------------------------------------------------------------------
1 | Package: compPLS
2 | Type: Package
3 | Title: Conduct Partial Least Squares regression for compositional data
4 | Version: 1.1
5 | Date: 2021-04-18
6 | Author: Zachary Kurtz
7 | Maintainer: Zachary Kurtz
8 | Description: Partial Least Squares methods on log-ratio transformed data.
9 |     Methods for PLS, sparse PLS discriminant analysis and biplot plotting for
10 |     high-dimensional compositions, in particular microbial
11 |     communities/microbiota OTU tables, as generated by 16S amplicon sequencing.
12 | License: GPL (>= 2)
13 | Depends:
14 |     R (>= 4.0.0)
15 | Imports:
16 |     caret,
17 |     ggplot2,
18 |     MASS,
19 |     pls,
20 |     klaR,
21 |     boot,
22 |     grid,
23 |     scales,
24 |     kernlab
25 | Suggests:
26 |     testthat
27 | RoxygenNote: 7.1.2
28 | Encoding: UTF-8
29 | 
--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | # Generated by roxygen2: do not edit by hand
2 | 
3 | S3method(clr,data.frame)
4 | S3method(clr,default)
5 | S3method(clr,matrix)
6 | S3method(ggbiplot,default)
7 | S3method(ggbiplot,lda)
8 | S3method(ggbiplot,plsda)
9 | S3method(ggbiplot,prcomp)
10 | S3method(ggbiplot,princomp)
11 | S3method(ggbiplot,splsda)
12 | export(alr)
13 | export(alr.data.frame)
14 | export(alr.default)
15 | export(clr)
16 | export(ggbiplot)
17 | export(ggbiplot.matrix)
18 | export(norm_pseudo)
19 | export(norm_to_total)
20 | export(plsDA)
21 | export(plsDA_main)
22 | export(spls.stars)
23 | export(splsDA)
24 | export(splsDA_main)
25 | importFrom(ggplot2,aes)
26 | importFrom(ggplot2,geom_path)
27 | importFrom(ggplot2,geom_point)
28 | importFrom(ggplot2,geom_segment)
29 | importFrom(ggplot2,geom_text)
30 | importFrom(ggplot2,ggplot)
31 | importFrom(ggplot2,scale_x_continuous)
32 | importFrom(ggplot2,scale_y_continuous)
33 | 
--------------------------------------------------------------------------------
/R/PLSDA.R:
--------------------------------------------------------------------------------
1 | ################################
2 | # @author Zachary Kurtz
3 | # @date 8/14/2013
4 | ## Methods for Partial Least Squares regression for compositional data
5 | 
6 | 
7 | 
8 | 
9 | #' Partial Least Squares Discriminant Analysis
10 | #' PLS regression to discriminate classes (via a logistic model).
11 | #' This is essentially a wrapper for the \code{plsda} function in the caret package,
12 | #' but with a default setup for dealing with unbalanced classes (via the priors option, see details);
13 | #' see caret::plsda for implementation details.
14 | #'
15 | #' Use this function if you do not need to fit parameters by cross-validation.
16 | #'
17 | #' @title plsDA partial least squares discriminant analysis
18 | #' @param x data with samples in rows, features in columns (not necessarily compositional data)
19 | #' @param grouping a numeric vector or factor with sample classes (length should equal \code{nrow(x)})
20 | #' @param usePriors use priors to correct for very unbalanced sample sizes between groups (i.e., put a strong penalty on misclassifying small groups)
21 | #' @param K number of components in the PLS model (default: number of classes - 1)
22 | #' @return a plsda fitted model
23 | #' @seealso \code{\link{plsDA_main}}, \code{\link[caret]{plsda}}
24 | #' @rdname plsDA
25 | #' @export
26 | plsDA <- function(x, grouping, K, usePriors=FALSE, plsfun=caret::plsda, ...) {
27 |     # wrapper function for plsda
28 | 
29 |     if (length(grouping) != nrow(x))
30 |         stop('length of grouping vector does not equal the number of samples')
31 | 
32 |     args <- list(...)
33 |     if (!('probMethod' %in% names(args))) args <- c(args, list(probMethod='Bayes'))
34 |     if (usePriors) {
35 |         # set priors to 1-(class freq)
36 |         prior <- 1-(table(grouping)/length(grouping))
37 |         args <- c(args, list(prior=as.vector(prior)))
38 |     }
39 | 
40 |     y <- as.factor(grouping)
41 | 
42 |     if (missing(K)) {
43 |         K <- length(levels(y))-1
44 |     }
45 |     args <- c(args, ncomp=K)
46 | 
47 |     do.call(plsfun, c(list(x, y), args))
48 | }
49 | 
50 | 
51 | #' The main wrapper for full Partial Least Squares discriminant analysis,
52 | #' performing cross-validation to tune the model parameters (here, the number of components)
53 | #' and running permutation tests (via bootstrapping) to get pseudo p-value estimates for the model coefficients.
54 | #'
55 | #' @title plsDA_main partial least squares discriminant analysis
56 | #' @param x data with samples in rows, features in columns (not necessarily compositional data)
57 | #' @param grouping a numeric vector or factor with sample classes (length should equal \code{nrow(x)})
58 | #' @param usePriors use priors to correct for very unbalanced sample sizes between groups (i.e., put a strong penalty on misclassifying small groups)
59 | #' @param K numeric vector of candidate numbers of components for the PLS model
60 | #' @param fold number of partitions to randomly subsample for cross-validation
61 | #' @param nboots number of bootstraps/permutations for estimating coefficient p-values
62 | #' @param n.core number of cores for parallelization of bootstraps
63 | #' @param noise for very sparse components, some subsamples may have zero variance. Optionally, add some Gaussian noise to avoid PLS errors
64 | #' @param ... additional arguments passed to plsDA
65 | #'
66 | #' @return a \code{plsDA} object that contains: the plsda model/object, \code{pvals}, the original data, \code{x}, and \code{groupings}
67 | #' @seealso \code{\link{plsDA}}
68 | #' @rdname plsDA_main
69 | #' @export
70 | plsDA_main <- function(x, grouping, K, usePriors=FALSE, fold=5, nboots=999, n.core=4, noise=0, ...) {
71 | 
72 |     if (length(K) > 1)
73 |         K <- cv.plsDA.fit(x, grouping, K=K, noise=noise, fold=fold, n.core=n.core, usePriors=usePriors)$K.opt
74 |     .bstat <- function(data, indices, ...) plsDA(data[indices,,drop=FALSE], ...)$coefficients
75 |     .pstat <- function(data, indices, ...) plsDA(apply(data[indices,,drop=FALSE], 2, function(x) sample(x)), ...)$coefficients
76 | 
77 |     if (nboots > 1) {
78 |         sboots <- plsdaboot(x, .bstat, .pstat, R=nboots, n.core, K=K, grouping=grouping, ...)
79 |         pmat <- pval(sboots)
80 |         # keepInd <- which(suppressWarnings(apply(pmat, 1, min, na.rm=TRUE)) <= alpha)
81 |     } else {
82 |         keepInd <- 1:ncol(x)
83 |         pmat <- NULL
84 |     }
85 |     plsmod <- plsDA(x, grouping, usePriors=usePriors, K=K, ...)
86 | 
87 |     # if (length(keepInd) > 0) {
88 |     #     splskmod <- plsDA(x[,keepInd], grouping, usePriors=usePriors, K=K, ...)
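    # A minimal usage sketch (hypothetical objects: `X` is an n x p data
    # matrix, `y` a class factor of length n; settings are illustrative):
    #     mod <- plsDA_main(X, y, K=1:3, nboots=99)
    #     mod$pvals    # two-sided bootstrap pseudo p-values per coefficient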
89 | 
90 |     # } else splskmod <- NULL
91 |     structure(list(plsda=plsmod, pvals=pmat, # keep=keepInd,
92 |                    x=x, y=grouping), class="plsDA")
93 | 
94 | 
95 | }
96 | 
97 | 
98 | plsdaboot <- function(data, statisticboot, statisticperm, R, ncpus=1, ...) {
99 |     res <- boot::boot(data, statisticboot, R=R, parallel="multicore", ncpus=ncpus, noise=0, ...)
100 |     null_av <- boot::boot(data, statisticperm, sim='permutation', R=R, parallel="multicore", ncpus=ncpus, ...)
101 |     class(res) <- 'list'
102 |     structure(c(res, list(null_av=null_av)), class='plsdaboot')
103 | }
104 | 
105 | 
106 | 
107 | pval.plsdaboot <- function(x, sided='both', mar=2) {
108 |     # calculate 1- or 2-way pseudo p-values from a boot object
109 |     # Args: a boot object
110 |     if (sided != "both") stop("only two-sided currently supported")
111 |     x$t0 <- as.matrix(x$t0)
112 |     nparams <- ncol(x$t)
113 |     tmeans <- colMeans(x$null_av$t)
114 |     # check to see whether betas are unstable -- confirm
115 |     niters <- nrow(x$t)
116 |     ind95 <- max(1,round(.025*niters)):round(.975*niters)
117 |     boot_ord <- apply(x$t, 2, sort)
118 |     boot_ord95 <- boot_ord[ind95,]
119 |     outofrange <- unlist(lapply(1:length(x$t0), function(i) {
120 |         betas <- x$t0[i]
121 |         range <- range(boot_ord95[,i])
122 |         range[1] > betas || range[2] < betas
123 |     }))
124 |     # calc whether center of mass is above or below the mean
125 |     bs_above <- unlist(lapply(1:nparams, function(i)
126 |                     length(which(x$t[, i] > tmeans[i]))))
127 |     is_above <- bs_above > x$R/2
128 |     pvals <- ifelse(is_above, 2*(1-bs_above/x$R), 2*bs_above/x$R)
129 |     pvals[pvals > 1] <- 1
130 |     pvals[outofrange] <- NaN
131 |     pmat <- matrix(pvals, ncol=ncol(x$t0))
132 |     rownames(pmat) <- rownames(x$t0)
133 |     colnames(pmat) <- colnames(x$t0)
134 |     pmat
135 | }
136 | 
137 | cv.plsDA.fit <- function(x, grouping, K, noise=0, fold=5, n.core, ...) {
138 |     # Find K by cross-validation
139 |     x <- x + matrix(rnorm(prod(dim(x)), 0, noise), ncol=ncol(x))
140 |     capture.output(out <- .cv.plsDA(x, grouping, fold=fold, K=K, plot.it=FALSE, n.core=n.core, ...))
141 |     out
142 | }
143 | 
144 | #' @keywords internal
145 | .cv.plsDA <- function (x, y, fold = 10, K, kappa = 0.5, classifier = c("lda",
146 |     "logistic"), scale.x = TRUE, n.core = 4, plot.it=FALSE, ...)
147 | {
148 |     result.mat <- c()
149 |     foldi <- .cv.split(y, fold)
150 |     x <- as.matrix(x)
151 |     n <- nrow(x)
152 |     p <- ncol(x)
153 |     ip <- c(1:p)
154 |     # y <- as.matrix(y)
155 |     q <- length(y) #ncol(y)
156 |     # eta.K.pair <- cbind(rep(1, each = length(K)), rep(K, 1))
157 |     # eta.K.list <- split(eta.K.pair, c(1:nrow(eta.K.pair)))
158 |     .fit.plsda <- function(K.val) {
159 |         mspemati <- rep(0, fold)
160 |         Ai <- rep(0, fold)
161 |         for (k in 1:fold) {
162 |             omit <- foldi[[k]]
163 |             train.x <- x[-omit, ]
164 |             train.y <- y[-omit]
165 |             test.x <- x[omit, ]
166 |             test.y <- y[omit]
167 |             plsda.fit <- plsDA(train.x, train.y, K = K.val, ...)
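            # held-out predictions for fold k: mspemati[k] collects the fold's
            # misclassification rate and Ai[k] the model size (the length of
            # the active set A, carried over from the sparse version)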
168 |             pred <- predict(plsda.fit, test.x)
169 |             mspemati[k] <- mean(as.numeric(pred != test.y))
170 |             Ai[k] <- mean(length(plsda.fit$A))
171 |         }
172 |         mspe.ij <- c(mean(mspemati), mean(Ai), 1, K.val)
173 |         return(mspe.ij)
174 |     }
175 |     if (.Platform$OS.type == "unix") {
176 |         result.list <- parallel::mclapply(K, function(k) .fit.plsda(k), mc.cores = n.core)
177 |     }
178 |     else {
179 |         result.list <- lapply(K, function(k) .fit.plsda(k))
180 |     }
181 |     result.mat <- c()
182 |     for (i in 1:length(result.list)) {
183 |         result.mat <- rbind(result.mat, result.list[[i]])
184 |     }
185 | 
186 |     mspemat <- matrix(result.mat[, 1], length(K), 1)
187 |     mspemat <- t(mspemat)
188 |     rownames(mspemat) <- '1'
189 |     colnames(mspemat) <- K
190 |     cands <- result.mat[result.mat[, 1] == min(result.mat[, 1]),
191 |         , drop = FALSE]
192 | 
193 |     cands <- cands[cands[, 2] == min(cands[, 2]), , drop = FALSE]
194 |     cands <- cands[cands[, 4] == min(cands[, 4]), , drop = FALSE]
195 |     cands <- cands[cands[, 3] == max(cands[, 3]), , drop = FALSE]
196 |     K.opt <- cands[, 4]
197 |     eta.opt <- cands[, 3]
198 | 
199 |     # cat(paste("K = ", K.opt, "\n", sep = ""))
200 |     # if (plot.it) {
201 | 
202 |     #     spls::heatmap.spls(t(mspemat), xlab = "K", ylab = "eta", main = "CV MSPE Plot",
203 |     #         coln = 16, as = "n")
204 |     # }
205 |     # rownames(mspemat) <- paste("eta=", eta)
206 |     colnames(mspemat) <- paste("K =", K)
207 |     cv <- list(mspemat = mspemat, K.opt = K.opt)
208 |     invisible(cv)
209 | }
210 | 
211 | #' @keywords internal
212 | .cv.split <- function (y, fold) {
213 |     n <- length(y)
214 |     group <- table(y)
215 |     x <- c()
216 |     for (i in 1:length(group)) {
217 |         x.group <- c(1:n)[y == names(group)[i]]
218 |         x <- c(x, sample(x.group))
219 |     }
220 |     foldi <- split(x, rep(1:fold, length = n))
221 |     return(foldi)
222 | }
223 | 
--------------------------------------------------------------------------------
/R/SPLSDA.R:
--------------------------------------------------------------------------------
1 | ########################################
2 | # SPLS discriminant analysis pipeline
3 | #
4 | #
5 | 
6 | 
7 | 
8 | 
9 | #' sparse Partial Least Squares Discriminant Analysis
10 | #' sPLS regression to discriminate classes (via a logistic model).
11 | #' This is essentially a wrapper for the \code{splsda} function in the caret package,
12 | #' but with a default setup for dealing with unbalanced classes (via the priors option, see details);
13 | #' see caret::splsda for implementation details.
14 | #'
15 | #' Use this function if you do not need to fit parameters by cross-validation.
16 | #'
17 | #' @title splsDA sparse partial least squares discriminant analysis
18 | #' @param x data with samples in rows, features in columns (not necessarily compositional data)
19 | #' @param grouping a numeric vector or factor with sample classes (length should equal \code{nrow(x)})
20 | #' @param usePriors use priors to correct for very unbalanced sample sizes between groups (i.e., put a strong penalty on misclassifying small groups)
21 | #' @param K number of components in the PLS model (default: number of classes - 1)
22 | #' @param eta parameter that adjusts the sparsity of the PLS model (between 0 and 1)
23 | #' @return a splsda fitted model
24 | #' @seealso \code{\link{plsDA_main}}, \code{\link[caret]{plsda}}, \code{\link[caret]{splsda}}
25 | #' @rdname splsDA
26 | #' @export
27 | splsDA <- function(x, grouping, eta, K, usePriors=FALSE, ...) {
28 |     ## splsda wrapper
29 |     # caret::splsda(...)
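    # A hedged sketch of a direct call (the eta/K values are illustrative,
    # not defaults), for a data matrix `X` and class factor `y`:
    #     fit <- splsDA(X, y, eta=0.8, K=2)
    #     fit$betahat    # coefficient matrix of the sparse PLS fit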
30 | 
31 |     if (length(grouping) != nrow(x))
32 |         stop('length of grouping vector does not equal the number of samples')
33 | 
34 |     args <- list(...)
35 |     if (!('probMethod' %in% names(args))) args <- c(args, list(probMethod='Bayes'))
36 |     if (usePriors) {
37 |         # set priors to 1-(class freq)
38 |         prior <- 1-(table(grouping)/length(grouping))
39 |         args <- c(args, list(prior=as.vector(prior)))
40 |     }
41 | 
42 |     y <- as.factor(grouping)
43 | 
44 |     if (missing(K)) {
45 |         K <- length(levels(y))-1
46 |     }
47 |     args <- c(args, K=K, eta=eta)
48 |     do.call(caret::splsda, c(list(x, y), args))
49 | }
50 | 
51 | 
52 | 
53 | #' The main wrapper for full sparse Partial Least Squares discriminant analysis,
54 | #' performing cross-validation to tune the model parameters (the number of components K and the sparsity eta)
55 | #' and running permutation tests (via bootstrapping) to get pseudo p-value estimates for the model coefficients.
56 | #'
57 | #' @title splsDA_main sparse partial least squares discriminant analysis
58 | #' @param x data with samples in rows, features in columns (not necessarily compositional data)
59 | #' @param grouping a numeric vector or factor with sample classes (length should equal \code{nrow(x)})
60 | #' @param usePriors use priors to correct for very unbalanced sample sizes between groups (i.e., put a strong penalty on misclassifying small groups)
61 | #' @param K numeric vector of candidate numbers of components for the PLS model
62 | #' @param fold number of partitions to randomly subsample for cross-validation
63 | #' @param nboots number of bootstraps/permutations for estimating coefficient p-values
64 | #' @param n.core number of cores for parallelization of bootstraps
65 | #' @param noise for very sparse components, some subsamples may have zero variance. Optionally, add some Gaussian noise to avoid PLS errors
66 | #' @param ... additional arguments passed to splsDA
67 | #'
68 | #' @return a \code{splsDA} object that contains: the splsda model/object, \code{pvals}, the tuned \code{eta}, the original data, \code{x}, and \code{groupings}
69 | #' @seealso \code{\link{plsDA}}
70 | #' @rdname plsDA_main
71 | #' @export
72 | splsDA_main <- function(x, grouping, eta, K, usePriors=FALSE, fold=5, nboots=999, n.core=4, noise=0, ...) {
73 |     # find optimal eta & K by cross-validation
74 | 
75 |     opt <- cv.splsDA.fit(x, grouping, eta=eta, K=K, noise=noise, fold=fold, usePriors=usePriors, n.core=n.core)
76 |     eta <- opt$eta.opt
77 |     K <- opt$K.opt
78 |     .bstat <- function(x, indices, ...) splsDA(x[indices,,drop=FALSE], ...)$betahat
79 |     .pstat <- function(x, indices, ...) splsDA(apply(x[indices,,drop=FALSE], 2, function(x) sample(x)), ...)$betahat
80 | 
81 |     if (nboots > 1) {
82 |         sboots <- splsdaboot(x, .bstat, .pstat, R=nboots, n.core, eta=eta, K=K, grouping=grouping, ...)
83 |         pmat <- pval(sboots)
84 |         # keepInd <- which(suppressWarnings(apply(pmat, 1, min, na.rm=TRUE)) <= alpha)
85 |     } else {
86 |         keepInd <- 1:ncol(x)
87 |         pmat <- NULL
88 |     }
89 |     splsmod <- splsDA(x, grouping, usePriors=usePriors, eta=eta, K=K, ...)
90 | 
91 |     # if (length(keepInd) > 0) {
92 |     #     splskmod <- splsDA(x[,keepInd], grouping, usePriors=usePriors, eta=.99, K=K, ...)
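    # eta and K now hold the CV-optimal values chosen above; a typical tuning
    # call is sketched here (the grids are illustrative):
    #     mod <- splsDA_main(X, y, eta=seq(0.1, 0.9, by=0.2), K=1:3, nboots=99)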
93 | 94 | # } else splskmod <- NULL 95 | structure(list(splsda=splsmod, pvals=pmat, 96 | eta=eta, x=x, y=grouping), class="splsDA") 97 | } 98 | 99 | 100 | 101 | predict.splsDA <- function(train.mod, test) { 102 | fullpred <- caret::predict.splsda(train.mod$splsda, test) 103 | 104 | if (!is.null(train.mod$splsdakeep)) { 105 | keeppred <- caret::predict.splsda(train.mod$splsdakeep, test[,train.mod$keep]) 106 | } else { 107 | keeppred <- NULL 108 | } 109 | list(Full=fullpred, Keep=keeppred) 110 | } 111 | 112 | splsdaboot <- function(x, statisticboot, statisticperm, R, ncpus=1, ...) { 113 | res <- boot::boot(x, statisticboot, R=R, parallel="multicore", ncpus=ncpus, ...) 114 | null_av <- boot::boot(x, statisticperm, sim='permutation', R=R, parallel="multicore", ncpus=ncpus, ...) 115 | class(res) <- 'list' 116 | structure(c(res, list(null_av=null_av)), class='splsdaboot') 117 | } 118 | 119 | pval <- function(x, ...) { 120 | UseMethod("pval") 121 | } 122 | 123 | pval.splsdaboot <- function(x, sided='both', mar=2) { 124 | # calculate 1 or 2 way pseudo p-val from boot object 125 | # Args: a boot object 126 | if (sided != "both") stop("only two-sided currently supported") 127 | nparams <- ncol(x$t) 128 | tmeans <- colMeans(x$null_av$t) 129 | # check to see whether betas are unstable -- confirm 130 | niters <- nrow(x$t) 131 | ind95 <- max(1,round(.025*niters)):round(.975*niters) 132 | boot_ord <- apply(x$t, 2, sort) 133 | boot_ord95 <- boot_ord[ind95,] 134 | outofrange <- unlist(lapply(1:length(x$t0), function(i) { 135 | betas <- x$t0[i] 136 | range <- range(boot_ord95[,i]) 137 | range[1] > betas || range[2] < betas 138 | })) 139 | # calc whether center of mass is above or below the mean 140 | bs_above <- unlist(lapply(1:nparams, function(i) 141 | length(which(x$t[, i] > tmeans[i])))) 142 | is_above <- bs_above > x$R/2 143 | pvals <- ifelse(is_above, 2*(1-bs_above/x$R), 2*bs_above/x$R) 144 | pvals[pvals > 1] <- 1 145 | pvals[outofrange] <- NaN 146 | pmat <- matrix(pvals, ncol=ncol(x$t0)) 147 | rownames(pmat) <- rownames(x$t0) 148 | colnames(pmat) <- colnames(x$t0) 149 | pmat 150 | } 151 | 152 | 153 | cv.splsDA.fit <- function(x, grouping, eta, K, noise=0, fold=5, usePriors=FALSE, n.core) { 154 | # Find eta & k by cross validation 155 | ## wrapper for spls::cv.spls, need to add some random noise so variance isn't zero 156 | x <- x + matrix(rnorm(prod(dim(x)), 0, noise), ncol=ncol(x)) 157 | capture.output(out <- spls::cv.splsda(x, grouping, eta=eta, K=K, fold=fold, plot.it=FALSE, n.core=n.core)) 158 | out 159 | } 160 | 161 | 162 | 163 | 164 | 165 | #cross_validation.splsDA <- function(x, grouping, k=5, rep=1, allgroups=FALSE, ...) { 166 | # # Perform n-fold cross validation of LDA decision rule by leaving out samples and returning prediction 167 | # #Args: 168 | # # method -> a method string that takes x and grouping (or X & y) and has a predict method for resulting object 169 | # # x -> N by p x matrix 170 | # # k -> divide x into n even fractions for cross validation 171 | # # rep -> repeat randomized x-validation 172 | # # allgroups -> all groups should be represented at least twice in the subsample (could mean doubling up) 173 | # # ... 
-> additional arguments to lda_r 174 | # # Returns: 175 | # # matrix of n X rep with prediction error 176 | 177 | # nrec <- function(N, k) { 178 | # r <- N %% k 179 | # b <- floor(N/k) 180 | # c(rep(b+1, r), rep(b, k-r)) 181 | # } 182 | 183 | # ind <- 1:nrow(x) 184 | # err <- vector('list', length=rep) 185 | # errKeep <- vector('list', length=rep) 186 | # subsizes <- nrec(length(ind), k) 187 | # groupfact <- as.factor(grouping) 188 | # for (l in 1:rep) { 189 | # rind <- sample(ind) 190 | # mmFull <- vector('list', length=length(subsizes)) 191 | # mmKeep <- vector('list', length=length(subsizes)) 192 | # 193 | # if (allgroups) { 194 | # memlist <- split(1:length(grouping), grouping[rind]) 195 | # rindmat <- suppressWarnings(do.call('rbind', memlist)) 196 | # } 197 | # 198 | # for (i in 1:length(subsizes)) { 199 | # rind.i <- ((subsizes[i]*i)-(subsizes[i]-1)):(subsizes[i]*i) 200 | # 201 | # if (allgroups) { 202 | # rind.i <- rindmat[rind.i] 203 | # } 204 | # 205 | # test.ind <- rind[rind.i] 206 | # test <- x[test.ind,] 207 | # train <- x[-test.ind,] 208 | 209 | # args <- list(...) 210 | 211 | # vars <- apply(train, 2, var) 212 | # zind <- which(vars <= 1e-3) 213 | # vind <- setdiff(1:ncol(train), zind) 214 | 215 | # train.mod <- do.call(splsDA_main, c(list(train[,vind], grouping[-test.ind]), args)) 216 | 217 | # pred <- predict.splsDA(train.mod, test[,vind]) 218 | # if ('class' %in% names(pred)) pred <- pred$class # eg if method is lda_r 219 | # mmFull[[i]] <- table(factor(pred$Full, levels=levels(groupfact)), 220 | # factor(grouping[test.ind], levels=levels(groupfact))) 221 | 222 | # if (!is.null(pred$Keep)) { 223 | # mmKeep[[i]] <- table(factor(pred$Keep, levels=levels(groupfact)), 224 | # factor(grouping[test.ind], levels=levels(groupfact))) 225 | # } 226 | # } 227 | # mfind <- which(unlist(lapply(lapply(mmFull, dim), 228 | # function(x) all(x == rep(length(unique(grouping)), 2) )))) 229 | # mmFull <- mmFull[mfind] 230 | # err[[l]] <- Reduce('+', mmFull) / length(mmFull) 231 | # err[[l]] <- err[[l]] / rowSums(err[[l]]) 232 | # if (!all(unlist(lapply(mmKeep, is.null)))) { 233 | # mfind <- which(unlist(lapply(lapply(mmKeep, dim), 234 | # function(x) all(x == rep(length(unique(grouping)), 2) )))) 235 | # mmKeep <- mmKeep[mfind] 236 | # errKeep[[l]] <- Reduce('+', mmKeep) / length(mmKeep) 237 | # errKeep[[l]] <- errKeep[[l]] / rowSums(errKeep[[l]]) 238 | # } else errKeep <- NULL 239 | # } 240 | # structure(list(errFull = err, errKeep = errKeep), class='cv') 241 | #} 242 | 243 | 244 | #spls.dist <- function(x, ...) { 245 | ## means <- irispls$meanx 246 | ## X <- scale(xobj$x, center = means, scale = FALSE) 247 | # scores <- as.matrix(x$x[,x$A]) %*% as.matrix(x$projection) 248 | # dist(scores) 249 | #} 250 | 251 | 252 | #spls.sil <- function(dist, grouping, ...) 
{ 253 | # x <- list() 254 | # x$clustering <- as.integer(grouping) 255 | # cluster:::silhouette.default(x, dist=dist) 256 | #} 257 | 258 | 259 | spls2pls <- function(x) { 260 | pls(x$x[,x$A]) 261 | } 262 | 263 | 264 | 265 | 266 | -------------------------------------------------------------------------------- /R/ggbiplot.R: -------------------------------------------------------------------------------- 1 | ################################################################ 2 | # Methods for making pretty biplots with ggplot2 package 3 | # 4 | # based on code by http://www.vince.vu/software/#ggbiplot 5 | 6 | 7 | #' Pretty biplots using ggplots 8 | #' @title methods for making biplots from various projection & classification models 9 | #' @param xobj The object to be plotted 10 | #' @rdname ggbiplot 11 | #' @export 12 | ggbiplot <- function(xobj, ...) { 13 | UseMethod("ggbiplot") 14 | } 15 | 16 | #' @rdname ggbiplot 17 | #' @method ggbiplot princomp 18 | #' @export 19 | ggbiplot.princomp <- function(xobj, ...) { 20 | nobs.factor <- sqrt(xobj$n.obs) 21 | d <- xobj$sdev 22 | scores <- sweep(xobj$scores, 2, 6/(d * nobs.factor), FUN = '*') 23 | ggbiplot.default(list(scores=scores, loadings=xobj$loadings), ...) 24 | } 25 | 26 | 27 | #' @rdname ggbiplot 28 | #' @method ggbiplot prcomp 29 | #' @export 30 | ggbiplot.prcomp <- function(xobj, ...) { 31 | nobs.factor <- sqrt(nrow(xobj$x) - 1) 32 | d <- xobj$sdev 33 | scores <- sweep(xobj$x, 2, 1/(d * nobs.factor), FUN = '*') 34 | loadings <- xobj$rotation 35 | ggbiplot.default(list(scores=scores, loadings=loadings), ...) 36 | } 37 | 38 | 39 | #' @rdname ggbiplot 40 | #' @method ggbiplot lda 41 | #' @export 42 | ggbiplot.lda <- function(xobj, ...) { 43 | xname <- xobj$call$x 44 | gname <- xobj$call[[3L]] 45 | X <- eval.parent(xname) 46 | g <- eval.parent(gname) 47 | means <- colMeans(xobj$means) 48 | X <- scale(X, center = means, scale = FALSE) 49 | x <- as.matrix(X) %*% xobj$scaling 50 | nobs.factor <- sqrt(nrow(x)) 51 | d <- apply(x, 2, sd) 52 | x <- sweep(x, 2, 25/(d * nobs.factor), FUN = '*') 53 | ggbiplot.default(list(scores=x, loadings=xobj$scaling), ...) 54 | } 55 | 56 | 57 | #' @rdname ggbiplot 58 | #' @method ggbiplot plsda 59 | #' @export 60 | ggbiplot.plsda <- function(xobj, Yplot=FALSE, ...) { 61 | if (Yplot) { 62 | scores <- xobj$Yscores 63 | loadings <- xobj$Yloadings 64 | } else { 65 | scores <- xobj$scores 66 | loadings <- xobj$loadings 67 | } 68 | nobs.factor <- sqrt(nrow(scores)) 69 | d <- apply(scores, 2, sd) 70 | means <- colMeans(scores) 71 | scores <- scale(scores, center = means, scale = FALSE) 72 | scores <- sweep(scores, 2, 6/(d * nobs.factor), FUN = '*') 73 | ggbiplot.default(list(scores=scores, loadings=loadings), ...) 74 | 75 | } 76 | 77 | 78 | #' @rdname ggbiplot 79 | #' @method ggbiplot splsda 80 | #' @export 81 | ggbiplot.splsda <- function(xobj, ...) { 82 | # means <- irispls$meanx 83 | # X <- scale(xobj$x, center = means, scale = FALSE) 84 | X <- xobj$x 85 | if (is.null(xobj$scores)) 86 | xobj$scores <- as.matrix(X[,xobj$A]) %*% as.matrix(xobj$projection) 87 | xobj$loadings <- xobj$projection 88 | ggbiplot.plsda(xobj, ...) 89 | } 90 | 91 | 92 | #' @rdname ggbiplot 93 | #' @method ggbiplot matrix 94 | #' @export ggbiplot.matrix 95 | ggbiplot.matrix <- function(xobj, ...) { 96 | scores <- xobj 97 | loadings <- matrix(NA, nrow(xobj), ncol(xobj)) 98 | ggbiplot.default(list(scores=scores, loadings=loadings), plot.loadings=FALSE, 99 | equalcoord=FALSE, ...) 
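    # e.g., to plot a bare score matrix `S` colored by a grouping factor `g`
    # (a sketch; no loadings are drawn for raw matrices):
    #     ggbiplot(S, grouping=g)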
100 | } 101 | 102 | 103 | #' @param grouping an optional grouping vector (ie - for coloring points) 104 | #' @param select index of components to be plotted (must be length 2) 105 | #' @param circle enclose points in a circle 106 | #' @param circle.prob controls circle diameter (scales data std dev) if \code{circle = TRUE} 107 | #' @param plot.loadings should loading vectors be plotted 108 | #' @param label.loadings text of loadings labels, taken from rownames of loadings (depends on class of \code{xobj}) 109 | #' @param label.offset absolute offset for loading labels, so labels don't cover loadings vectors 110 | #' @param scale.loadings scale length of loading vectors for plotting purposes 111 | #' @param col.loadings a single value of vector for color of loadings 112 | #' @param alpha controls relative transparency of various plot features 113 | #' @param col color factor for points 114 | #' @param group.ellipse enclose within-group points in an covariance ellipse 115 | #' @param scale.ellipse scale \code{group.ellipse} to 1 standard deviation 116 | #' @param group.cloud connect within-group points to a group mean point with a straight edge 117 | #' @param xlab label for x axis 118 | #' @param ylab label for y axis 119 | #' @param equalcoord equal coordinates, ie should the plot area be square? 120 | #' @param size point size 121 | #' @param size.loadings line width of loading vectors 122 | #' 123 | #' @details additional plotting attributes (eg colors, themes, etc) can be chained on in the usual way for ggplots 124 | #' @examples 125 | #' # an LDA example with iris data 126 | #' ldamod <- lda(iris[,1:4], grouping=iris[,5]) 127 | #' ggbiplot(ldamod, grouping=iris[,5], alpha=.7, group.cloud=TRUE) + theme_bw() 128 | 129 | #' @rdname ggbiplot 130 | #' @method ggbiplot default 131 | #' @importFrom ggplot2 ggplot geom_segment geom_point geom_text geom_path scale_x_continuous scale_y_continuous aes 132 | #' @export 133 | ggbiplot.default <- function(xobj, grouping, select=1:2, circle = FALSE, circle.prob = 0.69, 134 | plot.loadings=TRUE, label.loadings=FALSE, sub.loadings=1:nrow(xobj$loadings), 135 | label.offset=0, label.size=4.5, scale.loadings = 1, col.loadings=scales::muted("red"), 136 | alpha = 1, col=grouping, shape=NULL, group.ellipse=FALSE, scale.ellipse = 1, 137 | group.cloud = FALSE, xlab="", ylab="", equalcoord=TRUE, size=3, size.loadings=1, 138 | loadingsOnTop = FALSE) { 139 | ## get scores and loadings from xobj 140 | if (length(select) > 2) stop("Error: only 2d plots supported") 141 | if (length(select) < 2) stop("Error: need at least 2 coordinates/components") 142 | scores <- data.frame(xvar=xobj$scores[,select[1]], yvar=xobj$scores[,select[2]]) 143 | loadings <- data.frame(xvar=xobj$loadings[sub.loadings,select[1]], yvar=xobj$loadings[sub.loadings,select[2]]) 144 | # standardize scores (?) 145 | # Base plot 146 | g <- ggplot(data = scores, aes(x = xvar, y = yvar)) 147 | 148 | if (plot.loadings) { 149 | loadingslayer <- geom_segment(data = loadings*scale.loadings, 150 | aes(x = 0, y = 0, xend = xvar, yend = yvar), 151 | arrow = grid::arrow(length = grid::unit(1/2, 'picas')), 152 | size = size.loadings, color = col.loadings) 153 | } 154 | if (is.character(label.loadings) || label.loadings) { 155 | if (is.logical(label.loadings)) 156 | labs <- rownames(loadings) 157 | else labs <- label.loadings 158 | # compute angles from orig. 
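    # the offset works in polar coordinates: a loading tip (x, y) has angle
    # ang = atan2(y, x) and radius hyp = sqrt(x^2 + y^2), so placing the text
    # at radius hyp + label.offset shifts the label outward along the vector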
159 | ang <- atan2(loadings$yvar*scale.loadings, loadings$xvar*scale.loadings) 160 | hyp <- sqrt((loadings$yvar*scale.loadings)^2 + (loadings$xvar*scale.loadings)^2) 161 | 162 | labdat <- data.frame(newx=(hyp + label.offset)*cos(ang), 163 | newy=(hyp + label.offset)*sin(ang), 164 | label=labs) 165 | g <- g + 166 | geom_text(aes(x=newx, y=newy, label=label), data=labdat, size=label.size) 167 | } 168 | 169 | if (!missing(grouping)) { 170 | gind <- order(grouping) 171 | grouping <- grouping[gind] 172 | scores <- scores[gind,] 173 | 174 | df <- data.frame(xvar=scores$xvar, yvar=scores$yvar, grouping=grouping) 175 | if (!is.null(shape)) { 176 | aesfun <- aes(color = grouping, shape=shape) ; df$shape <- shape[gind] 177 | } else 178 | aesfun <- aes(color = grouping) 179 | 180 | scoreslayer <- geom_point(data = df, 181 | aesfun, alpha = alpha, size=size) 182 | 183 | } else { 184 | if (!missing(col)) { 185 | df <- data.frame(xvar=scores$xvar, yvar=scores$yvar, col=col) 186 | if (!is.null(shape)) { 187 | aesfun <- aes(color = col, shape=shape) ; df$shape <- shape 188 | } else 189 | aesfun <- aes(color = col) 190 | 191 | scoreslayer <- geom_point(data=df, 192 | aesfun, alpha = alpha, size=size) 193 | 194 | } else { 195 | if (!is.null(shape)) 196 | aesfun <- aes(shape=shape) 197 | else 198 | aesfun <- aes() 199 | scoreslayer <- geom_point(aesfun, alpha = alpha, size=size) 200 | } 201 | } 202 | if (plot.loadings) { 203 | if (!loadingsOnTop) g <- g + loadingslayer + scoreslayer 204 | else g <- g + scoreslayer + loadingslayer 205 | } else 206 | g <- g + scoreslayer 207 | 208 | 209 | if (group.ellipse && !missing(grouping)) { 210 | l <- 200 211 | group.scores <- split(scores[,1:2], grouping) 212 | group.centers <- lapply(group.scores, colMeans) 213 | group.cov <- lapply(group.scores, cov) 214 | group.RR <- lapply(group.cov, chol) 215 | angles <- seq(0, 2*pi, length.out=l) 216 | ell.list <- lapply(group.RR, function(RR) 217 | scale.ellipse * cbind(cos(angles), sin(angles)) %*% RR) 218 | ellCntr <- lapply(1:length(ell.list), function(i) 219 | sweep(ell.list[[i]], 2, group.centers[[i]], "+")) 220 | names(ellCntr) <- names(ell.list) 221 | ell.df <- as.data.frame(do.call("rbind", ellCntr)) 222 | ell.df$grouping <- factor(rep(names(ellCntr), each=l), levels=names(ellCntr)) 223 | g <- g + geom_path(data = ell.df, aes(color = grouping, group = grouping)) 224 | } 225 | 226 | if (group.cloud && !missing(grouping)) { 227 | group.scores <- split(scores[,1:2], grouping) 228 | group.centers <- lapply(group.scores, colMeans) 229 | centers.df <- do.call('rbind', rep(group.centers, table(grouping))) 230 | rownames(centers.df) <- rownames(scores) 231 | colnames(centers.df) <- c("xcntr", "ycntr") 232 | loadCntr.df <- cbind(scores, centers.df, grouping) 233 | g <- g + 234 | geom_segment(data = loadCntr.df, 235 | aes(x = xcntr, y = ycntr, xend = xvar, yend = yvar, 236 | color = grouping), alpha=10^log(alpha/1.4)) 237 | } 238 | 239 | 240 | if (circle) { 241 | # scale circle radius 242 | r1 <- sqrt(qchisq(circle.prob, df = 2)) * max(scores$xvar^2)^(1/2) 243 | r2 <- sqrt(qchisq(circle.prob, df = 2)) * max(scores$yvar^2)^(1/2) 244 | 245 | theta <- c(seq(-pi, pi, length = 50), seq(pi, -pi, length = 50)) 246 | circdat <- data.frame(xvar = r1 * cos(theta), yvar = r2 * sin(theta)) 247 | g <- g + geom_path(aes(x=xvar, y=yvar), data = circdat, color = scales::muted('black'), 248 | size = 0.5, alpha = alpha/3) 249 | } 250 | 251 | if (equalcoord) { 252 | if (circle) { 253 | xrange <- range(circdat$xvar) 254 | yrange <- 
range(circdat$yvar) 255 | } else { 256 | xrange <- c(-max(abs(scores$xvar)), max(abs(scores$xvar))) 257 | yrange <- c(-max(abs(scores$yvar)), max(abs(scores$yvar))) 258 | } 259 | 260 | g <- g + scale_x_continuous(xlab, limits=xrange) + 261 | scale_y_continuous(ylab, limits=yrange) 262 | } else { 263 | g <- g + scale_x_continuous(xlab) + 264 | scale_y_continuous(ylab) 265 | } 266 | g 267 | } 268 | -------------------------------------------------------------------------------- /R/hello.R: -------------------------------------------------------------------------------- 1 | 2 | ## a placeholder 3 | hello <- function(txt = "world") { 4 | cat("Hello, ", txt, "\n") 5 | } 6 | -------------------------------------------------------------------------------- /R/normalization.R: -------------------------------------------------------------------------------- 1 | ##################################################################### 2 | # Different normalization schemes for microbiome counts (real or fake) 3 | # 4 | # @author Zachary Kurtz 5 | # @date 10/10/2013 6 | ##################################################################### 7 | 8 | 9 | #' @export 10 | norm_pseudo <- function(x) norm_to_total(x+1) 11 | 12 | 13 | #' @keywords internal 14 | norm_diric <- function(x, rep=1) { 15 | require(VGAM) 16 | dmat <- rdiric(rep, x+1) 17 | norm_to_total(colMeans(dmat)) 18 | } 19 | 20 | #' @export 21 | norm_to_total <- function(x) x/sum(x) 22 | 23 | 24 | #' The centered log-ratio transformation for 25 | #' compositional data (not necessarily closed/normalized!) 26 | #' 27 | #' The clr is computed as 28 | #' \code{x[i]} = log (\code{x[i]} / \code{exp(mean(log(x)))}) 29 | #' 30 | #' @title clr The centered log-ratio transformation 31 | #' @param x a numeric data vector containing components of a composition 32 | #' @param ... additional arguments 33 | #' @return clr transformed \code{x} 34 | #' @examples 35 | #' # vector examples: 36 | #' clr(norm_to_total(1:10)) 37 | #' clr(1:10) 38 | #' 39 | #' # matrix examples: 40 | #' dmat <- matrix(exp(rnorm(110)), 10) 41 | # rows are samples/compositions, cols are features/components 42 | #' clr(dmat, 1) 43 | # cols are samples/compositions, rows are features/components 44 | #' clr(dmat, 2) 45 | #' @rdname clr 46 | #' @export 47 | clr <- function(x, ...) { 48 | UseMethod('clr') 49 | } 50 | 51 | 52 | 53 | #' 54 | #' @param base base of log to use, default is natural log 55 | #' @param tol machine tolerance for a zero count, default is machine tol (.Machine$double.eps) 56 | #' @rdname clr 57 | #' @method clr default 58 | #' @export 59 | clr.default <- function(x, base=exp(1), tol=.Machine$double.eps) { 60 | nzero <- (x >= tol) 61 | LOG <- log(ifelse(nzero, x, 1), base) 62 | ifelse(nzero, LOG - mean(LOG)/mean(nzero), 0.0) 63 | } 64 | 65 | 66 | #' 67 | #' 68 | #' @rdname clr 69 | #' @method clr matrix 70 | #' @export 71 | clr.matrix <- function(x, mar=2, base=exp(1), tol=.Machine$double.eps) { 72 | # apply(x, mar, clr, ...) 73 | if (!mar %in% c(1,2)) stop('mar (margin) must be 1 (compositions are rows) or 2 (compositions are columns)') 74 | 75 | if (mar == 1) x <- t(x) 76 | nzero <- (x >= tol) 77 | LOG <- log(ifelse(nzero, x, 1), base) 78 | means <- colMeans(LOG) 79 | x.clr <- LOG - rep(means/colMeans(nzero), each=nrow(x)) 80 | x.clr[!nzero] <- 0.0 81 | if (mar == 1) x.clr <- t(x.clr) 82 | x.clr 83 | } 84 | 85 | 86 | #' 87 | #' 88 | #' @rdname clr 89 | #' @method clr data.frame 90 | #' @export 91 | clr.data.frame <- function(x, mar=2, ...) 
{ 92 | clr(as.matrix(x), mar, ...) 93 | } 94 | 95 | 96 | 97 | #' The additive log-ratio transformation for 98 | #' compositional data (not necessarily closed/normalized!) 99 | #' 100 | #' The alr transformation is computed as: 101 | #' \code{x[i]} = log ( \code{x[i]} / x[D] ) 102 | #' 103 | #' @title alr The additive log-ratio transformation 104 | #' @param x a numeric data vector containing components of a composition 105 | #' @param ... additional arguments 106 | #' @return alr transformed \code{x} 107 | #' @examples 108 | #' # vector examples: 109 | #' alr(norm_to_total(1:10)) 110 | #' alr(1:10) 111 | #' 112 | #' # matrix examples: 113 | #' dmat <- matrix(exp(rnorm(110)), 10) 114 | # rows are samples/compositions, cols are features/components 115 | #' alr(dmat, 1) 116 | # cols are samples/compositions, rows are features/components 117 | #' alr(dmat, 2) 118 | #' @rdname alr 119 | #' @export 120 | alr <- function(x, ...) { 121 | UseMethod("alr", x) 122 | } 123 | 124 | 125 | #' @param divcomp index of the divisor component 126 | #' @param removeDivComp remove divisor component from the resulting data 127 | #' @param base base of log to use, default is natural log 128 | #' @param tol machine tolerance for a zero count, default is machine tol (.Machine$double.eps) 129 | #' @rdname alr 130 | #' @method alr default 131 | #' @export alr.default 132 | alr.default <- function(x, divcomp=1, base=exp(1), removeDivComp=TRUE, 133 | tol=.Machine$double.eps) { 134 | zero <- (x >= tol) 135 | LOG <- log(ifelse(zero, x, 1), base) 136 | x.alr <- ifelse(zero, LOG - LOG[divcomp], 0.0) 137 | if (removeDivComp) x.alr[-divcomp] 138 | else x.alr 139 | } 140 | 141 | #' 142 | #' 143 | #' @rdname alr 144 | #' @method alr matrix 145 | #' @export alr.default 146 | alr.matrix <- function(x, mar=2, divcomp=1, base=exp(1), removeDivComp=TRUE, 147 | tol=.Machine$double.eps) { 148 | if (mar == 1) x <- t(x) 149 | zero <- (x >= tol) 150 | LOG <- log(ifelse(zero, x, 1), base) 151 | x.alr <- ifelse(zero, LOG - rep(LOG[divcomp,], each=nrow(x)), 0.0) 152 | if (removeDivComp) x.alr <- x.alr[-divcomp,] 153 | 154 | if (mar ==1) t(x.alr) 155 | else x.alr 156 | } 157 | 158 | #' 159 | #' 160 | #' @rdname alr 161 | #' @method alr data.frame 162 | #' @export alr.data.frame 163 | alr.data.frame <- function(x, mar=2, ...) { 164 | alr(as.matrix(x), mar, ...) 165 | } 166 | 167 | #' @keywords internal 168 | ilr <- function(x.f, V, ...) { 169 | UseMethod('ilr') 170 | } 171 | 172 | #' @keywords internal 173 | ilr.default <- function(x.f, ...) { 174 | 175 | } 176 | 177 | 178 | 179 | #' @keywords internal 180 | CSS <- function(x, ...) 
{ 181 | UseMethod("CSS") 182 | } 183 | #' @keywords internal 184 | CSS.default <- function(x, p=0.05, sl=1000) { 185 | # Cumulative sum scaling Normalization Paulson et al 2013 (Nature Methods) 186 | xx <- x 187 | xx[x==0] <- NA 188 | qs <- quantile(xx, p=p, na.rm=TRUE) 189 | xx <- x - .Machine$double.eps 190 | normFactor <- sum(xx[xx <= qs]) 191 | (x/normFactor)*sl 192 | } 193 | #' @keywords internal 194 | CSS.matrix <- function(x, p=CSSstat(x), sl=1000, mar=2) { 195 | apply(x, mar, CSS, p=p, sl=sl) 196 | } 197 | #' @keywords internal 198 | CSSstat <- function(mat, rel=0.1) { 199 | smat <- sapply(1:ncol(mat), function(i) { 200 | sort(mat[, i], decreasing = FALSE) 201 | }) 202 | ref <- rowMeans(smat) 203 | yy <- mat 204 | yy[yy == 0] <- NA 205 | ncols <- ncol(mat) 206 | refS <- sort(ref) 207 | k <- which(refS > 0)[1] 208 | lo <- (length(refS) - k + 1) 209 | diffr <- sapply(1:ncols, function(i) { 210 | refS[k:length(refS)] - quantile(yy[, i], p = seq(0, 211 | 1, length.out = lo), na.rm = TRUE)}) 212 | diffr2 <- apply(abs(diffr), 1, median, na.rm = TRUE) 213 | x <- which(abs(diff(diffr2))/diffr2[-1] > rel)[1]/length(diffr2) 214 | names(x) <- NULL 215 | x 216 | } 217 | 218 | #' @keywords internal 219 | DESeq <- function(x, ...) { 220 | UseMethod("DESeq") 221 | } 222 | #' @keywords internal 223 | DESeq.matrix <- function(mat, c) { 224 | # compute geometric mean along columns 225 | matt <- mat 226 | matt[matt == 0] <- NA 227 | k_ref <- apply(matt, 1, function(x) exp(mean(log(x), na.rm=TRUE))) 228 | krefmat <- matrix(rep(k_ref,ncol(mat)), nrow=nrow(mat)) 229 | s_hat <- apply(matt/krefmat, 2, median, na.rm=TRUE) 230 | if (missing(c)) { 231 | fn <- function(c, s_hat) abs(sum(log(c*s_hat))) 232 | c <- optimize(fn, interval=0:10, s_hat=s_hat)$minimum 233 | } 234 | s <- c * s_hat 235 | smat <- matrix(rep(s,nrow(mat)), ncol=ncol(mat), byrow=TRUE) 236 | mat/smat 237 | } 238 | -------------------------------------------------------------------------------- /R/okernelPLS.fit.R: -------------------------------------------------------------------------------- 1 | ########################################### 2 | # Orthonormalized kernel PLS fit algorithms 3 | # 4 | ########################################### 5 | 6 | 7 | okernelpls.fit <- function(X, Y, ncomp, stripped=FALSE, scale=FALSE, center=TRUE, kernelfun='linkern', ...) 
{ 8 | X <- as.matrix(X) 9 | Y <- as.matrix(Y) 10 | if (!stripped) { 11 | dnX <- dimnames(X) 12 | dnY <- dimnames(Y) 13 | } 14 | dimnames(X) <- dimnames(Y) <- NULL 15 | nobj <- dim(X)[1] 16 | npred <- dim(X)[2] 17 | nresp <- dim(Y)[2] 18 | Xmeans <- colMeans(X) 19 | X <- scale(X, scale=scale, center=center) 20 | Ymeans <- colMeans(Y) 21 | Y <- scale(Y, scale=scale, center=center) 22 | 23 | # define a linear kernel for response vectors 24 | K_y <- tcrossprod(Y) 25 | if (kernelfun == "linkern") { 26 | K_x <- tcrossprod(X) 27 | } else { 28 | library(kernlab) 29 | K_x <- kernelMatrix(match.fun(kernelfun), X) 30 | } 31 | 32 | Amat <- K_x %*% K_y %*% K_x 33 | Amat0 <- Amat 34 | Bmat <- K_x %*% K_x 35 | TT <- U <- matrix(0, ncol = ncomp, nrow = nobj) 36 | B <- array(0, c(npred, nresp, ncomp)) 37 | In <- diag(nobj) 38 | nits <- numeric(ncomp) 39 | A <- matrix(0, nobj, ncomp) 40 | if (!stripped) { 41 | fitted <- array(0, dim = c(nobj, nresp, ncomp)) 42 | Xresvar <- numeric(ncomp) 43 | Xtotvar <- sum(diag(K_x)) 44 | } 45 | for (a in 1:ncomp) { 46 | # generalized eigenvalue decomposition of feature and response kernel 47 | out <- .geigen(Amat, Bmat) 48 | vals <- out$values 49 | vecs <- out$vectors 50 | val.max <- which.max((vals)) 51 | alpha <- vecs[,val.max,drop=FALSE] 52 | ## alpha <- alpha/ norm(matrix(alpha), 'F') 53 | beta <- K_y %*% alpha 54 | utmp <- beta/c(crossprod(alpha, beta)) 55 | wpw <- sqrt(c(crossprod(utmp, K_x) %*% utmp)) 56 | TT[, a] <- alpha * wpw 57 | U[, a] <- utmp * wpw 58 | A[,a] <- alpha 59 | Amat <- Amat - (vals[val.max] * (Bmat %*% alpha %*% t(alpha) %*% Bmat)) 60 | } 61 | n <- length(X) 62 | m <- length(X) 63 | ret <- t(t(K_x - rowSums(K_x)/m) - rowSums(K_x)/m) + sum(K_x)/(m * n) 64 | TT <- scale(tcrossprod(ret) %*% TT, scale=TRUE, center=TRUE)[,,drop=FALSE] 65 | TTtTinv <- TT %*% diag(1/colSums(TT * TT), ncol = ncol(TT)) 66 | if (kernelfun != 'linkern') { 67 | XW <- tcrossprod(ret) %*% U 68 | W <- scale(crossprod(X, XW), center=FALSE) 69 | W <- W/rep(sqrt(colSums(W * W)), each = npred) 70 | P <- crossprod(tcrossprod(XW, W), TTtTinv) 71 | } else { 72 | W <- crossprod(X, U) 73 | W <- W/rep(sqrt(colSums(W * W)), each = npred) 74 | P <- crossprod(X, TTtTinv) 75 | } 76 | 77 | Q <- crossprod(Y, TTtTinv) 78 | if (ncomp == 1) { 79 | R <- W 80 | } 81 | else { 82 | PW <- crossprod(P, W) 83 | if (nresp == 1) { 84 | PWinv <- diag(ncomp) 85 | bidiag <- -PW[row(PW) == col(PW) - 1] 86 | for (a in 1:(ncomp - 1)) PWinv[a, (a + 1):ncomp] <- cumprod(bidiag[a:(ncomp - 1)]) 87 | } 88 | else { 89 | PWinv <- backsolve(PW, diag(ncomp)) 90 | } 91 | R <- W %*% PWinv 92 | } 93 | for (a in 1:ncomp) { 94 | B[, , a] <- tcrossprod(R[, 1:a, drop = FALSE], Q[, 1:a, drop = FALSE]) 95 | } 96 | if (stripped) { 97 | list(coefficients = B, Xmeans = Xmeans, Ymeans = Ymeans) 98 | } else { 99 | for (a in 1:ncomp) fitted[, , a] <- tcrossprod(TT[, 1:a, drop = FALSE], Q[, 1:a, drop = FALSE]) 100 | residuals <- -fitted + c(Y) 101 | fitted <- fitted + rep(Ymeans, each = nobj) 102 | Xvar <- diff(-c(Xtotvar, Xresvar)) 103 | objnames <- dnX[[1]] 104 | if (is.null(objnames)) 105 | objnames <- dnY[[1]] 106 | prednames <- dnX[[2]] 107 | respnames <- dnY[[2]] 108 | compnames <- paste("Comp", 1:ncomp) 109 | nCompnames <- paste(1:ncomp, "comps") 110 | dimnames(TT) <- dimnames(U) <- list(objnames, compnames) 111 | dimnames(R) <- dimnames(W) <- dimnames(P) <- list(prednames, 112 | compnames) 113 | dimnames(Q) <- list(respnames, compnames) 114 | dimnames(B) <- list(prednames, respnames, nCompnames) 115 | dimnames(fitted) <- 
dimnames(residuals) <- list(objnames, 116 | respnames, nCompnames) 117 | names(Xvar) <- compnames 118 | class(TT) <- class(U) <- "scores" 119 | class(P) <- class(W) <- class(Q) <- "loadings" 120 | library(MASS) 121 | modbeta <- MASS::ginv(TT) %*% Y 122 | list(coefficients = B, scores = TT, loadings = P, loading.weights = W, 123 | Yscores = U, Yloadings = Q, projection = R, Xmeans = Xmeans, 124 | Ymeans = Ymeans, fitted.values = fitted, residuals = residuals, modbeta=modbeta, 125 | Xvar = Xvar, Xtotvar = Xtotvar, nits = nits, A=A, Kx=K_x, Ky=K_y, ret=ret) 126 | } 127 | 128 | 129 | # if (kernelfun != 'linkern') { 130 | # n <- length(X) 131 | # m <- length(X) 132 | # ret <- t(t(K_x - rowSums(K_x)/m) - rowSums(K_x)/m) + sum(K_x)/(m * n) 133 | # XU <- ret %*% t(ret) %*% A 134 | # # mimic feature loadings using kernel factor analysis on the scores 135 | # # and computing inner prod with X and scores 136 | ## fa <- kfa(XU, features=npred) 137 | ## U <- fa@xmatrix 138 | # U <- scale(t(X) %*% XU, center=FALSE) 139 | # } else { 140 | # ret <- X 141 | # U <- t(ret) %*% A 142 | # XU <- ret %*% U 143 | # } 144 | 145 | # rownames(U) <- colnames(X) 146 | # XUXU_inv <- tryCatch(solve(t(XU) %*% XU), error=function(e) { 147 | # require(MASS) ; ginv(t(XU) %*% XU) }) 148 | 149 | # B <- XUXU_inv %*% t(XU) %*% Y 150 | # residuals <- Y - (XU %*% B) 151 | 152 | 153 | # list(coefficients = B, scores = XU, loadings = U, Xmeans = Xmeans, fitted.values=XU%*%B, 154 | # Ymeans = Ymeans, residuals = residuals, Bmat=Bmat, Amat=Amat0, 155 | # Xvar = colSums(U * U), Xtotvar = sum(X * X), ret=ret) 156 | } 157 | 158 | 159 | 160 | .geigen <- function(A, B, rank=ncol(A)) { 161 | svd1 <- svd(B) 162 | F <- svd1$v 163 | D <- diag(svd1$d[1:rank]^-.5) 164 | T1 <- F[,1:rank] %*% D 165 | svd2 <- svd(t(T1) %*% A %*% T1) 166 | T2 <- svd2$v 167 | T3 <- T1 %*% T2 168 | list(vectors=T3, values=diag(t(T3) %*% A %*% T3)) 169 | } 170 | 171 | 172 | 173 | 174 | #kpls <- function (X, Y, ncomp, stripped = FALSE, tol = .Machine$double.eps^0.5, maxit = 100, ...) 
{ 175 | # Y <- as.matrix(Y) 176 | # if (!stripped) { 177 | # dnX <- dimnames(X) 178 | # dnY <- dimnames(Y) 179 | # } 180 | # dimnames(X) <- dimnames(Y) <- NULL 181 | # nobj <- dim(X)[1] 182 | # npred <- dim(X)[2] 183 | # nresp <- dim(Y)[2] 184 | # TT <- U <- matrix(0, ncol = ncomp, nrow = nobj) 185 | # B <- array(0, c(npred, nresp, ncomp)) 186 | # In <- diag(nobj) 187 | # nits <- numeric(ncomp) 188 | # if (!stripped) { 189 | # fitted <- array(0, dim = c(nobj, nresp, ncomp)) 190 | # Xresvar <- numeric(ncomp) 191 | # } 192 | # Xmeans <- colMeans(X) 193 | # X <- X - rep(Xmeans, each = nobj) 194 | # Ymeans <- colMeans(Y) 195 | # Y <- Y - rep(Ymeans, each = nobj) 196 | # XXt <- tcrossprod(X) 197 | # YYt <- tcrossprod(Y) 198 | # if (!stripped) 199 | # Xtotvar <- sum(diag(XXt)) 200 | # for (a in 1:ncomp) { 201 | # XXtYYt <- XXt %*% YYt 202 | # XXtYYt <- XXtYYt %*% XXtYYt 203 | # t.a.old <- Y[, 1] 204 | # nit <- 0 205 | # repeat { 206 | # nit <- nit + 1 207 | # t.a <- XXtYYt %*% t.a.old 208 | # t.a <- t.a/sqrt(c(crossprod(t.a))) 209 | # if (sum(abs((t.a - t.a.old)/t.a), na.rm = TRUE) < 210 | # tol) 211 | # break 212 | # else t.a.old <- t.a 213 | # if (nit >= maxit) { 214 | # warning("No convergence in", maxit, "iterations\n") 215 | # break 216 | # } 217 | # } 218 | # nits[a] <- nit 219 | # u.a <- YYt %*% t.a 220 | # utmp <- u.a/c(crossprod(t.a, u.a)) 221 | # wpw <- sqrt(c(crossprod(utmp, XXt) %*% utmp)) 222 | # TT[, a] <- t.a * wpw 223 | # U[, a] <- utmp * wpw 224 | # G <- In - tcrossprod(t.a) 225 | # XXt <- G %*% XXt %*% G 226 | # YYt <- G %*% YYt %*% G 227 | # if (!stripped) 228 | # Xresvar[a] <- sum(diag(XXt)) 229 | # } 230 | # W <- crossprod(X, U) 231 | # W <- W/rep(sqrt(colSums(W * W)), each = npred) 232 | # TTtTinv <- TT %*% diag(1/colSums(TT * TT), ncol = ncol(TT)) 233 | # P <- crossprod(X, TTtTinv) 234 | # Q <- crossprod(Y, TTtTinv) 235 | # if (ncomp == 1) { 236 | # R <- W 237 | # } 238 | # else { 239 | # PW <- crossprod(P, W) 240 | # if (nresp == 1) { 241 | # PWinv <- diag(ncomp) 242 | # bidiag <- -PW[row(PW) == col(PW) - 1] 243 | # for (a in 1:(ncomp - 1)) PWinv[a, (a + 1):ncomp] <- cumprod(bidiag[a:(ncomp - 244 | # 1)]) 245 | # } 246 | # else { 247 | # PWinv <- backsolve(PW, diag(ncomp)) 248 | # } 249 | # R <- W %*% PWinv 250 | # } 251 | # for (a in 1:ncomp) { 252 | # B[, , a] <- tcrossprod(R[, 1:a, drop = FALSE], Q[, 1:a, 253 | # drop = FALSE]) 254 | # } 255 | # if (stripped) { 256 | # list(coefficients = B, Xmeans = Xmeans, Ymeans = Ymeans) 257 | # } 258 | # else { 259 | # for (a in 1:ncomp) fitted[, , a] <- tcrossprod(TT[, 1:a, 260 | # drop = FALSE], Q[, 1:a, drop = FALSE]) 261 | # residuals <- -fitted + c(Y) 262 | # fitted <- fitted + rep(Ymeans, each = nobj) 263 | # Xvar <- diff(-c(Xtotvar, Xresvar)) 264 | # objnames <- dnX[[1]] 265 | # if (is.null(objnames)) 266 | # objnames <- dnY[[1]] 267 | # prednames <- dnX[[2]] 268 | # respnames <- dnY[[2]] 269 | # compnames <- paste("Comp", 1:ncomp) 270 | # nCompnames <- paste(1:ncomp, "comps") 271 | # dimnames(TT) <- dimnames(U) <- list(objnames, compnames) 272 | # dimnames(R) <- dimnames(W) <- dimnames(P) <- list(prednames, 273 | # compnames) 274 | # dimnames(Q) <- list(respnames, compnames) 275 | # dimnames(B) <- list(prednames, respnames, nCompnames) 276 | # dimnames(fitted) <- dimnames(residuals) <- list(objnames, 277 | # respnames, nCompnames) 278 | # names(Xvar) <- compnames 279 | # class(TT) <- class(U) <- "scores" 280 | # class(P) <- class(W) <- class(Q) <- "loadings" 281 | # list(coefficients = B, scores = TT, loadings = P, 
loading.weights = W, 282 | # Yscores = U, Yloadings = Q, projection = R, Xmeans = Xmeans, 283 | # Ymeans = Ymeans, fitted.values = fitted, residuals = residuals, 284 | # Xvar = Xvar, Xtotvar = Xtotvar, nits = nits) 285 | # } 286 | #} 287 | 288 | 289 | 290 | -------------------------------------------------------------------------------- /R/plsmda.R: -------------------------------------------------------------------------------- 1 | ####################### 2 | # multilabel PLS-DA 3 | # 4 | ######################### 5 | 6 | 7 | plsmda <- function (x, y, ncomp = 2, probMethod = "softmax", prior = NULL, ...) { 8 | ## partial least squares multilabel discriminant analysis 9 | caret:::requireNamespaceQuietStop("pls") 10 | funcCall <- match.call(expand.dots = TRUE) 11 | if (!is.matrix(x)) 12 | x <- as.matrix(x) 13 | if (length(ncomp) > 1) { 14 | ncomp <- max(ncomp) 15 | warning(paste("A value single ncomp must be specified.", 16 | "max(ncomp) was used.", "Predictions can be obtained for values <= ncomp")) 17 | } 18 | if (probMethod == "softmax") { 19 | if (!is.null(prior)) 20 | warning("Priors are ignored unless probMethod = \"Bayes\"") 21 | } 22 | if (is.factor(y)) { 23 | obsLevels <- levels(y) 24 | oldY <- list(y) 25 | y <- .class2ind(y) 26 | } else if (is.data.frame(y)) { 27 | ## coerce to columns to factors 28 | for (i in 1:ncol(y)) y[,i] <- droplevels(as.factor(y[,i])) 29 | obsLevels <- lapply(y, levels) 30 | oldY <- y 31 | y <- do.call('cbind', lapply(y, .class2ind)) 32 | } 33 | else { 34 | if (is.matrix(y)) { 35 | # test <- apply(y, 1, sum) 36 | # if (any(test != 1)) 37 | # stop("the rows of y must be 0/1 and sum to 1") 38 | obsLevels <- colnames(y) 39 | if (is.null(obsLevels)) 40 | stop("the y matrix must have column names") 41 | oldY <- obsLevels[apply(y, 1, which.max)] 42 | } 43 | else stop("y must be a matrix, data.frame or a factor") 44 | } 45 | tmpData <- data.frame(n = paste("row", 1:nrow(y), sep = "")) 46 | tmpData$y <- y 47 | tmpData$x <- x 48 | out <- .plsr(y ~ x, data = tmpData, ncomp = ncomp, ...) 
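    # y has been expanded column-block-wise into a +/-1 indicator matrix by
    # .class2ind; e.g. a factor c("a","b","a") becomes the rows (1,-1),
    # (-1,1), (1,-1)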
49 | out$obsLevels <- obsLevels 50 | out$probMethod <- probMethod 51 | if (probMethod == "Bayes") { 52 | caret:::requireNamespaceQuietStop("klaR") 53 | makeModels <- function(x, y, pri) { 54 | probModel <- klaR::NaiveBayes(x, y, prior = pri, usekernel = TRUE) 55 | probModel$train <- predict(probModel)$posterior 56 | probModel$x <- NULL 57 | probModel 58 | } 59 | cls <- class(out) 60 | class(out) <- "mvr" 61 | train <- predict(out, as.matrix(tmpData$x), ncomp = 1:ncomp) 62 | # train <- train[, -length(obsLevels), , drop = FALSE] 63 | usedlevs <- 0 64 | probMod <- vector("list", N <- length(obsLevels)) 65 | for (i in 1:N) { 66 | ysub <- oldY[[i]] 67 | nlevs <- nlevels(ysub) 68 | traintemp <- train[, (i+usedlevs):(nlevs+usedlevs), ,drop=FALSE] 69 | traintemp <- traintemp[, -length(obsLevels[[i]]), , drop = FALSE] 70 | usedlevs <- usedlevs + nlevels(ysub) 71 | probMod[[i]] <- apply(traintemp, 3, makeModels, y = ysub, pri = prior) 72 | } 73 | out$probModel <- probMod 74 | names(out$probModel) <- names(oldY) 75 | } 76 | else out$probModel <- NULL 77 | class(out) <- c("plsmda", "plsda", class(out)) 78 | out 79 | } 80 | 81 | 82 | .plsr <- function (..., method = "okernelpls") { 83 | cl <- match.call() 84 | cl$method <- match.arg(method, c("kernelpls", "widekernelpls", "okernelpls", 85 | "simpls", "oscorespls", "model.frame")) 86 | cl[[1]] <- quote(.mvr) 87 | res <- eval(cl, parent.frame()) 88 | if (cl$method != "model.frame") 89 | res$call[[1]] <- as.name(".plsr") 90 | if (missing(method)) 91 | res$call$method <- NULL 92 | res 93 | } 94 | 95 | 96 | .mvr <- function (formula, ncomp, Y.add, data, subset, na.action, method = pls.options()$mvralg, 97 | scale = FALSE, validation = c("none", "CV", "LOO"), model = TRUE, 98 | x = FALSE, y = FALSE, ...) { 99 | caret:::requireNamespaceQuietStop("pls") 100 | ret.x <- x 101 | ret.y <- y 102 | mf <- match.call(expand.dots = FALSE) 103 | if (!missing(Y.add)) { 104 | Y.addname <- as.character(substitute(Y.add)) 105 | mf$formula <- update(formula, paste("~ . 
+", Y.addname)) 106 | } 107 | m <- match(c("formula", "data", "subset", "na.action"), names(mf), 108 | 0) 109 | mf <- mf[c(1, m)] 110 | mf[[1]] <- as.name("model.frame") 111 | mf <- eval(mf, parent.frame()) 112 | method <- match.arg(method, c("kernelpls", "widekernelpls", "okernelpls", 113 | "simpls", "oscorespls", "cppls", "svdpc", "model.frame")) 114 | if (method == "model.frame") 115 | return(mf) 116 | mt <- attr(mf, "terms") 117 | Y <- model.response(mf, "numeric") 118 | if (is.matrix(Y)) { 119 | if (is.null(colnames(Y))) 120 | colnames(Y) <- paste("Y", 1:dim(Y)[2], sep = "") 121 | } 122 | 123 | else { 124 | Y <- as.matrix(Y) 125 | colnames(Y) <- deparse(formula[[2]]) 126 | } 127 | if (missing(Y.add)) { 128 | Y.add <- NULL 129 | } 130 | else { 131 | Y.add <- mf[, Y.addname] 132 | mt <- drop.terms(mt, which(attr(mt, "term.labels") == 133 | Y.addname), keep.response = TRUE) 134 | } 135 | X <- pls:::delete.intercept(model.matrix(mt, mf)) 136 | nobj <- dim(X)[1] 137 | npred <- dim(X)[2] 138 | if (length(attr(mt, "term.labels")) == 1 && !is.null(colnames(mf[[attr(mt, 139 | "term.labels")]]))) 140 | colnames(X) <- sub(attr(mt, "term.labels"), "", colnames(X)) 141 | if (missing(ncomp)) { 142 | ncomp <- min(nobj - 1, npred) 143 | ncompWarn <- FALSE 144 | } 145 | else { 146 | if (ncomp < 1 || ncomp > min(nobj - 1, npred)) 147 | stop("Invalid number of components, ncomp") 148 | ncompWarn <- TRUE 149 | } 150 | sdscale <- identical(TRUE, scale) 151 | if (is.numeric(scale)) 152 | if (length(scale) == npred) 153 | X <- X/rep(scale, each = nobj) 154 | else stop("length of 'scale' must equal the number of x variables") 155 | switch(match.arg(validation), CV = { 156 | val <- mvrCv(X, Y, ncomp, Y.add = Y.add, method = method, 157 | scale = sdscale, ...) 158 | }, LOO = { 159 | segments <- as.list(1:nobj) 160 | attr(segments, "type") <- "leave-one-out" 161 | val <- mvrCv(X, Y, ncomp, Y.add = Y.add, method = method, 162 | scale = sdscale, segments = segments, ...) 163 | }, none = { 164 | val <- NULL 165 | }) 166 | if (identical(TRUE, ncomp > val$ncomp)) { 167 | ncomp <- val$ncomp 168 | if (ncompWarn) 169 | warning("`ncomp' reduced to ", ncomp, " due to cross-validation") 170 | } 171 | fitFunc <- switch(method, kernelpls = pls:::kernelpls.fit, widekernelpls = pls:::widekernelpls.fit, 172 | simpls = pls:::simpls.fit, oscorespls = pls:::oscorespls.fit, 173 | cppls = pls:::cppls.fit, okernelpls=okernelpls.fit, svdpc = pls:::svdpc.fit) 174 | if (sdscale) { 175 | scale <- sqrt(colSums((X - rep(colMeans(X), each = nobj))^2)/(nobj - 176 | 1)) 177 | if (any(abs(scale) < .Machine$double.eps^0.5)) 178 | warning("Scaling with (near) zero standard deviation") 179 | X <- X/rep(scale, each = nobj) 180 | } 181 | start.time <- proc.time()[3] 182 | z <- fitFunc(X, Y, ncomp, Y.add = Y.add, ...) 183 | z$fit.time <- proc.time()[3] - start.time 184 | class(z) <- "mvr" 185 | z$na.action <- attr(mf, "na.action") 186 | z$ncomp <- ncomp 187 | z$method <- method 188 | if (is.numeric(scale)) 189 | z$scale <- scale 190 | z$validation <- val 191 | z$call <- match.call() 192 | z$terms <- mt 193 | if (model) 194 | z$model <- mf 195 | if (ret.x) 196 | z$x <- X 197 | if (ret.y) 198 | z$y <- Y 199 | z 200 | } 201 | 202 | print.plsmda <- function(x, ...) 
{
203 |     if (x$method == "okernelpls") {
204 |         ana = "Orthonormalized Partial Least Squares"
205 |         alg = "kernel"
206 |         kernfun = x$call$kernelfun
207 |         cat(ana, ", fitted with the", alg, "algorithm and using a", kernfun, "kernel function")
208 |         if (!is.null(x$validation)) 
209 |             cat("\nCross-validated using", length(x$validation$segments), 
210 |                 attr(x$validation$segments, "type"), "segments.")
211 |         cat("\nCall:\n", deparse(x$call), "\n", sep = "")
212 |         invisible(x)
213 |     } else {
214 |         pls:::print.mvr(x)
215 |     }
216 | }
217 | 
218 | 
219 | predict.plsmda <- function (object, newdata = NULL, ncomp = NULL, type = "class", ...) {
220 |     caret:::requireNamespaceQuietStop("pls")
221 |     if (is.null(ncomp)) 
222 |         if (!is.null(object$ncomp)) 
223 |             ncomp <- object$ncomp
224 |         else stop("specify ncomp")
225 |     if (!is.null(newdata)) {
226 |         if (!is.matrix(newdata)) 
227 |             newdata <- as.matrix(newdata)
228 |     }
229 |     class(object) <- "mvr"
230 |     tmpPred <- predict(object, newdata = newdata)[, , ncomp, drop = FALSE]
231 |     if (type == "raw") 
232 |         return(tmpPred)
233 |     out <- vector('list', N <- length(object$obsLevels))
234 |     if (is.null(object$probModel)) {
235 |         ## softmax
236 |         for (j in 1:N) {
237 |             switch(type, class = {
238 |                 if (length(dim(tmpPred)) < 3) {
239 |                     # only 1 latent component; index into this response's levels
240 |                     out[[j]] <- object$obsLevels[[j]][apply(tmpPred, 1, which.max)]
241 |                     out[[j]] <- factor(out[[j]], levels = object$obsLevels[[j]])
242 |                 } else {
243 |                     tmpOut <- matrix("", nrow = dim(tmpPred)[1], ncol = dim(tmpPred)[3])
244 |                     for (i in 1:dim(tmpPred)[3]) {
245 |                         tmpOut[, i] <- object$obsLevels[[j]][apply(tmpPred[, , i, drop = FALSE], 1, which.max)]
246 |                     }
247 |                     out[[j]] <- as.data.frame(tmpOut)
248 |                     out[[j]] <- as.data.frame(lapply(out[[j]], function(x, y) factor(x, levels = y), y = object$obsLevels[[j]]))
249 |                     names(out[[j]]) <- paste("ncomp", ncomp, sep = "")
250 |                     rownames(out[[j]]) <- rownames(newdata)
251 |                     if (length(ncomp) == 1) out[[j]] <- out[[j]][, 1]
252 |                 }
253 |             }, prob = {
254 |                 if (length(dim(tmpPred)) < 3) {
255 |                     out[[j]] <- t(apply(tmpPred, 1, function(data) exp(data)/sum(exp(data))))
256 |                 } else {
257 |                     out[[j]] <- tmpPred * NA
258 |                     for (i in 1:dim(tmpPred)[3]) {
259 |                         out[[j]][, , i] <- t(apply(tmpPred[, , i, drop = FALSE], 1, function(data) exp(data)/sum(exp(data))))
260 |                     }
261 |                 }
262 |             })
263 |         }
264 |     }
265 |     else {
266 |         for (j in 1:N) {
267 |             caret:::requireNamespaceQuietStop("klaR")
268 |             tmp <- vector(mode = "list", length = length(ncomp))
269 |             for (i in seq(along = ncomp)) {
270 |                 tmp[[i]] <- predict(object$probModel[[j]][[ncomp[i]]], as.data.frame(tmpPred[, -length(object$obsLevels[[j]]), i]))
271 |             }
272 |             if (type == "class") {
273 |                 out[[j]] <- t(do.call("rbind", lapply(tmp, function(x) as.character(x$class))))
274 |                 rownames(out[[j]]) <- names(tmp[[1]]$class)
275 |                 colnames(out[[j]]) <- paste("ncomp", ncomp, sep = "")
276 |                 out[[j]] <- as.data.frame(out[[j]])
277 |                 out[[j]] <- as.data.frame(lapply(out[[j]], function(x, y) factor(x, levels = y), y = object$obsLevels[[j]]))
278 |                 if (length(ncomp) == 1) 
279 |                     out[[j]] <- out[[j]][, 1]
280 |             } else {
281 |                 out[[j]] <- array(dim = c(dim(tmp[[1]]$posterior), length(ncomp)), 
282 |                     dimnames = list(rownames(tmp[[1]]$posterior), 
283 |                         colnames(tmp[[1]]$posterior), paste("ncomp", ncomp, sep = "")))
284 |                 for (i in seq(along = ncomp)) out[[j]][, , i] <- tmp[[i]]$posterior
285 |             }
286 |         }
287 |     }
288 |     names(out) <- names(object$obsLevels)
289 |     out
290 | }
291 | 
292 | 
293 | .class2ind <- function(y) {
294 |     y <- caret:::class2ind(y)  # 0/1 indicator matrix for the factor levels
295 |     y[y==0] <- -1              # recode to -1/+1 targets for the PLS regression
296 |     y
297 | } 298 | -------------------------------------------------------------------------------- /R/splitVar.R: -------------------------------------------------------------------------------- 1 | 2 | ## multilevel tools, stolen from mixomics package 3 | .Split.variation.one.level <- function (X, Y, sample) { 4 | X = as.matrix(X) 5 | if (is.factor(sample)) { 6 | sample = as.numeric(sample) 7 | warning("the vector sample was converted into a numeric vector", 8 | call. = FALSE) 9 | } 10 | Xmi <- colMeans(X, na.rm=TRUE) 11 | Xm <- matrix(Xmi, nrow = nrow(X), ncol = ncol(X), byrow = T) 12 | indX <- cbind(sample, X) 13 | indsample <- unique(sample) 14 | n.sample <- length(indsample) 15 | Xbi <- t(apply(matrix(indsample, ncol = 1, nrow = n.sample), 16 | MARGIN = 1, FUN = function(x, indX) { 17 | indice <- which(indX[, 1,drop=FALSE] == x[1]) 18 | res <- colMeans(indX[indice, ,drop=FALSE], na.rm=TRUE)[-1] 19 | return(c(x, res)) 20 | }, indX = indX)) 21 | Xb <- apply(matrix(sample, ncol = 1, nrow = length(sample)), 22 | MARGIN = 1, FUN = function(x, Xbi) { 23 | Xbi[which(Xbi[, 1,drop=FALSE] == x), -1] 24 | }, Xbi = Xbi) 25 | Xb <- t(Xb) - Xm 26 | Xw <- X - Xm - Xb 27 | list(Xw = Xw, Xb = Xb, Xm = Xm) 28 | } 29 | 30 | 31 | .Split.variation.two.level <- function (X, factor1, factor2, sample) { 32 | if (is.factor(sample)) { 33 | sample = as.numeric(sample) 34 | warning("the vector sample was converted into a numeric vector", 35 | call. = FALSE) 36 | } 37 | Xmi <- colMeans(X, na.rm=TRUE) 38 | Xm <- matrix(Xmi, nrow = nrow(X), ncol = ncol(X), byrow = T) 39 | indX <- cbind(sample, X) 40 | Xb <- apply(indX, MARGIN = 1, FUN = function(x, indX) { 41 | indice <- which(indX[, 1,drop=FALSE] == x[1]) 42 | res <- colMeans(indX[indice, ,drop=FALSE], na.rm=TRUE) 43 | return(res[-1]) 44 | }, indX = indX) 45 | Xs <- t(Xb) 46 | Xb <- t(Xb) - Xm 47 | xbfactor1 <- X 48 | for (i in levels(factor(factor1))) { 49 | indice <- which(factor1 == i) 50 | indXX <- indX[indice, ] 51 | res1 <- apply(indXX, MARGIN = 1, FUN = function(x, indXX) { 52 | indice <- which(indXX[, 1] == x[1]) 53 | if (length(indice) == 1) { 54 | res <- colMeans(matrix(indXX[indice, ], nrow = 1, 55 | ncol = dim(indXX)[2]), na.rm=TRUE) 56 | } 57 | else { 58 | res <- colMeans(indXX[indice, ,drop=FALSE], na.rm=TRUE) 59 | } 60 | return(res[-1]) 61 | }, indXX = indXX) 62 | xbfactor1[indice, ] <- t(res1) 63 | } 64 | xbfactor2 <- X 65 | for (i in levels(factor(factor2))) { 66 | indice <- which(factor2 == i) 67 | indXX <- indX[indice, ] 68 | res1 <- apply(indXX, MARGIN = 1, FUN = function(x, indXX) { 69 | indice <- which(indXX[, 1,drop=FALSE] == x[1]) 70 | if (length(indice) == 1) { 71 | res <- colMeans(matrix(indXX[indice, ,drop=FALSE], nrow = 1, 72 | ncol = dim(indXX)[2]), na.rm=TRUE) 73 | } 74 | else { 75 | res <- colMeans(indXX[indice, ,drop=FALSE], na.rm=TRUE) 76 | } 77 | return(res[-1]) 78 | }, indXX = indXX) 79 | xbfactor2[indice, ] <- t(res1) 80 | } 81 | matfactor1 <- matrix(factor1, nrow = 1, ncol = length(factor1)) 82 | XFACTOR1 <- apply(matfactor1, MARGIN = 2, FUN = function(x, 83 | matfactor1) { 84 | indice <- which(matfactor1 == x[1]) 85 | res <- colMeans(X[indice, ,drop=FALSE], na.rm=TRUE) 86 | return(res) 87 | }, matfactor1 = matfactor1) 88 | matfactor2 <- matrix(factor2, nrow = 1, ncol = length(factor2)) 89 | XFACTOR2 <- apply(matfactor2, MARGIN = 2, FUN = function(x, 90 | matfactor2) { 91 | indice <- which(matfactor2 == x[1]) 92 | res <- colMeans(X[indice, ,drop=FALSE], na.rm=TRUE) 93 | return(res) 94 | }, matfactor2 = matfactor2) 95 | XCS <- 
xbfactor1 - Xs + Xm - t(XFACTOR1) 96 | XTS <- xbfactor2 - Xs + Xm - t(XFACTOR2) 97 | Xw <- X - Xb - Xm - XCS - XTS 98 | list(Xw = Xw, Xb = Xb, Xm = Xm, XCS = XCS, XTS = XTS) 99 | } 100 | 101 | -------------------------------------------------------------------------------- /R/splsmda.R: -------------------------------------------------------------------------------- 1 | ####################### 2 | # multilabel sPLS-DA 3 | # 4 | ######################### 5 | 6 | 7 | splsmda <- function (x, y, probMethod = "softmax", prior = NULL, ...) { 8 | caret:::requireNamespaceQuietStop("spls") 9 | funcCall <- match.call(expand.dots = TRUE) 10 | if (!is.matrix(x)) 11 | x <- as.matrix(x) 12 | if (probMethod == "softmax") { 13 | if (!is.null(prior)) 14 | warning("Priors are ignored unless probMethod = \"Bayes\"") 15 | } 16 | if (is.factor(y)) { 17 | obsLevels <- list(levels(y)) 18 | oldY <- list(y) 19 | y <- .class2ind(y) 20 | } else if (is.data.frame(y)) { 21 | ## coerce to columns to factors 22 | for (i in 1:ncol(y)) y[,i] <- droplevels(as.factor(y[,i])) 23 | obsLevels <- lapply(y, levels) 24 | oldY <- y 25 | y <- do.call('cbind', lapply(y, .class2ind)) 26 | } 27 | else { 28 | if (is.matrix(y)) { 29 | # test <- apply(y, 1, sum) 30 | # if (any(test != 1)) 31 | # stop("the rows of y must be 0/1 and sum to 1") 32 | obsLevels <- list(colnames(y)) 33 | if (is.null(obsLevels)) 34 | stop("the y matrix must have column names") 35 | oldY <- obsLevels[apply(y, 1, which.max)] 36 | } 37 | else stop("y must be a matrix, data.frame or a factor") 38 | } 39 | tmpData <- data.frame(n = paste("row", 1:nrow(y), sep = "")) 40 | tmpData$y <- y 41 | tmpData$x <- x 42 | out <- .spls(x, y, ...) 43 | out$obsLevels <- obsLevels 44 | out$probMethod <- probMethod 45 | if (probMethod == "Bayes") { 46 | caret:::requireNamespaceQuietStop("klaR") 47 | makeModels <- function(x, y, pri) { 48 | probModel <- klaR::NaiveBayes(x, y, prior = pri, usekernel = TRUE) 49 | probModel$train <- predict(probModel)$posterior 50 | probModel$x <- NULL 51 | probModel 52 | } 53 | cls <- class(out) 54 | ## spoof # obslevels so nothing is dropped in the predict function 55 | out$obsLevels <- ncol(y) + 1 56 | train <- spls:::predict.spls(out, as.matrix(tmpData$x)) 57 | usedlevs <- 0 58 | probMod <- vector("list", N <- length(obsLevels)) 59 | for (i in 1:N) { 60 | ysub <- oldY[[i]] 61 | nlevs <- nlevels(ysub) 62 | traintemp <- train[, (i+usedlevs):(nlevs+usedlevs)] 63 | traintemp <- traintemp[, -length(obsLevels[[i]]), drop = FALSE] 64 | usedlevs <- usedlevs + nlevels(ysub) 65 | probMod[[i]] <- makeModels(traintemp, y = ysub, pri = prior) 66 | } 67 | out$probModel <- probMod 68 | names(out$probModel) <- names(oldY) 69 | } 70 | else out$probModel <- NULL 71 | class(out) <- c("splsmda", "splsda") 72 | out 73 | } 74 | 75 | 76 | .spls <- function (x, y, K, eta, kappa = 0.5, select = "pls2", fit = "okernelpls", 77 | scale.x = TRUE, scale.y = FALSE, eps = 1e-04, maxstep = 100, trace = FALSE, ...) 
{ 78 | caret:::requireNamespaceQuietStop("spls") 79 | caret:::requireNamespaceQuietStop("pls") 80 | x <- as.matrix(x) 81 | n <- nrow(x) 82 | p <- ncol(x) 83 | ip <- c(1:p) 84 | y <- as.matrix(y) 85 | q <- ncol(y) 86 | one <- matrix(1, 1, n) 87 | mu <- one %*% y/n 88 | y <- scale(y, drop(mu), FALSE) 89 | meanx <- drop(one %*% x)/n 90 | x <- scale(x, meanx, FALSE) 91 | if (scale.x) { 92 | normx <- sqrt(drop(one %*% (x^2))/(n - 1)) 93 | if (any(normx < .Machine$double.eps)) { 94 | stop("Some of the columns of the predictor matrix have zero variance.") 95 | } 96 | x <- scale(x, FALSE, normx) 97 | } 98 | else { 99 | normx <- rep(1, p) 100 | } 101 | if (scale.y) { 102 | normy <- sqrt(drop(one %*% (y^2))/(n - 1)) 103 | if (any(normy < .Machine$double.eps)) { 104 | stop("Some of the columns of the response matrix have zero variance.") 105 | } 106 | y <- scale(y, FALSE, normy) 107 | } 108 | else { 109 | normy <- rep(1, q) 110 | } 111 | betahat <- matrix(0, p, q) 112 | betamat <- list() 113 | x1 <- x 114 | y1 <- y 115 | type <- spls:::correctp(x, y, eta, K, kappa, "simpls", "simpls") 116 | eta <- type$eta 117 | K <- type$K 118 | kappa <- type$kappa 119 | # select <- type$select 120 | if (is.null(colnames(x))) { 121 | xnames <- c(1:p) 122 | } 123 | else { 124 | xnames <- colnames(x) 125 | } 126 | new2As <- list() 127 | if (trace) { 128 | cat("The variables that join the set of selected variables at each step:\n") 129 | } 130 | for (k in 1:K) { 131 | Z <- t(x1) %*% y1 132 | what <- spls:::spls.dv(Z, eta, kappa, eps, maxstep) 133 | A <- unique(ip[what != 0 | betahat[, 1] != 0]) 134 | new2A <- ip[what != 0 & betahat[, 1] == 0] 135 | xA <- x[, A, drop = FALSE] 136 | plsfit <- .plsr(y ~ xA, ncomp = min(k, length(A)), 137 | method = fit, scale = FALSE, ...) 138 | betahat <- matrix(0, p, q) 139 | betahat[A, ] <- matrix(coef(plsfit), length(A), q) 140 | betamat[[k]] <- betahat 141 | pj <- plsfit$projection 142 | scores <- plsfit$scores 143 | if (select == "pls2") { 144 | y1 <- y - x %*% betahat 145 | } 146 | if (select == "simpls") { 147 | pw <- pj %*% solve(t(pj) %*% pj) %*% t(pj) 148 | x1 <- x 149 | x1[, A] <- x[, A, drop = FALSE] - x[, A, drop = FALSE] %*% pw 150 | } 151 | if (select == "okernelpls") { 152 | modbeta <- plsfit$modbeta 153 | y1 <- y - scores %*% modbeta 154 | } 155 | new2As[[k]] <- new2A 156 | if (trace) { 157 | if (length(new2A) <= 10) { 158 | cat(paste("- ", k, "th step (K=", k, "):\n", 159 | sep = "")) 160 | cat(xnames[new2A]) 161 | cat("\n") 162 | } 163 | else { 164 | cat(paste("- ", k, "th step (K=", k, "):\n", 165 | sep = "")) 166 | nlines <- ceiling(length(new2A)/10) 167 | for (i in 0:(nlines - 2)) { 168 | cat(xnames[new2A[(10 * i + 1):(10 * (i + 1))]]) 169 | cat("\n") 170 | } 171 | cat(xnames[new2A[(10 * (nlines - 1) + 1):length(new2A)]]) 172 | cat("\n") 173 | } 174 | } 175 | } 176 | if (!is.null(colnames(x))) { 177 | rownames(betahat) <- colnames(x) 178 | } 179 | if (q > 1 & !is.null(colnames(y))) { 180 | colnames(betahat) <- colnames(y) 181 | } 182 | object <- list(x = x, y = y, betahat = betahat, A = A, betamat = betamat, 183 | new2As = new2As, mu = mu, meanx = meanx, normx = normx, 184 | normy = normy, eta = eta, K = K, kappa = kappa, select = select, 185 | fit = fit, projection = pj, scores=plsfit$scores) 186 | class(object) <- "spls" 187 | object 188 | } 189 | 190 | 191 | predict.splsmda <- function (object, newdata, type = "class", ...) 
{
192 |     caret:::requireNamespaceQuietStop("spls")
193 |     tmpPred <- spls:::predict.spls(object, newx = newdata)
194 |     if (type == "raw") 
195 |         return(tmpPred)
196 |     if (is.null(object$probModel)) {
197 |         # softmax
198 |         usedlevs <- 1
199 |         out <- vector("list", N <- length(object$obsLevels))
200 |         for (i in 1:N) {
201 |             levs <- object$obsLevels[[i]]
202 |             nlevs <- length(levs)
203 |             tmpPred2 <- as.data.frame(tmpPred[, (usedlevs):(nlevs+usedlevs-1)])
204 |             out[[i]] <- switch(type, class = {
205 |                 classIndex <- levs[apply(tmpPred2, 1, which.max)]
206 |                 factor(classIndex, levels = levs)
207 |             }, prob = t(apply(tmpPred2, 1, function(data) exp(data)/sum(exp(data))))
208 |             )
209 |             usedlevs <- usedlevs + nlevs
210 |         }
211 |     }
212 |     else {
213 |         # Naive Bayes
214 |         caret:::requireNamespaceQuietStop("klaR")
215 |         usedlevs <- 1
216 |         out <- vector("list", N <- length(object$obsLevels))
217 |         for (i in 1:N) {
218 |             levs <- object$obsLevels[[i]]
219 |             nlevs <- length(levs)
220 |             tmpPred2 <- as.data.frame(tmpPred[, (usedlevs):(nlevs+usedlevs-1)])
221 |             tmpPred2 <- tmpPred2[, -nlevs, drop = FALSE]
222 |             pred <- predict(object$probModel[[i]], tmpPred2)
223 |             out[[i]] <- switch(type, class = pred$class, prob = pred$posterior)
224 |             usedlevs <- usedlevs + nlevs
225 |         }
226 |     }
227 |     names(out) <- names(object$obsLevels)
228 |     out
229 | }
230 | 
231 | 
232 | 
233 | 
234 | 
235 | 
--------------------------------------------------------------------------------
/R/stability.R:
--------------------------------------------------------------------------------
1 | 
2 | #' Stability selection of sparse (s)PLS models
3 | #' via the StARS criterion
4 | #' @export
5 | spls.stars <- function (x, y, fold = 10, K, eta, kappa = 0.5, select = "pls2", 
6 |     fit = "simpls", scale.x = TRUE, scale.y = FALSE, stars.thresh = 0.05, ncores=2, rep.num=20, 
7 |     stars.subsample.ratio = NULL) {
8 |     x <- as.matrix(x)
9 |     n <- nrow(x)
10 |     p <- ncol(x)
11 |     ip <- c(1:p)
12 |     y <- as.matrix(y)
13 |     q <- ncol(y)
14 | 
15 |     if (is.null(stars.subsample.ratio)) {
16 |         if (n > 144) 
17 |             stars.subsample.ratio = 10 * sqrt(n)/n  # StARS default subsample size, 10*sqrt(n)
18 |         if (n <= 144) 
19 |             stars.subsample.ratio = 0.8
20 |     }
21 | 
22 |     mergelist <- vector('list', length(eta))
23 |     ## compute variability
24 |     mergeArr <- array(0, dim=c(p, length(eta), length(K)))
25 |     variability <- matrix(0, length(eta), length(K))
26 | 
27 |     for (j in 1:length(eta)) {
28 |         cat(paste("eta =", eta[j], "\n"))
29 |         merge <- parallel::mclapply(1:rep.num, function(i) {
30 |             ind.sample = sample(c(1:n), floor(n * stars.subsample.ratio), replace = FALSE)
31 |             object <- spls::spls(x[ind.sample, , drop = FALSE], y[ind.sample, 
32 |                 , drop = FALSE], eta = eta[j], kappa = kappa, 
33 |                 K = max(K), select = select, fit = fit, scale.x = scale.x, 
34 |                 scale.y = scale.y, trace = FALSE)
35 |             return(object$betamat)
36 |         }, mc.cores=ncores)
37 |         mergelist[[j]] <- merge
38 |         for (k.ind in 1:length(K)) {
39 |             k <- K[k.ind]
40 |             mm <- sapply(mergelist[[j]][1:rep.num], function(x) sign(abs(x[[k]][,1])))
41 |             mergeArr[,j,k.ind] <- apply(mm, 1, function(x) sum(x)/length(x))  # per-feature selection frequency
42 |             variability[j,k.ind] <- 4 * sum(mergeArr[,j,k.ind] * (1 - mergeArr[,j,k.ind]))/(p)  # StARS-style instability score
43 |         }
44 |     }
45 |     colnames(variability) <- K
46 |     rownames(variability) <- eta
47 |     opt.index = max(which.max(variability >= 
48 |         stars.thresh)[1] - 1, 1)
49 |     return(list(merge=mergeArr, variability=variability, opt.index=opt.index))
50 | }
51 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | compPLS
2 | =======
3 | 
4 | An R package for [sparse] Partial Least Squares discriminant analysis and biplots for compositional data analysis.
5 | 
6 | This package implements the method developed in Lee et al. (2014) [**1**] for classifying independently-sampled microbial compositions by the helminth-infection status of people in Malaysia. Under an assumption of model sparsity (that is, that relatively few microbial populations truly correlate with helminth status), the method yields a factor analysis as well as a classifier.
7 | 
8 | Currently, this package consists of functions for compositionally-robust data transformations that can be applied prior to (sparse) Partial Least Squares discriminant analysis; the PLS-DA functions themselves are wrappers around key functions from the caret package [**2**]. As described in the paper, I've added parameter selection via cross-validation and bootstrap-based p-value calculation for assessing PLS model coefficients (for feature selection). The package also includes ggplot2-based biplot methods for PLS-DA output, and for a few other commonly used projection/ordination/classification methods; this code was forked from vqv's ggbiplot package [**3**].
9 | 
10 | This package is still under development and I am currently adding features, based on some new and exciting microbiome data being generated by the good people at the Loke lab [**4**] and collaborators. Basic installation instructions and examples follow below.
11 | 
12 | 
13 | Installation
14 | ------------
15 | This development package requires the `devtools` package for installation. Additionally, `compPLS` depends on the `caret` and `MASS` packages. Suggested packages are `boot` (for bootstrapping), and `ggplot2`, `grid` and `scales` (for biplots).
16 | 
17 | ```R
18 | library(devtools)
19 | install_github('zdk123/compPLS')
20 | ```
21 | 
22 | Usage
23 | -----
24 | Some minimal examples of running 1) PLS-DA and 2) sparse PLS-DA, with biplots of the results.
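As a quick sanity check of the transformation step (a minimal sketch; by construction, each clr-transformed composition sums to zero):

```R
library(compPLS)
# clr-transform along the row margin (1): each row is one composition/sample
x     <- matrix(exp(rnorm(40)), nrow=8)
x.clr <- clr(x, 1)
rowSums(x.clr)   # approximately zero for every sample
```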
25 | 
26 | ```R
27 | # a (too) low-dimensional example, for code demo purposes only
28 | data(ArcticLake)
29 | # clr transform the data along the row margin (1)
30 | ALake.clr <- clr(ArcticLake[,1:3], 1)
31 | res <- plsDA(ALake.clr, grouping=ArcticLake[,4], K=2)
32 | ggbiplot(res, grouping=ArcticLake[,4], group.ellipse=TRUE, label.loadings=TRUE, label.offset=.2, alpha=.6)
33 | # alternative biplot
34 | biplot(res)
35 | 
36 | ## a higher-dimensional example
37 | set.seed(1100)
38 | data(Hydrochem)
39 | Hchem.clr <- clr(Hydrochem[,6:19], 1)
40 | # try without bootstrapping
41 | res <- plsDA_main(Hchem.clr, grouping=Hydrochem$River, K=8:10, nboots=0) #, n.core=4) # if on a multicore system
42 | ggbiplot(res$plsda, grouping=Hydrochem$River, group.ellipse=TRUE, alpha=.5, plot.loadings=FALSE, label.loadings=TRUE)
43 | optK <- res$plsda$ncomp
44 | 
45 | # do bootstrapping; warning: this can take a long time
46 | res <- plsDA_main(Hchem.clr, grouping=Hydrochem$River, K=optK, nboots=999) #, n.core=4) # if on a multicore system
47 | ```
48 | 
49 | [**1**] http://www.plosntds.org/article/info%3Adoi%2F10.1371%2Fjournal.pntd.0002880
50 | 
51 | [**2**] http://topepo.github.io/caret/index.html
52 | 
53 | [**3**] https://github.com/vqv/ggbiplot
54 | 
55 | [**4**] http://microbiology-parasitology.med.nyu.edu/png-loke
56 | 
57 | 
58 | 
--------------------------------------------------------------------------------
/codexample_clrpca1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zdk123/compPLS/078bda73ae23b3181f4874bea084c2db51ecc7a2/codexample_clrpca1.pdf
--------------------------------------------------------------------------------
/data/ArcticLake.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zdk123/compPLS/078bda73ae23b3181f4874bea084c2db51ecc7a2/data/ArcticLake.rda
--------------------------------------------------------------------------------
/data/Hydrochem.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zdk123/compPLS/078bda73ae23b3181f4874bea084c2db51ecc7a2/data/Hydrochem.rda
--------------------------------------------------------------------------------
/man/alr.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/normalization.R
3 | \name{alr}
4 | \alias{alr}
5 | \alias{alr.default}
6 | \alias{alr.matrix}
7 | \alias{alr.data.frame}
8 | \title{alr The additive log-ratio transformation}
9 | \usage{
10 | alr(x, ...)
11 | 
12 | \method{alr}{default}(
13 |   x,
14 |   divcomp = 1,
15 |   base = exp(1),
16 |   removeDivComp = TRUE,
17 |   tol = .Machine$double.eps
18 | )
19 | 
20 | \method{alr}{matrix}(
21 |   x,
22 |   mar = 2,
23 |   divcomp = 1,
24 |   base = exp(1),
25 |   removeDivComp = TRUE,
26 |   tol = .Machine$double.eps
27 | )
28 | 
29 | \method{alr}{data.frame}(x, mar = 2, ...)
30 | } 31 | \arguments{ 32 | \item{x}{a numeric data vector containing components of a composition} 33 | 34 | \item{...}{additional arguments} 35 | 36 | \item{divcomp}{index of the divisor component} 37 | 38 | \item{base}{base of log to use, default is natural log} 39 | 40 | \item{removeDivComp}{remove divisor component from the resulting data} 41 | 42 | \item{tol}{machine tolerance for a zero count, default is machine tol (.Machine$double.eps)} 43 | } 44 | \value{ 45 | alr transformed \code{x} 46 | } 47 | \description{ 48 | The additive log-ratio transformation for 49 | compositional data (not necessarily closed/normalized!) 50 | } 51 | \details{ 52 | The alr transformation is computed as: 53 | \code{x[i]} = log ( \code{x[i]} / x[D] ) 54 | } 55 | \examples{ 56 | # vector examples: 57 | alr(norm_to_total(1:10)) 58 | alr(1:10) 59 | 60 | # matrix examples: 61 | dmat <- matrix(exp(rnorm(110)), 10) 62 | alr(dmat, 1) 63 | alr(dmat, 2) 64 | } 65 | -------------------------------------------------------------------------------- /man/clr.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/normalization.R 3 | \name{clr} 4 | \alias{clr} 5 | \alias{clr.default} 6 | \alias{clr.matrix} 7 | \alias{clr.data.frame} 8 | \title{clr The centered log-ratio transformation} 9 | \usage{ 10 | clr(x, ...) 11 | 12 | \method{clr}{default}(x, base = exp(1), tol = .Machine$double.eps) 13 | 14 | \method{clr}{matrix}(x, mar = 2, base = exp(1), tol = .Machine$double.eps) 15 | 16 | \method{clr}{data.frame}(x, mar = 2, ...) 17 | } 18 | \arguments{ 19 | \item{x}{a numeric data vector containing components of a composition} 20 | 21 | \item{...}{additional arguments} 22 | 23 | \item{base}{base of log to use, default is natural log} 24 | 25 | \item{tol}{machine tolerance for a zero count, default is machine tol (.Machine$double.eps)} 26 | } 27 | \value{ 28 | clr transformed \code{x} 29 | } 30 | \description{ 31 | The centered log-ratio transformation for 32 | compositional data (not necessarily closed/normalized!) 33 | } 34 | \details{ 35 | The clr is computed as 36 | \code{x[i]} = log (\code{x[i]} / \code{exp(mean(log(x)))}) 37 | } 38 | \examples{ 39 | # vector examples: 40 | clr(norm_to_total(1:10)) 41 | clr(1:10) 42 | 43 | # matrix examples: 44 | dmat <- matrix(exp(rnorm(110)), 10) 45 | clr(dmat, 1) 46 | clr(dmat, 2) 47 | } 48 | -------------------------------------------------------------------------------- /man/compPLS-package.Rd: -------------------------------------------------------------------------------- 1 | \name{compPLS-package} 2 | \alias{compPLS-package} 3 | \alias{compPLS} 4 | \docType{package} 5 | \title{ 6 | A short title line describing what the package does 7 | } 8 | \description{ 9 | A more detailed description of what the package does. A length 10 | of about one to five lines is recommended. 11 | } 12 | \details{ 13 | This section should provide a more detailed overview of how to use the 14 | package, including the most important functions. 15 | } 16 | \author{ 17 | Who wrote it, email optional. 18 | 19 | Maintainer: Your Name 20 | } 21 | \references{ 22 | This optional section can contain literature or other references for 23 | background information. 24 | } 25 | % Optionally other standard keywords, one per line, 26 | % from the file KEYWORDS in the R documentation. 
27 | \keyword{ package } 28 | \seealso{ 29 | Optional links to other man pages 30 | } 31 | \examples{ 32 | # Optional simple examples of the most important functions 33 | } 34 | -------------------------------------------------------------------------------- /man/ggbiplot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ggbiplot.R 3 | \name{ggbiplot} 4 | \alias{ggbiplot} 5 | \alias{ggbiplot.princomp} 6 | \alias{ggbiplot.prcomp} 7 | \alias{ggbiplot.lda} 8 | \alias{ggbiplot.plsda} 9 | \alias{ggbiplot.splsda} 10 | \alias{ggbiplot.matrix} 11 | \alias{ggbiplot.default} 12 | \title{methods for making biplots from various projection & classification models} 13 | \usage{ 14 | ggbiplot(xobj, ...) 15 | 16 | \method{ggbiplot}{princomp}(xobj, ...) 17 | 18 | \method{ggbiplot}{prcomp}(xobj, ...) 19 | 20 | \method{ggbiplot}{lda}(xobj, ...) 21 | 22 | \method{ggbiplot}{plsda}(xobj, Yplot = FALSE, ...) 23 | 24 | \method{ggbiplot}{splsda}(xobj, ...) 25 | 26 | \method{ggbiplot}{matrix}(xobj, ...) 27 | 28 | \method{ggbiplot}{default}( 29 | xobj, 30 | grouping, 31 | select = 1:2, 32 | circle = FALSE, 33 | circle.prob = 0.69, 34 | plot.loadings = TRUE, 35 | label.loadings = FALSE, 36 | sub.loadings = 1:nrow(xobj$loadings), 37 | label.offset = 0, 38 | label.size = 4.5, 39 | scale.loadings = 1, 40 | col.loadings = scales::muted("red"), 41 | alpha = 1, 42 | col = grouping, 43 | shape = NULL, 44 | group.ellipse = FALSE, 45 | scale.ellipse = 1, 46 | group.cloud = FALSE, 47 | xlab = "", 48 | ylab = "", 49 | equalcoord = TRUE, 50 | size = 3, 51 | size.loadings = 1, 52 | loadingsOnTop = FALSE 53 | ) 54 | } 55 | \arguments{ 56 | \item{xobj}{The object to be plotted} 57 | 58 | \item{grouping}{an optional grouping vector (ie - for coloring points)} 59 | 60 | \item{select}{index of components to be plotted (must be length 2)} 61 | 62 | \item{circle}{enclose points in a circle} 63 | 64 | \item{circle.prob}{controls circle diameter (scales data std dev) if \code{circle = TRUE}} 65 | 66 | \item{plot.loadings}{should loading vectors be plotted} 67 | 68 | \item{label.loadings}{text of loadings labels, taken from rownames of loadings (depends on class of \code{xobj})} 69 | 70 | \item{label.offset}{absolute offset for loading labels, so labels don't cover loadings vectors} 71 | 72 | \item{scale.loadings}{scale length of loading vectors for plotting purposes} 73 | 74 | \item{col.loadings}{a single value of vector for color of loadings} 75 | 76 | \item{alpha}{controls relative transparency of various plot features} 77 | 78 | \item{col}{color factor for points} 79 | 80 | \item{group.ellipse}{enclose within-group points in an covariance ellipse} 81 | 82 | \item{scale.ellipse}{scale \code{group.ellipse} to 1 standard deviation} 83 | 84 | \item{group.cloud}{connect within-group points to a group mean point with a straight edge} 85 | 86 | \item{xlab}{label for x axis} 87 | 88 | \item{ylab}{label for y axis} 89 | 90 | \item{equalcoord}{equal coordinates, ie should the plot area be square?} 91 | 92 | \item{size}{point size} 93 | 94 | \item{size.loadings}{line width of loading vectors} 95 | } 96 | \description{ 97 | Pretty biplots using ggplots 98 | } 99 | \details{ 100 | additional plotting attributes (eg colors, themes, etc) can be chained on in the usual way for ggplots 101 | } 102 | \examples{ 103 | # an LDA example with iris data 104 | ldamod <- lda(iris[,1:4], grouping=iris[,5]) 105 | ggbiplot(ldamod, 
grouping=iris[,5], alpha=.7, group.cloud=TRUE) + theme_bw()
106 | }
107 | 
--------------------------------------------------------------------------------
/man/hello.Rd:
--------------------------------------------------------------------------------
1 | \name{hello}
2 | \alias{hello}
3 | \title{
4 | A simple function doing little
5 | }
6 | \description{
7 | This function shows a standard text on the console. In a time-honoured
8 | tradition, it defaults to displaying \emph{hello, world}.
9 | }
10 | \examples{
11 | hello()
12 | hello("and goodbye")
13 | }
14 | 
--------------------------------------------------------------------------------
/man/plsDA.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/PLSDA.R
3 | \name{plsDA}
4 | \alias{plsDA}
5 | \title{plsDA partial least squares discriminant analysis}
6 | \usage{
7 | plsDA(x, grouping, K, usePriors = FALSE, plsfun = caret::plsda, ...)
8 | }
9 | \arguments{
10 | \item{x}{data with samples in rows, features are columns (not necessarily compositional data)}
11 | 
12 | \item{grouping}{a numeric vector or factor with sample classes (length should equal \code{nrow(x)})}
13 | 
14 | \item{K}{number of components in the PLS model (default: number of classes - 1)}
15 | 
16 | \item{usePriors}{use priors for very biased sample size between groups (ie - put strong penalty on misclassifying small groups)}
17 | }
18 | \value{
19 | a plsda fitted model
20 | }
21 | \description{
22 | Partial Least Squares Discriminant Analysis
23 | PLS regression to discriminate classes (via a logistic model)
24 | basically this is a wrapper for the \code{plsda} function in the caret package,
25 | but with default setup for dealing with uneven classes (via the priors option, see details)
26 | see caret::plsda for implementation details
27 | }
28 | \details{
29 | run this code if you don't need to fit parameters by cross-validation
30 | }
31 | \seealso{
32 | \code{\link{plsDA_main}}, \code{\link[caret]{plsda}}
33 | }
34 | 
--------------------------------------------------------------------------------
/man/plsDA_main.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/PLSDA.R, R/SPLSDA.R
3 | \name{plsDA_main}
4 | \alias{plsDA_main}
5 | \alias{splsDA_main}
6 | \title{plsDA_main partial least squares discriminant analysis}
7 | \usage{
8 | plsDA_main(
9 |   x,
10 |   grouping,
11 |   K,
12 |   usePriors = FALSE,
13 |   fold = 5,
14 |   nboots = 999,
15 |   n.core = 4,
16 |   noise = 0,
17 |   ...
18 | )
19 | 
20 | splsDA_main(
21 |   x,
22 |   grouping,
23 |   eta,
24 |   K,
25 |   usePriors = FALSE,
26 |   fold = 5,
27 |   nboots = 999,
28 |   n.core = 4,
29 |   noise = 0,
30 |   ...
31 | )
32 | }
33 | \arguments{
34 | \item{x}{data with samples in rows, features are columns (not necessarily compositional data)}
35 | 
36 | \item{grouping}{a numeric vector or factor with sample classes (length should equal \code{nrow(x)})}
37 | 
38 | \item{K}{numeric vector containing number of components in the PLS model}
39 | 
40 | \item{usePriors}{use priors for very biased sample size between groups (ie - put strong penalty on misclassifying small groups)}
41 | 
42 | \item{fold}{number of partitions to randomly subsample for cross-validation}
43 | 
44 | \item{nboots}{number of bootstraps/permutations for estimating coefficient p-vals}
45 | 
46 | \item{n.core}{number of cores for parallelization of bootstraps}
47 | 
48 | \item{noise}{for very sparse components, some subsamples may have zero variance. Optionally, add some Gaussian noise to avoid PLS errors}
49 | 
50 | \item{...}{additional arguments passed to plsDA}
51 | }
52 | \value{
53 | a \code{plsDA} object that contains: the plsda model/object, \code{pvals}, the original data, \code{x}, and \code{groupings}
54 | }
55 | \description{
56 | The main wrapper for full Partial Least Squares discriminant analysis,
57 | performing cross-validation to tune model parameters (here, number of components)
58 | and do permutation tests (ie bootstrapping) to get pseudo-pval estimates for model coefficients
59 | 
60 | The main wrapper for full sparse Partial Least Squares discriminant analysis,
61 | performing cross-validation to tune model parameters (here, number of components)
62 | and do permutation tests (ie bootstrapping) to get pseudo-pval estimates for model coefficients
63 | }
64 | \seealso{
65 | \code{\link{plsDA}}
66 | }
67 | 
--------------------------------------------------------------------------------
/man/spls.stars.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/stability.R
3 | \name{spls.stars}
4 | \alias{spls.stars}
5 | \title{Stability selection of sparse (s)PLS models
6 | via the StARS criterion}
7 | \usage{
8 | spls.stars(
9 |   x,
10 |   y,
11 |   fold = 10,
12 |   K,
13 |   eta,
14 |   kappa = 0.5,
15 |   select = "pls2",
16 |   fit = "simpls",
17 |   scale.x = TRUE,
18 |   scale.y = FALSE,
19 |   stars.thresh = 0.05,
20 |   ncores = 2,
21 |   rep.num = 20,
22 |   stars.subsample.ratio = NULL
23 | )
24 | }
25 | \description{
26 | Stability selection of sparse (s)PLS models
27 | via the StARS criterion
28 | }
29 | 
--------------------------------------------------------------------------------
/man/splsDA.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/SPLSDA.R
3 | \name{splsDA}
4 | \alias{splsDA}
5 | \title{splsDA sparse partial least squares discriminant analysis}
6 | \usage{
7 | splsDA(x, grouping, eta, K, usePriors = FALSE, ...)
8 | }
9 | \arguments{
10 | \item{x}{data with samples in rows, features are columns (not necessarily compositional data)}
11 | 
12 | \item{grouping}{a numeric vector or factor with sample classes (length should equal \code{nrow(x)})}
13 | 
14 | \item{eta}{parameter that adjusts sparsity of the PLS model (between 0 and 1)}
15 | 
16 | \item{K}{number of components in the PLS model (default: number of classes - 1)}
17 | 
18 | \item{usePriors}{use priors for very biased sample size between groups (ie - put strong penalty on misclassifying small groups)}
19 | 
20 | }
21 | \value{
22 | a plsda fitted model
23 | }
24 | \description{
25 | sparse Partial Least Squares Discriminant Analysis
26 | sPLS regression to discriminate classes (via a logistic model)
27 | basically this is a wrapper for the \code{splsda} function in the caret package,
28 | but with default setup for dealing with uneven classes (via the priors option, see details)
29 | see caret::splsda for implementation details
30 | }
31 | \details{
32 | run this code if you don't need to fit parameters by cross-validation
33 | }
34 | \seealso{
35 | \code{\link{plsDA_main}}, \code{\link[caret]{plsda}}, \code{\link[caret]{splsda}}
36 | }
37 | 
--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | test_check("compPLS")
3 | 
--------------------------------------------------------------------------------
/tests/testthat/test_norms_and_transforms.R:
--------------------------------------------------------------------------------
1 | context("norms and transforms")
2 | 
3 | test_that("total sum norms add up", {
4 |     rvec <- exp(rnorm(10))
5 |     expect_equal(sum(norm_to_total(rvec)), 1)
6 |     expect_equal(sum(norm_pseudo(rvec)), 1)
7 | })
8 | 
9 | 
10 | test_that("clr vector output is expected", {
11 |     testvec1 <- 1:10
12 |     testvecrand <- runif(10, 0, 1)
13 |     # test clr function and various equivalent definitions of the clr
14 |     expect_equal(clr(testvec1), log(testvec1/prod(testvec1)^(1/length(testvec1))))
15 |     expect_equal(clr(testvec1), log(testvec1/exp(mean(log(testvec1)))))
16 |     expect_equal(clr(testvec1), log(testvec1) - mean(log(testvec1)))
17 | 
18 |     expect_equal(clr(testvecrand), log(testvecrand/prod(testvecrand)^(1/length(testvecrand))))
19 |     expect_equal(clr(testvecrand), log(testvecrand/exp(mean(log(testvecrand)))))
20 |     expect_equal(clr(testvecrand), log(testvecrand) - mean(log(testvecrand)))
21 | 
22 |     ## test presence of zero component
23 |     expect_equal(clr(c(0, testvecrand))[1], 0)
24 | 
25 |     ## test that a composition doesn't need to be closed
26 |     expect_equal(clr(testvec1), clr(norm_to_total(testvec1)))
27 |     expect_equal(clr(testvecrand), clr(norm_to_total(testvecrand)))
28 | })
29 | 
30 | test_that("clr matrix output is expected", {
31 | 
32 |     testmat1 <- matrix(1:110, 10)
33 |     testmatrand <- matrix(runif(110, 0, 1), 10)
34 | 
35 |     expect_equal(clr(testmat1, 1), t(apply(testmat1, 1, clr.default)))
36 |     expect_equal(clr(testmat1, 2), apply(testmat1, 2, clr.default))
37 | 
38 |     expect_equal(clr(testmatrand, 1), t(apply(testmatrand, 1, clr.default)))
39 |     expect_equal(clr(testmatrand, 2), apply(testmatrand, 2, clr.default))
40 | 
41 |     ## test zero counts
42 |     expect_equal(clr(rbind(0, testmatrand), 2)[1,], rep(0, ncol(testmatrand)))
43 | 
44 |     ## test that a composition doesn't need to be closed
45 |     expect_equal(clr(testmat1, 2), clr(apply(testmat1, 2, norm_to_total), 2))
46 |     expect_equal(clr(testmatrand, 2), 
clr(apply(testmatrand, 2, norm_to_total), 2))
47 | 
48 | })
49 | 
50 | 
51 | test_that("clr data.frame output is expected", {
52 |     testmatrand <- matrix(runif(110, 0, 1), 10)  # define locally: objects from other test_that blocks are out of scope
53 |     testdf <- as.data.frame(testmatrand)
54 |     colnames(testmatrand) <- colnames(testdf)
55 |     expect_equal(clr(testdf, 2), clr(testmatrand, 2))
56 |     expect_equal(clr(testdf, 1), clr(testmatrand, 1))
57 | })
58 | 
59 | 
60 | 
61 | test_that("alr vector output is expected", {
62 |     testvec1 <- 1:10
63 |     testvecrand <- runif(10, 0, 1)
64 |     # test alr function and various equivalent definitions of the alr
65 |     expect_equal(alr(testvec1, divcomp=1), log(testvec1/testvec1[1])[-1])
66 |     expect_equal(alr(testvec1, divcomp=1), log(testvec1)[-1] - log(testvec1[1]))
67 |     expect_equal(alr(testvec1, divcomp=5), log(testvec1/testvec1[5])[-5])
68 |     expect_equal(alr(testvec1, divcomp=5), log(testvec1)[-5] - log(testvec1[5]))
69 | 
70 |     expect_equal(alr(testvecrand, divcomp=1), log(testvecrand/testvecrand[1])[-1])
71 |     expect_equal(alr(testvecrand, divcomp=1), log(testvecrand)[-1] - log(testvecrand[1]))
72 |     expect_equal(alr(testvecrand, divcomp=5), log(testvecrand/testvecrand[5])[-5])
73 |     expect_equal(alr(testvecrand, divcomp=5), log(testvecrand)[-5] - log(testvecrand[5]))
74 | 
75 | 
76 | 
77 |     ## test presence of zero component
78 |     expect_equal(alr(c(0, testvecrand), divcomp=2)[1], 0)
79 | 
80 |     ## test that a composition doesn't need to be closed
81 |     expect_equal(alr(testvec1), alr(norm_to_total(testvec1)))
82 |     expect_equal(alr(testvecrand), alr(norm_to_total(testvecrand)))
83 | })
84 | 
85 | test_that("alr matrix output is expected", {
86 | 
87 |     testmat1 <- matrix(1:110, 10)
88 |     testmatrand <- matrix(runif(110, 0, 1), 10)
89 | 
90 |     expect_equal(alr(testmat1, 1), t(apply(testmat1, 1, alr.default)))
91 |     expect_equal(alr(testmat1, 2), apply(testmat1, 2, alr.default))
92 | 
93 |     expect_equal(alr(testmatrand, 1), t(apply(testmatrand, 1, alr.default)))
94 |     expect_equal(alr(testmatrand, 2), apply(testmatrand, 2, alr.default))
95 | 
96 |     ## test zero counts
97 |     expect_equal(alr(cbind(0, testmatrand), 1, divcomp=2)[,1], rep(0, nrow(testmatrand)))
98 |     expect_equal(alr(rbind(0, testmatrand), 2, divcomp=2)[1,], rep(0, ncol(testmatrand)))
99 | 
100 |     ## test that a composition doesn't need to be closed
101 |     expect_equal(alr(testmat1, 2), alr(apply(testmat1, 2, norm_to_total), 2))
102 |     expect_equal(alr(testmatrand, 2), alr(apply(testmatrand, 2, norm_to_total), 2))
103 | 
104 | })
105 | 
106 | 
107 | test_that("alr data.frame output is expected", {
108 |     testmatrand <- matrix(runif(110, 0, 1), 10)  # define locally, as above
109 |     testdf <- as.data.frame(testmatrand)
110 |     colnames(testmatrand) <- colnames(testdf)
111 |     expect_equal(alr(testdf, 2), alr(testmatrand, 2))
112 |     expect_equal(alr(testdf, 1), alr(testmatrand, 1))
113 | })
114 | 
115 | 
116 | 
117 | 
--------------------------------------------------------------------------------
/tests/testthat/test_plsda.R:
--------------------------------------------------------------------------------
1 | context("partial least squares discriminant analysis")
2 | 
3 | 
4 | irisdat <- clr(iris[,1:4], 1)
5 | y <- iris[,5]
6 | mod <- plsDA(irisdat, y)
7 | 
8 | test_that("plsDA returns correct class", {
9 | 
10 |     expect_s3_class(mod, 'plsda')
11 |     expect_s3_class(mod, 'mvr')
12 | 
13 | })
14 | 
--------------------------------------------------------------------------------
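An additional smoke test could exercise prediction as well as fitting. A minimal sketch for `tests/testthat/test_plsda.R` (it assumes caret's `predict.plsda` method, whose `type` argument defaults to class predictions):

```R
test_that("plsDA predicts training samples sensibly", {
    preds <- predict(mod, irisdat)             # caret's predict.plsda; type = "class" by default
    expect_equal(length(preds), nrow(irisdat))
    expect_true(all(preds %in% levels(y)))
    # in-sample accuracy should easily beat chance on three balanced classes
    expect_gt(mean(preds == y), 1/3)
})
```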