├── DESCRIPTION ├── LICENSE ├── NAMESPACE ├── R ├── AllClasses.R ├── AllGenerics.R ├── ExpressionSet-methods.R ├── deFit-methods.R ├── deSet-methods.R ├── edge.R ├── form_models.R ├── getMethods.R ├── kl_clust-functions.R ├── lrt-functions.R ├── misc.R ├── odp-functions.R ├── setMethods.R └── wls.R ├── README.md ├── data ├── endotoxin.rda ├── gibson.rda └── kidney.rda ├── inst └── NEWS ├── man ├── apply_jackstraw.Rd ├── apply_qvalue.Rd ├── apply_snm.Rd ├── apply_sva.Rd ├── betaCoef.Rd ├── build_models.Rd ├── build_study.Rd ├── deFit-class.Rd ├── deSet-class.Rd ├── deSet.Rd ├── edge.Rd ├── endotoxin.Rd ├── fitFull.Rd ├── fitNull.Rd ├── fit_models.Rd ├── fullMatrix.Rd ├── fullModel.Rd ├── gibson.Rd ├── individual.Rd ├── kidney.Rd ├── kl_clust.Rd ├── lrt.Rd ├── nullMatrix.Rd ├── nullModel.Rd ├── odp.Rd ├── qvalueObj.Rd ├── resFull.Rd ├── resNull.Rd ├── sType.Rd ├── show.Rd └── summary.Rd ├── src ├── edge-init.c ├── edgeKLODP.c └── edgeKLODP.h ├── tests ├── testthat.R └── testthat │ ├── test-edgeSet.R │ └── test-modelCreation.R └── vignettes ├── edge.Rnw ├── edgecomp.pdf └── edgerefs.bib /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: edge 2 | Type: Package 3 | Title: Extraction of Differential Gene Expression 4 | Date: 2015-04-15 5 | Version: 2.5.3 6 | Author: John D. Storey, Jeffrey T. Leek and Andrew J. Bass 7 | Maintainer: John D. Storey , Andrew J. Bass 8 | biocViews: MultipleComparison, DifferentialExpression, TimeCourse, 9 | Regression, GeneExpression, DataImport 10 | Description: The edge package implements methods for carrying out differential 11 | expression analyses of genome-wide gene expression studies. Significance 12 | testing using the optimal discovery procedure and generalized likelihood 13 | ratio tests (equivalent to F-tests and t-tests) are implemented for general study 14 | designs. 
Special functions are available to facilitate the analysis of 15 | common study designs, including time course experiments. Other packages 16 | such as snm, sva, and qvalue are integrated in edge to provide a wide range 17 | of tools for gene expression analysis. 18 | VignetteBuilder: knitr 19 | Imports: 20 | methods, 21 | splines, 22 | sva, 23 | snm, 24 | jackstraw, 25 | qvalue(>= 1.99.0), 26 | MASS 27 | Suggests: 28 | testthat, 29 | knitr, 30 | ggplot2, 31 | reshape2 32 | Depends: 33 | R(>= 3.1.0), 34 | Biobase 35 | URL: https://github.com/StoreyLab/edge 36 | BugReports: https://github.com/StoreyLab/edge/issues 37 | LazyData: true 38 | License: MIT + file LICENSE 39 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2005-2015 2 | COPYRIGHT HOLDER: John D. Storey 3 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2 (4.1.1): do not edit by hand 2 | 3 | export("fullMatrix<-") 4 | export("fullModel<-") 5 | export("individual<-") 6 | export("nullMatrix<-") 7 | export("nullModel<-") 8 | export("qvalueObj<-") 9 | export(apply_jackstraw) 10 | export(apply_qvalue) 11 | export(apply_snm) 12 | export(apply_sva) 13 | export(betaCoef) 14 | export(build_models) 15 | export(build_study) 16 | export(deSet) 17 | export(fitFull) 18 | export(fitNull) 19 | export(fullMatrix) 20 | export(fullModel) 21 | export(individual) 22 | export(lrt) 23 | export(nullMatrix) 24 | export(odp) 25 | export(qvalueObj) 26 | export(resFull) 27 | export(resNull) 28 | export(show) 29 | export(summary) 30 | exportClasses(deFit) 31 | exportClasses(deSet) 32 | exportMethods(fit_models) 33 | exportMethods(kl_clust) 34 | exportMethods(nullModel) 35 | exportMethods(sType) 36 | import(Biobase) 37 | import(MASS) 38 | 
import(methods)
import(qvalue)
import(snm)
import(splines)
import(sva)
import(jackstraw)
useDynLib(edge,kldistance)
useDynLib(edge,odpScoreCluster)
--------------------------------------------------------------------------------
/R/AllClasses.R:
--------------------------------------------------------------------------------
# Register the S3 class "qvalue" so it can be used as the type of an S4 slot
setOldClass("qvalue")

# Validity method for the deSet class.
#
# Returns TRUE when every check passes, otherwise a character vector holding
# one message per failed check (the return contract of an S4 validity method).
deSetCheck <- function(object) {
  errors <- character()
  # Singular values of the full model matrix below this threshold are
  # reported as "near singular".
  epsilon <- 10e-8
  # Allow easy conversion for an ExpressionSet using function 'as': when both
  # model slots are lists and no individuals were supplied, skip validation.
  if (is.list(object@null.model) && is.list(object@full.model) &&
      length(object@individual) == 0) {
    return(TRUE)
  }
  # Name mismatch: every variable used in the models must be a phenoData
  # covariate. The check is skipped when the full model uses grp, bio.var or
  # time.basis (presumably variables generated internally -- TODO confirm).
  f.vars <- all.vars(object@full.model)
  n.vars <- all.vars(object@null.model)
  model.vars <- unique(c(f.vars, n.vars))
  if (!any(f.vars %in% c("grp", "bio.var", "time.basis"))) {
    if (!all(model.vars %in% varLabels(object))) {
      msg <- "naming mismatch between phenoData covariates and models."
      errors <- c(errors, msg)
      # The model matrices cannot be built with unknown variables: stop here.
      return(errors)
    }
  }
  # Singular matrix
  xx0 <- model.matrix(object@null.model, data = object)
  xx1 <- model.matrix(object@full.model, data = object)
  # The equivalent check on the null model is intentionally left disabled:
  #  sCheck.null <- min(svd(xx0)$d) < epsilon
  #  if (sCheck.null) {
  #    msg <- paste("null model matrix is near singular.")
  #    errors <- c(errors, msg)
  #  }
  sCheck.full <- min(svd(xx1)$d) < epsilon
  if (sCheck.full) {
    msg <- "full model matrix is near singular."
    errors <- c(errors, msg)
  }
  # Dimensionality test- this may be impossible to make in deSet
  n.arrays <- dim(exprs(object))[2]
  if (n.arrays != nrow(xx1)) {
    msg <- "dimension mismatch between full model and assayData."
    errors <- c(errors, msg)
  }
  if (n.arrays != nrow(xx0)) {
    msg <- "dimension mismatch between null model and assayData."
    errors <- c(errors, msg)
  }
  # individual input size
  if (length(object@individual) != 0) {
    if (length(object@individual) != ncol(exprs(object))) {
      msg <- "individual must be the same length as the number of arrays"
      errors <- c(errors, msg)
    }
  }
  if (length(errors) == 0) {
    TRUE
  } else {
    errors
  }
}

# Validity method for the deFit class.
#
# Returns TRUE when every check passes, otherwise a character vector holding
# one message per failed check.
deFitCheck <- function(object) {
  errors <- character()
  # Dimensionality test: fitted values, residuals and dH.full must all have
  # the same number of columns.
  n.cols <- c(ncol(object@fit.full), ncol(object@fit.null),
              ncol(object@res.full), ncol(object@res.null),
              ncol(object@dH.full))
  if (length(unique(n.cols)) != 1) {
    msg <- paste("column length of fitted matrices, dH.full and residuals",
                 "must be the same.")
    errors <- c(errors, msg)
  }
  # Fitted values and residuals must also agree on the number of rows.
  n.rows <- c(nrow(object@fit.full), nrow(object@fit.null),
              nrow(object@res.full), nrow(object@res.null))
  if (length(unique(n.rows)) != 1) {
    msg <- paste("row length of fitted matrices and residuals",
                 "must be the same.")
    errors <- c(errors, msg)
  }
  # Correct statistic input check. stat.type must be exactly one string; the
  # length guard keeps a zero-length or multi-element slot from raising an
  # error inside `if` instead of producing a validity message.
  stat.type <- object@stat.type
  if (length(stat.type) != 1 || !(stat.type %in% c("lrt", "odp"))) {
    msg <- paste("stat.type must be lrt or odp. Inputted stat.type: ",
                 paste(stat.type, collapse = ", "))
    errors <- c(errors, msg)
  }
  if (length(errors) == 0) {
    TRUE
  } else {
    errors
  }
}

#' The differential expression class (deSet)
#'
#' The \code{deSet} class extends the \code{\link{ExpressionSet}} class.
#' While the \code{ExpressionSet} class contains information about the
#' experiment, the \code{deSet} class contains both experimental information and
#' additional information relevant for differential expression analysis,
#' explained below in Slots.
#'
#' @slot null.model \code{formula}: contains the adjustment variables in the
#' experiment.
The null model is used for comparison when fitting the 101 | #' full model. 102 | #' @slot full.model \code{formula}: contains the adjustment variables and the 103 | #' biological variables of interest. 104 | #' @slot null.matrix \code{matrix}: the null model as a matrix. 105 | #' @slot full.matrix \code{matrix}: the full model as a matrix. 106 | #' @slot individual \code{factor}: contains information on which sample 107 | #' is from which individual in the experiment. 108 | #' @slot qvalueObj \code{S3 object}: containing \code{qvalue} object. 109 | #' See \code{\link{qvalue}} for additional details. 110 | #' 111 | #' @section Methods: 112 | #' \describe{ 113 | #' \item{\code{as(ExpressionSet, "deSet")}}{Coerce objects of 114 | #' \code{ExpressionSet} to \code{deSet}.} 115 | #' \item{\code{lrt(deSet, ...)}}{Performs a generalized likelihood ratio test 116 | #' using the full and null models.} 117 | #' \item{\code{odp(deSet, ...)}}{Performs the optimal discovery procedure, 118 | #' which is a new approach for optimally performing many hypothesis tests in 119 | #' a high-dimensional study.} 120 | #' \item{\code{kl_clust(deSet, ...)}}{An implementation of mODP that assigns 121 | #' genes to modules based on the Kullback-Leibler distance.} 122 | #' \item{\code{fit_models(deSet, ...)}}{Fits a linear model to each gene by 123 | #' method of least squares.} 124 | #' \item{\code{apply_qvalue(deSet, ...)}}{Applies \code{\link{qvalue}} 125 | #' function.} 126 | #' \item{\code{apply_snm(deSet, ...)}}{Applies supervised normalization of 127 | #' microarrays (\code{\link{snm}}) on gene expression data.} 128 | #' \item{\code{apply_sva(deSet, ...)}}{Applies surrogate variable analysis 129 | #' (\code{\link{sva}}).} 130 | #' \item{\code{fullMatrix(deSet)}}{Access and set full matrix.} 131 | #' \item{\code{nullMatrix(deSet)}}{Access and set null matrix.} 132 | #' \item{\code{fullModel(deSet)}}{Access and set full model.} 133 | #' \item{\code{nullModel(deSet)}}{Access and set
#' null model.}
#' \item{\code{individual(deSet)}}{Access and set individual slot.}
#' \item{\code{qvalueObj(deSet)}}{Access \code{qvalue} object.
#' See \code{\link{qvalue}}.}
#' \item{\code{validObject(deSet)}}{Check validity of \code{deSet} object.}
#' }
#'
#' @note
#' See \code{\link{ExpressionSet}} for additional slot information.
#'
#' @author
#' John Storey, Jeffrey Leek, Andrew Bass
#' @inheritParams ExpressionSet
#' @exportClass deSet
setClass(
  "deSet",
  contains = "ExpressionSet",
  slots = c(
    null.model = "formula",
    full.model = "formula",
    null.matrix = "matrix",
    full.matrix = "matrix",
    individual = "factor",
    qvalueObj = "qvalue"
  ),
  # Empty defaults for every slot; validity is delegated to deSetCheck.
  prototype = prototype(
    null.model = formula(NULL),
    full.model = formula(NULL),
    null.matrix = matrix(),
    full.matrix = matrix(),
    individual = as.factor(NULL),
    qvalueObj = structure(list(), class = "qvalue")
  ),
  validity = deSetCheck
)

#' The differential expression class for the model fits
#'
#' Object returned from \code{\link{fit_models}} containing information
#' regarding the model fits for the experiment.
#'
#' @slot fit.full \code{matrix}: containing fitted values for the full model.
#' @slot fit.null \code{matrix}: containing fitted values for the null model.
#' @slot res.full \code{matrix}: the residuals of the full model.
#' @slot res.null \code{matrix}: the residuals of the null model.
#' @slot dH.full \code{matrix}: contains diagonal elements in the projection
#' matrix for the full model.
#' @slot beta.coef \code{matrix}: fitted coefficients for the full model.
#' @slot stat.type \code{string}: information on the statistic of interest.
#' Currently, the only options are ``lrt'' and ``odp''.
#'
#' @section Methods:
#' \describe{
#' \item{\code{fitNull(deFit)}}{Access fitted data from null model.}
#' \item{\code{fitFull(deFit)}}{Access fitted data from full model.}
#' \item{\code{resNull(deFit)}}{Access residuals from null model fit.}
#' \item{\code{resFull(deFit)}}{Access residuals from full model fit.}
#' \item{\code{betaCoef(deFit)}}{Access beta coefficients in linear model.}
#' \item{\code{sType(deFit)}}{Access statistic type of model fitting utilized
#' in function.}
#' }
#'
#' @author
#' John Storey, Jeffrey Leek, Andrew Bass
#'
#' @exportClass deFit
setClass(
  "deFit",
  slots = c(
    fit.full = "matrix",
    fit.null = "matrix",
    res.full = "matrix",
    res.null = "matrix",
    dH.full = "matrix",
    beta.coef = "matrix",
    stat.type = "character"
  ),
  # Dimension and stat.type checks are delegated to deFitCheck.
  validity = deFitCheck
)
--------------------------------------------------------------------------------
/R/AllGenerics.R:
--------------------------------------------------------------------------------
#' Performs F-test (likelihood ratio test using Normal likelihood)
#'
#' \code{lrt} performs a generalized likelihood ratio test using the full and
#' null models.
#'
#' @param object \code{S4 object}: \code{\linkS4class{deSet}}.
#' @param de.fit \code{S4 object}: \code{\linkS4class{deFit}}. Optional.
#' @param nullDistn \code{character}: either "normal" or "bootstrap", If
#' "normal" then the p-values are calculated using the F distribution. If
#' "bootstrap" then a bootstrap algorithm is implemented to simulate
#' statistics from the null distribution. In the "bootstrap" case, empirical
#' p-values are calculated using the observed and null statistics (see
#' \code{\link{empPvals}}). Default is "normal".
#' @param weights \code{matrix}: weights for each observation. Default is NULL.
15 | #' @param bs.its \code{integer}: number of null statistics generated (only 16 | #' applicable for "bootstrap" method). Default is 100. 17 | #' @param seed \code{integer}: set the seed value. Default is NULL. 18 | #' @param verbose \code{boolean}: print iterations for bootstrap method. 19 | #' Default is TRUE. 20 | #' @param mod.F \code{boolean}: Moderated F-test, recommended for experiments 21 | #' with a small sample size. Default is FALSE. 22 | #' @param ... Additional arguments for \code{\link{apply_qvalue}} and 23 | #' \code{\link{empPvals}} functions. 24 | #' 25 | #' @details \code{lrt} fits the full and null models to each gene using the 26 | #' function \code{\link{fit_models}} and then performs a likelihood ratio test. 27 | #' The user has the option to calculate p-values under a Normal distribution 28 | #' assumption or through a bootstrap algorithm. If \code{nullDistn} is 29 | #' "bootstrap" then empirical p-values will be determined from the 30 | #' \code{\link{qvalue}} package (see \code{\link{empPvals}}).
#'
#' @author John Storey, Andrew Bass
#'
#' @return \code{\linkS4class{deSet}} object
#'
#' @examples
#' # import data
#' library(splines)
#' data(kidney)
#' age <- kidney$age
#' sex <- kidney$sex
#' kidexpr <- kidney$kidexpr
#' cov <- data.frame(sex = sex, age = age)
#'
#' # create models
#' null_model <- ~sex
#' full_model <- ~sex + ns(age, df = 4)
#'
#' # create deSet object from data
#' de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model,
#'                        full.model = full_model)
#'
#' # lrt method
#' de_lrt <- lrt(de_obj, nullDistn = "normal")
#'
#' # to generate p-values from bootstrap
#' de_lrt <- lrt(de_obj, nullDistn = "bootstrap", bs.its = 30)
#'
#' # input a deFit object directly
#' de_fit <- fit_models(de_obj, stat.type = "lrt")
#' de_lrt <- lrt(de_obj, de.fit = de_fit)
#'
#' # summarize object
#' summary(de_lrt)
#'
#' @references
#' Storey JD, Xiao W, Leek JT, Tompkins RG, and Davis RW. (2005) Significance
#' analysis of time course microarray experiments. Proceedings of the National
#' Academy of Sciences, 102: 12837-12842.
#'
#' \url{http://en.wikipedia.org/wiki/Likelihood-ratio_test}
#'
#' @seealso \code{\linkS4class{deSet}}, \code{\link{build_models}},
#' \code{\link{odp}}
#'
#' @export
setGeneric(
  name = "lrt",
  # The body is a bare standardGeneric() call (no braces) so that the result
  # stays a standard generic.
  def = function(object,
                 de.fit,
                 nullDistn = c("normal", "bootstrap"),
                 weights = NULL,
                 bs.its = 100,
                 seed = NULL,
                 verbose = TRUE,
                 mod.F = FALSE,
                 ...) standardGeneric("lrt")
)


#' The optimal discovery procedure
#'
#' \code{odp} performs the optimal discovery procedure, which is a framework for
#' optimally performing many hypothesis tests in a high-dimensional study.
When 88 | #' testing whether a feature is significant, the optimal discovery procedure 89 | #' uses information across all features when testing for significance. 90 | #' 91 | #' @param object \code{S4 object}: \code{\linkS4class{deSet}} 92 | #' @param de.fit \code{S4 object}: \code{\linkS4class{deFit}}. Optional. 93 | #' @param odp.parms \code{list}: parameters for each cluster. See 94 | #' \code{\link{kl_clust}}. 95 | #' @param weights \code{matrix}: weights for each observation. Default is NULL. 96 | #' @param bs.its \code{numeric}: number of null bootstrap iterations. Default 97 | #' is 100. 98 | #' @param n.mods \code{integer}: number of clusters used in 99 | #' \code{\link{kl_clust}}. Default is 50. 100 | #' @param seed \code{integer}: set the seed value. Default is NULL. 101 | #' @param verbose \code{boolean}: print iterations for bootstrap method. 102 | #' Default is TRUE. 103 | #' @param ... Additional arguments for \code{\link{qvalue}} and 104 | #' \code{\link{empPvals}}. 105 | #' 106 | #' 107 | #' @details 108 | #' The full ODP estimator computationally grows quadratically with respect to 109 | #' the number of genes. This becomes computationally taxing at a certain point. 110 | #' Therefore, an alternative method called mODP is used which has been shown to 111 | #' provide results that are very similar. mODP utilizes a clustering algorithm 112 | #' where genes are assigned to a cluster based on the Kullback-Leibler distance. 113 | #' Each gene is assigned a module-average parameter to calculate the ODP score 114 | #' and it reduces the computation time to approximately linear (see Woo, Leek 115 | #' and Storey 2010). If the number of clusters is equal to the number of genes 116 | #' then the original ODP is implemented. Depending on the number of hypothesis 117 | #' tests, this can take some time.
#'
#' @return \code{\linkS4class{deSet}} object
#'
#' @examples
#' # import data
#' library(splines)
#' data(kidney)
#' age <- kidney$age
#' sex <- kidney$sex
#' kidexpr <- kidney$kidexpr
#' cov <- data.frame(sex = sex, age = age)
#'
#' # create models
#' null_model <- ~sex
#' full_model <- ~sex + ns(age, df = 4)
#'
#' # create deSet object from data
#' de_obj <- build_models(data = kidexpr, cov = cov,
#'                        null.model = null_model, full.model = full_model)
#'
#' # odp method
#' de_odp <- odp(de_obj, bs.its = 30)
#'
#' # input a deFit object or ODP parameters ... not necessary
#' de_fit <- fit_models(de_obj, stat.type = "odp")
#' de_clust <- kl_clust(de_obj, n.mods = 10)
#' de_odp <- odp(de_obj, de.fit = de_fit, odp.parms = de_clust,
#'               bs.its = 30)
#'
#' # summarize object
#' summary(de_odp)
#'
#' @references
#' Storey JD. (2007) The optimal discovery procedure: A new approach to
#' simultaneous significance testing. Journal of the Royal Statistical
#' Society, Series B, 69: 347-368.
#'
#' Storey JD, Dai JY, and Leek JT. (2007) The optimal discovery procedure for
#' large-scale significance testing, with applications to comparative
#' microarray experiments. Biostatistics, 8: 414-432.
#'
#' Woo S, Leek JT, Storey JD (2010) A computationally efficient modular
#' optimal discovery procedure. Bioinformatics, 27(4): 509-515.
#'
#' @author John Storey, Jeffrey Leek, Andrew Bass
#'
#' @seealso \code{\link{kl_clust}}, \code{\link{build_models}} and
#' \code{\linkS4class{deSet}}
#'
#' @export
setGeneric(
  name = "odp",
  # The body is a bare standardGeneric() call (no braces) so that the result
  # stays a standard generic.
  def = function(object,
                 de.fit,
                 odp.parms = NULL,
                 weights = NULL,
                 bs.its = 100,
                 n.mods = 50,
                 seed = NULL,
                 verbose = TRUE,
                 ...) standardGeneric("odp")
)


#' Modular optimal discovery procedure (mODP)
#'
#' \code{kl_clust} is an implementation of mODP that assigns genes to modules
#' based on the Kullback-Leibler distance.
#'
#' @param object \code{S4 object}: \code{\linkS4class{deSet}}.
#' @param de.fit \code{S4 object}: \code{\linkS4class{deFit}}.
#' @param n.mods \code{integer}: number of modules (i.e., clusters).
#'
#' @details mODP utilizes a k-means clustering algorithm where genes are
#' assigned to a cluster based on the Kullback-Leibler distance. Each gene is
#' assigned a module-average parameter to calculate the ODP score (See Woo,
#' Leek and Storey 2010 for more details). The mODP and full ODP produce nearly
#' exact results but mODP has the advantage of being computationally
#' faster.
#'
#' @note The results are generally insensitive to the number of modules after a
#' certain threshold of about n.mods>=50 in our experience. It is recommended
#' that users experiment with the number of modules. If the number of modules
#' is equal to the number of genes then the original ODP is implemented.
193 | #' 194 | #' @return 195 | #' A list with the following slots: 196 | #' \itemize{ 197 | #' \item {mu.full: cluster averaged fitted values from full model.} 198 | #' \item {mu.null: cluster averaged fitted values from null model.} 199 | #' \item {sig.full: cluster standard deviations from full model.} 200 | #' \item {sig.null: cluster standard deviations from null model.} 201 | #' \item {n.per.mod: total members in each cluster.} 202 | #' \item {clustMembers: cluster membership for each gene.} 203 | #' } 204 | #' 205 | #' @examples 206 | #' # import data 207 | #' library(splines) 208 | #' data(kidney) 209 | #' age <- kidney$age 210 | #' sex <- kidney$sex 211 | #' kidexpr <- kidney$kidexpr 212 | #' cov <- data.frame(sex = sex, age = age) 213 | #' 214 | #' # create models 215 | #' null_model <- ~sex 216 | #' full_model <- ~sex + ns(age, df = 4) 217 | #' 218 | #' # create deSet object from data 219 | #' de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model, 220 | #' full.model = full_model) 221 | #' 222 | #' # mODP method 223 | #' de_clust <- kl_clust(de_obj) 224 | #' 225 | #' # change the number of clusters 226 | #' de_clust <- kl_clust(de_obj, n.mods = 10) 227 | #' 228 | #' # input a deFit object 229 | #' de_fit <- fit_models(de_obj, stat.type = "odp") 230 | #' de_clust <- kl_clust(de_obj, de.fit = de_fit) 231 | #' 232 | #' @references 233 | #' Storey JD. (2007) The optimal discovery procedure: A new approach to 234 | #' simultaneous significance testing. Journal of the Royal Statistical 235 | #' Society, Series B, 69: 347-368. 236 | #' 237 | #' Storey JD, Dai JY, and Leek JT. (2007) The optimal discovery procedure for 238 | #' large-scale significance testing, with applications to comparative 239 | #' microarray experiments. Biostatistics, 8: 414-432. 240 | #' 241 | #' Woo S, Leek JT, Storey JD (2010) A computationally efficient modular optimal 242 | #' discovery procedure. Bioinformatics, 27(4): 509-515. 
#'
#' @author John Storey, Jeffrey Leek
#'
#' @seealso \code{\link{odp}}, \code{\link{fit_models}}
#'
#' @exportMethod kl_clust
setGeneric(
  name = "kl_clust",
  # The body is a bare standardGeneric() call (no braces) so that the result
  # stays a standard generic.
  def = function(object,
                 de.fit = NULL,
                 n.mods = 50) standardGeneric("kl_clust")
)

#' Linear regression of the null and full models
#'
#' \code{fit_models} fits a model matrix to each gene by using the least
#' squares method. Model fits can be either statistic type "odp" (optimal
#' discovery procedure) or "lrt" (likelihood ratio test).
#'
#' @param object \code{S4 object}: \code{\linkS4class{deSet}}.
#' @param stat.type \code{character}: type of statistic to be used. Either
#' "lrt" or "odp". Default is "lrt".
#' @param weights \code{matrix}: weights for each observation. Default is NULL.
#'
#' @details
#' If "odp" method is implemented then the null model is removed from the full
#' model (see Storey 2007). Otherwise, the statistic type has no effect on the
#' model fit.
#'
#' @note \code{fit_models} does not have to be called by the user to use
#' \code{\link{odp}}, \code{\link{lrt}} or \code{\link{kl_clust}} as it is an
#' optional input and is implemented in the methods. The
#' \code{\linkS4class{deFit}} object can be created by the user if a different
#' statistical implementation is required.
273 | #' 274 | #' @return \code{\linkS4class{deFit}} object 275 | #' 276 | #' @examples 277 | #' # import data 278 | #' library(splines) 279 | #' data(kidney) 280 | #' age <- kidney$age 281 | #' sex <- kidney$sex 282 | #' kidexpr <- kidney$kidexpr 283 | #' cov <- data.frame(sex = sex, age = age) 284 | #' 285 | #' # create models 286 | #' null_model <- ~sex 287 | #' full_model <- ~sex + ns(age, df = 4) 288 | #' 289 | #' # create deSet object from data 290 | #' de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model, 291 | #' full.model = full_model) 292 | #' 293 | #' # retrieve statistics from linear regression for each gene 294 | #' fit_lrt <- fit_models(de_obj, stat.type = "lrt") # lrt method 295 | #' fit_odp <- fit_models(de_obj, stat.type = "odp") # odp method 296 | #' 297 | #' # summarize object 298 | #' summary(fit_odp) 299 | #' 300 | #' @references 301 | #' Storey JD. (2007) The optimal discovery procedure: A new approach to 302 | #' simultaneous significance testing. Journal of the Royal Statistical 303 | #' Society, Series B, 69: 347-368. 304 | #' 305 | #' Storey JD, Dai JY, and Leek JT. (2007) The optimal discovery procedure for 306 | #' large-scale significance testing, with applications to comparative 307 | #' microarray experiments. Biostatistics, 8: 414-432. 308 | #' 309 | #' Storey JD, Xiao W, Leek JT, Tompkins RG, and Davis RW. (2005) Significance 310 | #' analysis of time course microarray experiments. Proceedings of the National 311 | #' Academy of Sciences, 102: 12837-12842. 
#'
#' @seealso \code{\linkS4class{deFit}}, \code{\link{odp}} and
#' \code{\link{lrt}}
#'
#' @author John Storey
#' @exportMethod fit_models
# The generic body is a bare standardGeneric() call, like every other generic
# in this file. Wrapping the call in `{}` would make setGeneric() create a
# nonstandard generic function instead of a standard one.
setGeneric("fit_models",
           function(object, stat.type = c("lrt", "odp"), weights = NULL)
             standardGeneric("fit_models"))

#' Create a deSet object from an ExpressionSet
#'
#' Creates a \code{\linkS4class{deSet}} object that extends the
#' \code{\link{ExpressionSet}} object.
#'
#' @param object \code{S4 object}: \code{\link{ExpressionSet}}
#' @param full.model \code{formula}: full model containing both the
#' adjustment and the biological variables for the experiment.
#' @param null.model \code{formula}: null model containing the adjustment
#' variables for the experiment.
#' @param individual \code{factor}: information on repeated samples in
#' experiment.
#'
#' @note It is essential that the null and full models have the same variables
#' as the ExpressionSet phenoData column names.
#'
#' @return \code{\linkS4class{deSet}} object
#'
#' @examples
#' # import data
#' library(splines)
#' data(kidney)
#' age <- kidney$age
#' sex <- kidney$sex
#' kidexpr <- kidney$kidexpr
#' cov <- data.frame(sex = sex, age = age)
#' pDat <- as(cov, "AnnotatedDataFrame")
#' exp_set <- ExpressionSet(assayData = kidexpr, phenoData = pDat)
#'
#' # create models
#' null_model <- ~sex
#' full_model <- ~sex + ns(age, df = 4)
#'
#' # create deSet object from data
#' de_obj <- deSet(exp_set, null.model = null_model,
#'                 full.model = full_model)
#'
#' # optionally add individuals to experiment, in this case there are 36
#' # individuals that were sampled twice
#' indSamples <- as.factor(rep(1:36, each = 2))
#' de_obj <- deSet(exp_set, null.model = null_model,
#'                 full.model = full_model, individual = indSamples)
#' summary(de_obj)
#' @seealso \code{\linkS4class{deSet}}, \code{\link{odp}} and
#' \code{\link{lrt}}
#'
#' @author John Storey, Andrew Bass
#'
#' @export
setGeneric(
  name = "deSet",
  # The body is a bare standardGeneric() call (no braces) so that the result
  # stays a standard generic.
  def = function(object,
                 full.model,
                 null.model,
                 individual = NULL) standardGeneric("deSet")
)

#' Estimate the q-values for a given set of p-values
#'
#' Runs \code{\link{qvalue}} on a \code{\linkS4class{deSet}} object.
#'
#' @param object \code{S4 object}: \code{\linkS4class{deSet}}
#' @param ... Additional arguments for \code{\link{qvalue}}
#'
#' @return \code{\linkS4class{deSet}} object with slots updated by
#' \code{\link{qvalue}} calculations.
#'
#' @examples
#' # import data
#' library(splines)
#' data(kidney)
#' age <- kidney$age
#' sex <- kidney$sex
#' kidexpr <- kidney$kidexpr
#' cov <- data.frame(sex = sex, age = age)
#'
#' # create models
#' null_model <- ~sex
#' full_model <- ~sex + ns(age, df = 4)
#'
#' # create deSet object from data
#' de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model,
#'                        full.model = full_model)
#'
#' # Run lrt (or odp) and apply_qvalue
#' de_lrt <- lrt(de_obj)
#' de_lrt <- apply_qvalue(de_lrt, fdr.level = 0.05,
#'                        pi0.method = "bootstrap", adj=1.2)
#' summary(de_lrt)
#'
#' @references
#' Storey JD and Tibshirani R. (2003) Statistical significance for
#' genome-wide studies. Proceedings of the National Academy of Sciences,
#' 100: 9440-9445
#'
#' @seealso \code{\linkS4class{deSet}}, \code{\link{odp}} and
#' \code{\link{lrt}}
#'
#' @author John Storey, Andrew Bass
#'
#' @export
setGeneric(
  name = "apply_qvalue",
  # The body is a bare standardGeneric() call (no braces) so that the result
  # stays a standard generic.
  def = function(object, ...) standardGeneric("apply_qvalue")
)

#' Estimate surrogate variables
#'
#' Runs \code{\link{sva}} on the null and full models in
#' \code{\linkS4class{deSet}}. See \code{\link{sva}} for additional details.
#'
#' @param object \code{S4 object}: \code{\linkS4class{deSet}}
#' @param ... Additional arguments for \code{\link{sva}}
#'
#' @return \code{\linkS4class{deSet}} object where the surrogate variables
#' estimated by \code{\link{sva}} are added to the full model and null model
#' matrices.
#'
#' @examples
#' # import data
#' library(splines)
#' data(kidney)
#' age <- kidney$age
#' sex <- kidney$sex
#' kidexpr <- kidney$kidexpr
#' cov <- data.frame(sex = sex, age = age)
#'
#' # create models
#' null_model <- ~sex
#' full_model <- ~sex + ns(age, df = 4)
#'
#' # create deSet object from data
#' de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model,
#'                        full.model = full_model)
#'
#' # run surrogate variable analysis
#' de_sva <- apply_sva(de_obj)
#'
#' # run odp/lrt with surrogate variables added
#' de_odp <- odp(de_sva, bs.its = 30)
#' summary(de_odp)
#' @seealso \code{\linkS4class{deSet}}, \code{\link{odp}} and
#' \code{\link{lrt}}
#'
#' @references
#' Leek JT, Storey JD (2007) Capturing Heterogeneity in Gene Expression
#' Studies by Surrogate Variable Analysis. PLoS Genet 3(9): e161.
#' doi:10.1371/journal.pgen.0030161
#'
#' Leek JT and Storey JD. (2008) A general framework for multiple testing
#' dependence. Proceedings of the National Academy of Sciences, 105: 18718-
#' 18723.
#'
#' @author John Storey, Jeffrey Leek, Andrew Bass
#' @export
setGeneric(
  name = "apply_sva",
  # The body is a bare standardGeneric() call (no braces) so that the result
  # stays a standard generic.
  def = function(object, ...) standardGeneric("apply_sva")
)

#' Supervised normalization of data in edge
#'
#' Runs \code{snm} on a deSet object based on the null and full models in
#' \code{\linkS4class{deSet}}. See \code{\link{snm}} for additional details
#' on the algorithm.
#'
#' @param object \code{S4 object}: \code{\linkS4class{deSet}}
#' @param int.var \code{data frame}: intensity-dependent effects (see
#' \code{\link{snm}} for details)
#' @param ...
Additional arguments for \code{\link{snm}} 484 | #' 485 | #' @return \code{apply_snm} returns a \code{\linkS4class{deSet}} object where 486 | #' assayData (the expression data) that has been passed to apply_snm is replaced 487 | #' with the normalized data that \code{\link{snm}} returns. Specifically, 488 | #' \code{exprs(object)} is replaced by \code{$norm.dat} from \code{\link{snm}}, 489 | #' where \code{object} is the \code{\link{deSet}} object. 490 | #' 491 | #' @references 492 | #' Mecham BH, Nelson PS, Storey JD. Supervised normalization of microarrays. 493 | #' Bioinformatics 2010;26:1308-1315. 494 | #' 495 | #' @examples 496 | #' # simulate data 497 | #' library(snm) 498 | #' singleChannel <- sim.singleChannel(12345) 499 | #' data <- singleChannel$raw.data 500 | #' 501 | #' # create covariates and models (an ExpressionSet can be used instead; see manual) 502 | #' cov <- data.frame(grp = singleChannel$bio.var[,2]) 503 | #' full_model <- ~grp 504 | #' null_model <- ~1 505 | #' 506 | #' # create deSet object using build_models 507 | #' de_obj <- build_models(data = data, cov = cov, full.model = full_model, 508 | #' null.model = null_model) 509 | #' 510 | #' # run snm using intensity-dependent adjustment variable 511 | #' de_snm <- apply_snm(de_obj, int.var = singleChannel$int.var, 512 | #' verbose = FALSE, num.iter = 1) 513 | #' 514 | #' @seealso \code{\linkS4class{deSet}}, \code{\link{odp}} and 515 | #' \code{\link{lrt}} 516 | #' 517 | #' @author John Storey, Andrew Bass 518 | #' @export 519 | setGeneric("apply_snm", function(object, int.var=NULL, ...) 520 | standardGeneric("apply_snm")) 521 | 522 | 523 | #' Non-Parametric Jackstraw for Principal Component Analysis (PCA) 524 | #' 525 | #' Estimates statistical significance of association between variables and 526 | #' their principal components (PCs). 527 | #' 528 | #' @param object \code{S4 object}: \code{\linkS4class{deSet}} 529 | #' @param r1 a numeric vector of principal components of interest.
Choose a subset of r significant PCs to be used. 530 | #' @param r a number (a positive integer) of significant principal components. 531 | #' @param s a number (a positive integer) of synthetic null variables. Out of m variables, s variables are independently permuted. 532 | #' @param B a number (a positive integer) of resampling iterations. There will be a total of s*B null statistics. 533 | #' @param covariate a data matrix of covariates with corresponding n observations. 534 | #' @param verbose a logical indicator as to whether to print the progress. 535 | #' @param seed a seed for the random number generator. 536 | #' 537 | #' @details 538 | #' This function computes m p-values of linear association between m variables 539 | #' and their PCs. Its resampling strategy accounts for the over-fitting 540 | #' characteristics due to direct computation of PCs from the observed data 541 | #' and protects against an anti-conservative bias. 542 | #' 543 | #' Provide the \code{\linkS4class{deSet}}, 544 | #' with m variables as rows and n observations as columns. Given that there are 545 | #' r significant PCs, this function tests for linear association between m 546 | #' variables and their r PCs. 547 | #' 548 | #' You could specify a subset of significant PCs 549 | #' that you are interested in via r1. If r1 is given, then this function computes 550 | #' statistical significance of association between m variables and those PCs, while 551 | #' adjusting for other PCs (i.e., significant PCs that are not of interest). 552 | #' For example, if you want to identify variables associated with 1st and 2nd 553 | #' PCs, when your data contains three significant PCs, set r=3 and r1=c(1,2). 554 | #' 555 | #' Please take a careful look at your data and use appropriate graphical and 556 | #' statistical criteria to determine a number of significant PCs, r. The number 557 | #' of significant PCs depends on the data structure and the context.
In a case 558 | #' when you fail to specify r, it will be estimated from a permutation test 559 | #' (Buja and Eyuboglu, 1992) using a function \code{\link{permutationPA}}. 560 | #' 561 | #' If s is not supplied, s is set to about 10% of m variables. If B is not 562 | #' supplied, B is set to m*10/s. 563 | #' 564 | #' @return \code{apply_jackstraw} returns a \code{list} containing the following 565 | #' slots: 566 | #' \itemize{ 567 | #' \item{\code{p.value} the m p-values of association tests between variables 568 | #' and their principal components} 569 | #' \item{\code{obs.stat} the observed F-test statistics} 570 | #' \item{\code{null.stat} the s*B null F-test statistics} 571 | #' } 572 | #' 573 | #' 574 | #' @references 575 | #' Chung and Storey (2013) Statistical Significance of 576 | #' Variables Driving Systematic Variation in 577 | #' High-Dimensional Data. arXiv:1308.6013 [stat.ME] 578 | #' \url{http://arxiv.org/abs/1308.6013} 579 | #' 580 | #' More information available at \url{http://ncc.name/} 581 | #' 582 | #' @examples 583 | #' # import data 584 | #' library(splines) 585 | #' data(kidney) 586 | #' age <- kidney$age 587 | #' sex <- kidney$sex 588 | #' kidexpr <- kidney$kidexpr 589 | #' cov <- data.frame(sex = sex, age = age) 590 | #' 591 | #' # create models 592 | #' null_model <- ~sex 593 | #' full_model <- ~sex + ns(age, df = 4) 594 | #' 595 | #' # create deSet object from data 596 | #' de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model, 597 | #' full.model = full_model) 598 | #' ## apply the jackstraw 599 | #' out = apply_jackstraw(de_obj, r1=1, r=1) 600 | #' ## Use optional arguments 601 | #' ## For example, set s and B for a balance between speed of the algorithm and accuracy of p-values 602 | #' ## out = apply_jackstraw(de_obj, r1=1, r=1, s=10, B=1000, seed=5678) 603 | #' 604 | #' @seealso \code{\link{permutationPA}} 605 | #' 606 | #' @author Neo Christopher Chung \email{nc@@princeton.edu} 607 | #' @import jackstraw 608 | #' @export
609 | setGeneric("apply_jackstraw", function(object, r1 = NULL, r = NULL, s = NULL, B = NULL, 610 | covariate = NULL, verbose = TRUE, seed = NULL) 611 | standardGeneric("apply_jackstraw")) 612 | 613 | #' Full model equation 614 | #' 615 | #' These generic functions access and set the full model for 616 | #' \code{\linkS4class{deSet}} object. 617 | #' 618 | #' @param object \code{S4 object}: \code{\linkS4class{deSet}} 619 | #' @param value \code{formula}: The experiment design for the full model. 620 | #' 621 | #' @examples 622 | #' # import data 623 | #' library(splines) 624 | #' data(kidney) 625 | #' age <- kidney$age 626 | #' sex <- kidney$sex 627 | #' kidexpr <- kidney$kidexpr 628 | #' cov <- data.frame(sex = sex, age = age) 629 | #' 630 | #' # create models 631 | #' null_model <- ~sex 632 | #' full_model <- ~sex + ns(age, df = 4) 633 | #' 634 | #' # create deSet object from data 635 | #' de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model, 636 | #' full.model = full_model) 637 | #' 638 | #' # extract out the full model equation 639 | #' mod_full <- fullModel(de_obj) 640 | #' 641 | #' # change the full model in the experiment 642 | #' fullModel(de_obj) <- ~sex + ns(age, df = 2) 643 | #' 644 | #' 645 | #' @return the formula for the full model. 646 | #' 647 | #' @author John Storey, Andrew Bass 648 | #' 649 | #' @seealso \code{\linkS4class{deSet}} 650 | #' 651 | #' @export 652 | setGeneric("fullModel", function(object) standardGeneric("fullModel")) 653 | 654 | #' @rdname fullModel 655 | #' @export 656 | setGeneric("fullModel<-", function(object, value) { 657 | standardGeneric("fullModel<-") 658 | }) 659 | 660 | #' Null model equation from deSet object 661 | #' 662 | #' These generic functions access and set the null model for 663 | #' \code{\linkS4class{deSet}} object. 664 | #' 665 | #' @param object \code{S4 object}: \code{\linkS4class{deSet}} 666 | #' @param value \code{formula}: The experiment design for the null model. 
667 | #' 668 | #' @return \code{nullModel} returns the formula for the null model. 669 | #' 670 | #' @examples 671 | #' # import data 672 | #' library(splines) 673 | #' data(kidney) 674 | #' age <- kidney$age 675 | #' sex <- kidney$sex 676 | #' kidexpr <- kidney$kidexpr 677 | #' cov <- data.frame(sex = sex, age = age) 678 | #' 679 | #' # create models 680 | #' null_model <- ~sex 681 | #' full_model <- ~sex + ns(age, df = 4) 682 | #' 683 | #' # create deSet object from data 684 | #' de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model, 685 | #' full.model = full_model) 686 | #' 687 | #' # extract the null model equation 688 | #' mod_null <- nullModel(de_obj) 689 | #' 690 | #' # change null model in experiment but must update full model 691 | #' nullModel(de_obj) <- ~1 692 | #' fullModel(de_obj) <- ~1 + ns(age, df=4) 693 | #' @author John Storey, Andrew Bass 694 | #' 695 | #' @seealso \code{\linkS4class{deSet}} 696 | #' 697 | #' @keywords nullModel, nullModel<- 698 | #' 699 | #' @exportMethod nullModel 700 | setGeneric("nullModel", function(object) standardGeneric("nullModel")) 701 | 702 | #' @rdname nullModel 703 | #' @export 704 | setGeneric("nullModel<-", function(object, value) { 705 | standardGeneric("nullModel<-") 706 | }) 707 | 708 | #' Matrix representation of null model 709 | #' 710 | #' These generic functions access and set the null matrix for 711 | #' \code{\linkS4class{deSet}} object. 712 | #' 713 | #' @param object \code{S4 object}: \code{\linkS4class{deSet}} 714 | #' @param value \code{matrix}: null model matrix where columns are covariates 715 | #' and rows are observations 716 | #' 717 | #' @return \code{nullMatrix} returns the value of the null model matrix. 
718 | #' 719 | #' @examples 720 | #' # import data 721 | #' library(splines) 722 | #' data(kidney) 723 | #' age <- kidney$age 724 | #' sex <- kidney$sex 725 | #' kidexpr <- kidney$kidexpr 726 | #' cov <- data.frame(sex = sex, age = age) 727 | #' 728 | #' # create models 729 | #' null_model <- ~sex 730 | #' full_model <- ~sex + ns(age, df = 4) 731 | #' 732 | #' # create deSet object from data 733 | #' de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model, 734 | #' full.model = full_model) 735 | #' 736 | #' # extract the null model as a matrix 737 | #' mat_null <- nullMatrix(de_obj) 738 | #' 739 | #' @author John Storey, Andrew Bass 740 | #' 741 | #' @seealso \code{\linkS4class{deSet}}, \code{\link{nullModel}} and 742 | #' \code{\link{fullModel}} 743 | #' 744 | #' @export 745 | setGeneric("nullMatrix", function(object) standardGeneric("nullMatrix")) 746 | 747 | #' @rdname nullMatrix 748 | #' @export 749 | setGeneric("nullMatrix<-", function(object, value) { 750 | standardGeneric("nullMatrix<-") 751 | }) 752 | 753 | #' Matrix representation of full model 754 | #' 755 | #' These generic functions access and set the full matrix for 756 | #' \code{\linkS4class{deSet}} object. 757 | #' 758 | #' @param object \code{S4 object}: \code{\linkS4class{deSet}} 759 | #' @param value \code{matrix}: full model matrix where the columns are the 760 | #' covariates and rows are observations 761 | #' 762 | #' @return \code{fullMatrix} returns the value of the full model matrix.
763 | #' 764 | #' @examples 765 | #' # import data 766 | #' library(splines) 767 | #' data(kidney) 768 | #' age <- kidney$age 769 | #' sex <- kidney$sex 770 | #' kidexpr <- kidney$kidexpr 771 | #' cov <- data.frame(sex = sex, age = age) 772 | #' 773 | #' # create models 774 | #' null_model <- ~sex 775 | #' full_model <- ~sex + ns(age, df = 4) 776 | #' 777 | #' # create deSet object from data 778 | #' de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model, 779 | #' full.model = full_model) 780 | #' 781 | #' # extract the full model equation as a matrix 782 | #' mat_full <- fullMatrix(de_obj) 783 | #' @author Andrew Bass, John Storey 784 | #' 785 | #' @seealso \code{\linkS4class{deSet}}, \code{\link{fullModel}} 786 | #' 787 | #' @export 788 | setGeneric("fullMatrix", function(object) standardGeneric("fullMatrix")) 789 | 790 | #' @rdname fullMatrix 791 | #' @export 792 | setGeneric("fullMatrix<-", function(object, value) { 793 | standardGeneric("fullMatrix<-") 794 | }) 795 | 796 | 797 | #' Access/set qvalue slot 798 | #' 799 | #' These generic functions access and set the \code{qvalue} object in the 800 | #' \code{\linkS4class{deSet}} object. 801 | #' 802 | #' @param object \code{S4 object}: \code{\linkS4class{deSet}} 803 | #' @param value S3 \code{object}: \code{\link{qvalue}} 804 | #' 805 | #' @return \code{qvalueObj} returns a \code{\link{qvalue}} object. 
806 | #' 807 | #' @examples 808 | #' # import data 809 | #' library(splines) 810 | #' library(qvalue) 811 | #' data(kidney) 812 | #' age <- kidney$age 813 | #' sex <- kidney$sex 814 | #' kidexpr <- kidney$kidexpr 815 | #' cov <- data.frame(sex = sex, age = age) 816 | #' 817 | #' # create models 818 | #' null_model <- ~sex 819 | #' full_model <- ~sex + ns(age, df = 4) 820 | #' 821 | #' # create deSet object from data 822 | #' de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model, 823 | #' full.model = full_model) 824 | #' 825 | #' # run the odp method 826 | #' de_odp <- odp(de_obj, bs.its = 20) 827 | #' 828 | #' # extract out significance results 829 | #' qval_obj <- qvalueObj(de_odp) 830 | #' 831 | #' # run qvalue and assign it to deSet slot 832 | #' pvals <- qval_obj$pvalues 833 | #' qval_new <- qvalue(pvals, pfdr = TRUE, fdr.level = 0.1) 834 | #' qvalueObj(de_odp) <- qval_new 835 | #' 836 | #' @author John Storey, Andrew Bass 837 | #' 838 | #' @seealso \code{\link{lrt}}, \code{\link{odp}} and 839 | #' \code{\linkS4class{deSet}} 840 | #' 841 | #' @export 842 | setGeneric("qvalueObj", function(object) standardGeneric("qvalueObj")) 843 | 844 | #' @rdname qvalueObj 845 | #' @export 846 | setGeneric("qvalueObj<-", function(object, value) { 847 | standardGeneric("qvalueObj<-") 848 | }) 849 | 850 | #' Individuals sampled in experiment 851 | #' 852 | #' These generic functions access and set the individual slot in 853 | #' \code{\linkS4class{deSet}}. 854 | #' 855 | #' @param object \code{\linkS4class{deSet}} 856 | #' @param value \code{factor}: Identifies which samples correspond to which 857 | #' individuals. Important if the same individuals are sampled multiple times 858 | #' in a longitudinal fashion. 859 | #' 860 | #' @return \code{individual} returns information regarding distinct individuals 861 | #' sampled in the experiment.
862 | #' 863 | #' @examples 864 | #' library(splines) 865 | #' # import data 866 | #' data(endotoxin) 867 | #' ind <- endotoxin$ind 868 | #' time <- endotoxin$time 869 | #' class <- endotoxin$class 870 | #' endoexpr <- endotoxin$endoexpr 871 | #' cov <- data.frame(individual = ind, time = time, class = class) 872 | #' 873 | #' # create ExpressionSet object 874 | #' pDat <- as(cov, "AnnotatedDataFrame") 875 | #' exp_set <- ExpressionSet(assayData = endoexpr, phenoData = pDat) 876 | #' 877 | #' # formulate null and full models in experiment 878 | #' # note: interaction term is a way of taking into account group effects 879 | #' mNull <- ~ns(time, df=4, intercept = FALSE) 880 | #' mFull <- ~ns(time, df=4, intercept = FALSE) + 881 | #' ns(time, df=4, intercept = FALSE):class + class 882 | #' 883 | #' # create deSet object 884 | #' de_obj <- deSet(exp_set, full.model = mFull, null.model = mNull, 885 | #' individual = ind) 886 | #' 887 | #' # extract out the individuals factor 888 | #' ind_exp <- individual(de_obj) 889 | #' 890 | #' @author John Storey, Andrew Bass 891 | #' 892 | #' @seealso \code{\linkS4class{deSet}} 893 | #' 894 | #' @export 895 | setGeneric("individual", function(object) standardGeneric("individual")) 896 | 897 | #' @rdname individual 898 | #' @export 899 | setGeneric("individual<-", function(object, value) { 900 | standardGeneric("individual<-") 901 | }) 902 | 903 | #' Regression coefficients from full model fit 904 | #' 905 | #' Access the full model fitted coefficients of a 906 | #' \code{\linkS4class{deFit}} object. 907 | #' 908 | #' @param object \code{S4 object}: \code{\linkS4class{deFit}} 909 | #' 910 | #' @return \code{betaCoef} returns the regression coefficients for the full 911 | #' model fit.
912 | #' 913 | #' @author John Storey, Andrew Bass 914 | #' 915 | #' @seealso \code{\link{fit_models}} 916 | #' 917 | #' @examples 918 | #' # import data 919 | #' library(splines) 920 | #' data(kidney) 921 | #' age <- kidney$age 922 | #' sex <- kidney$sex 923 | #' kidexpr <- kidney$kidexpr 924 | #' cov <- data.frame(sex = sex, age = age) 925 | #' 926 | #' # create models 927 | #' null_model <- ~sex 928 | #' full_model <- ~sex + ns(age, df = 4) 929 | #' 930 | #' # create deSet object from data 931 | #' de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model, 932 | #' full.model = full_model) 933 | #' 934 | #' # run fit_models to get model fits 935 | #' de_fit <- fit_models(de_obj) 936 | #' 937 | #' # extract beta coefficients 938 | #' beta <- betaCoef(de_fit) 939 | #' 940 | #' @export 941 | setGeneric("betaCoef", function(object) standardGeneric("betaCoef")) 942 | 943 | #' Statistic type used in analysis 944 | #' 945 | #' Access the statistic type in a \code{\linkS4class{deFit}} object. Can 946 | #' either be the optimal discovery procedure (odp) or the likelihood ratio 947 | #' test (lrt). 948 | #' 949 | #' @param object \code{S4 object}: \code{\linkS4class{deFit}} 950 | #' 951 | #' @examples 952 | #' # import data 953 | #' library(splines) 954 | #' data(kidney) 955 | #' age <- kidney$age 956 | #' sex <- kidney$sex 957 | #' kidexpr <- kidney$kidexpr 958 | #' cov <- data.frame(sex = sex, age = age) 959 | #' 960 | #' # create models 961 | #' null_model <- ~sex 962 | #' full_model <- ~sex + ns(age, df = 4) 963 | #' 964 | #' # create deSet object from data 965 | #' de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model, 966 | #' full.model = full_model) 967 | #' 968 | #' # run fit_models to get model fits 969 | #' de_fit <- fit_models(de_obj) 970 | #' 971 | #' # extract the statistic type of model fits 972 | #' stat_type <- sType(de_fit) 973 | #' 974 | #' @return \code{sType} returns the statistic type- either "odp" or "lrt". 
975 | #' 976 | #' @author John Storey, Andrew Bass 977 | #' 978 | #' @seealso \code{\link{fit_models}}, \code{\linkS4class{deFit}} and 979 | #' \code{\linkS4class{deSet}} 980 | #' 981 | #' @keywords sType 982 | #' 983 | #' @exportMethod sType 984 | setGeneric("sType", function(object) standardGeneric("sType")) 985 | 986 | #' Fitted data from the full model 987 | #' 988 | #' Access the fitted data from the full model in a 989 | #' \code{\linkS4class{deFit}} object. 990 | #' 991 | #' @param object \code{S4 object}: \code{\linkS4class{deFit}} 992 | #' 993 | #' @usage fitFull(object) 994 | #' 995 | #' @return \code{fitFull} returns a matrix of fitted values from full model. 996 | #' 997 | #' @examples 998 | #' # import data 999 | #' library(splines) 1000 | #' data(kidney) 1001 | #' age <- kidney$age 1002 | #' sex <- kidney$sex 1003 | #' kidexpr <- kidney$kidexpr 1004 | #' cov <- data.frame(sex = sex, age = age) 1005 | #' 1006 | #' # create models 1007 | #' null_model <- ~sex 1008 | #' full_model <- ~sex + ns(age, df = 4) 1009 | #' 1010 | #' # create deSet object from data 1011 | #' de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model, 1012 | #' full.model = full_model) 1013 | #' 1014 | #' # run fit_models to get model fits 1015 | #' de_fit <- fit_models(de_obj) 1016 | #' 1017 | #' # extract fitted values for full model 1018 | #' fitted_full <- fitFull(de_fit) 1019 | #' 1020 | #' @author John Storey, Andrew Bass 1021 | #' 1022 | #' @seealso \code{\link{fit_models}} 1023 | #' 1024 | #' @export 1025 | setGeneric("fitFull", function(object) standardGeneric("fitFull")) 1026 | 1027 | #' Fitted data from the null model 1028 | #' 1029 | #' Access the fitted data from the null model in an 1030 | #' \code{\linkS4class{deFit}} object. 1031 | #' 1032 | #' @param object \code{S4 object}: \code{\linkS4class{deFit}} 1033 | #' 1034 | #' @usage fitNull(object) 1035 | #' 1036 | #' @return \code{fitNull} returns a matrix of fitted values from null model. 
1037 | #' 1038 | #' @examples 1039 | #' # import data 1040 | #' library(splines) 1041 | #' data(kidney) 1042 | #' age <- kidney$age 1043 | #' sex <- kidney$sex 1044 | #' kidexpr <- kidney$kidexpr 1045 | #' cov <- data.frame(sex = sex, age = age) 1046 | #' 1047 | #' # create models 1048 | #' null_model <- ~sex 1049 | #' full_model <- ~sex + ns(age, df = 4) 1050 | #' 1051 | #' # create deSet object from data 1052 | #' de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model, 1053 | #' full.model = full_model) 1054 | #' 1055 | #' # run fit_models to get model fits 1056 | #' de_fit <- fit_models(de_obj) 1057 | #' 1058 | #' # extract fitted values from null model 1059 | #' fitted_null <- fitNull(de_fit) 1060 | #' 1061 | #' @author John Storey, Andrew Bass 1062 | #' 1063 | #' @seealso \code{\link{fit_models}} 1064 | #' 1065 | #' @export 1066 | setGeneric("fitNull", function(object) standardGeneric("fitNull")) 1067 | 1068 | #' Residuals of full model fit 1069 | #' 1070 | #' Access the fitted full model residuals in an \code{\linkS4class{deFit}} 1071 | #' object. 1072 | #' 1073 | #' @param object \code{S4 object}: \code{\linkS4class{deFit}} 1074 | #' 1075 | #' @usage resFull(object) 1076 | #' 1077 | #' @return \code{resFull} returns a matrix of residuals from full model. 
1078 | #' 1079 | #' @examples 1080 | #' # import data 1081 | #' library(splines) 1082 | #' data(kidney) 1083 | #' age <- kidney$age 1084 | #' sex <- kidney$sex 1085 | #' kidexpr <- kidney$kidexpr 1086 | #' cov <- data.frame(sex = sex, age = age) 1087 | #' 1088 | #' # create models 1089 | #' null_model <- ~sex 1090 | #' full_model <- ~sex + ns(age, df = 4) 1091 | #' 1092 | #' # create deSet object from data 1093 | #' de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model, 1094 | #' full.model = full_model) 1095 | #' 1096 | #' # run fit_models to get model fits 1097 | #' de_fit <- fit_models(de_obj) 1098 | #' 1099 | #' # extract out the full residuals from the model fit 1100 | #' res_full <- resFull(de_fit) 1101 | #' 1102 | #' @author John Storey, Andrew Bass 1103 | #' 1104 | #' @seealso \code{\link{fit_models}} 1105 | #' 1106 | #' @export 1107 | setGeneric("resFull", function(object) standardGeneric("resFull")) 1108 | 1109 | #' Residuals of null model fit 1110 | #' 1111 | #' Access the fitted null model residuals in an \code{\linkS4class{deFit}} 1112 | #' object. 1113 | #' 1114 | #' @param object \code{S4 object}: \code{\linkS4class{deFit}} 1115 | #' 1116 | #' @usage resNull(object) 1117 | #' 1118 | #' @return \code{resNull} returns a matrix of residuals from null model. 
1119 | #' 1120 | #' @examples 1121 | #' # import data 1122 | #' library(splines) 1123 | #' data(kidney) 1124 | #' age <- kidney$age 1125 | #' sex <- kidney$sex 1126 | #' kidexpr <- kidney$kidexpr 1127 | #' cov <- data.frame(sex = sex, age = age) 1128 | #' 1129 | #' # create models 1130 | #' null_model <- ~sex 1131 | #' full_model <- ~sex + ns(age, df = 4) 1132 | #' 1133 | #' # create deSet object from data 1134 | #' de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model, 1135 | #' full.model = full_model) 1136 | #' 1137 | #' # run fit_models to get model fits 1138 | #' de_fit <- fit_models(de_obj) 1139 | #' 1140 | #' # extract out the null residuals from the model fits 1141 | #' res_null <- resNull(de_fit) 1142 | #' @author John Storey, Andrew Bass 1143 | #' 1144 | #' @seealso \code{\link{fit_models}} 1145 | #' 1146 | #' @export 1147 | setGeneric("resNull", function(object) standardGeneric("resNull")) 1148 | 1149 | #' Summary of deFit and deSet 1150 | #' 1151 | #' Summary of \code{\linkS4class{deFit}} and \code{\linkS4class{deSet}} objects. 
1152 | #' 1153 | #' @param object \code{S4 object}: \code{\linkS4class{deSet}} 1154 | #' @param \dots additional parameters 1155 | #' 1156 | #' @examples 1157 | #' # import data 1158 | #' library(splines) 1159 | #' data(kidney) 1160 | #' age <- kidney$age 1161 | #' sex <- kidney$sex 1162 | #' kidexpr <- kidney$kidexpr 1163 | #' cov <- data.frame(sex = sex, age = age) 1164 | #' 1165 | #' # create models 1166 | #' null_model <- ~sex 1167 | #' full_model <- ~sex + ns(age, df = 4) 1168 | #' 1169 | #' # create deSet object from data 1170 | #' de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model, 1171 | #' full.model = full_model) 1172 | #' 1173 | #' # get summary 1174 | #' summary(de_obj) 1175 | #' 1176 | #' # run odp and summarize 1177 | #' de_odp <- odp(de_obj, bs.its= 20) 1178 | #' summary(de_odp) 1179 | #' @author John Storey, Andrew Bass 1180 | #' 1181 | #' @return 1182 | #' Summary of \code{\linkS4class{deSet}} object 1183 | #' 1184 | #' @keywords summary 1185 | #' 1186 | #' @export summary 1187 | setGeneric("summary") 1188 | 1189 | #' Show function for deFit and deSet 1190 | #' 1191 | #' Show function for \code{\linkS4class{deFit}} and \code{\linkS4class{deSet}} 1192 | #' objects. 
1193 | #' 1194 | #' @param object \code{S4 object}: \code{\linkS4class{deSet}} 1195 | #' @param \dots additional parameters 1196 | #' 1197 | #' @examples 1198 | #' # import data 1199 | #' library(splines) 1200 | #' data(kidney) 1201 | #' age <- kidney$age 1202 | #' sex <- kidney$sex 1203 | #' kidexpr <- kidney$kidexpr 1204 | #' cov <- data.frame(sex = sex, age = age) 1205 | #' 1206 | #' # create models 1207 | #' null_model <- ~sex 1208 | #' full_model <- ~sex + ns(age, df = 4) 1209 | #' 1210 | #' # create deSet object from data 1211 | #' de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model, 1212 | #' full.model = full_model) 1213 | #' 1214 | #' # get summary 1215 | #' summary(de_obj) 1216 | #' 1217 | #' # run odp and summarize 1218 | #' de_odp <- odp(de_obj, bs.its= 20) 1219 | #' de_odp 1220 | #' @author John Storey, Andrew Bass 1221 | #' 1222 | #' @return 1223 | #' Nothing of interest 1224 | #' 1225 | #' @export 1226 | setGeneric("show") 1227 | -------------------------------------------------------------------------------- /R/ExpressionSet-methods.R: -------------------------------------------------------------------------------- 1 | setAs("ExpressionSet", "deSet", function(from, to) updateOldExpSet(from, "deSet")) 2 | 3 | updateOldExpSet <- function(from, toClass, ...) 
{ # to deSet 4 | # new object 5 | object <- new(toClass, 6 | assayData = from@assayData, 7 | phenoData = from@phenoData, 8 | featureData = annotatedDataFrameFrom(from@assayData, 9 | byrow = TRUE), 10 | experimentData = from@experimentData, 11 | annotation = from@annotation) 12 | validObject(object) 13 | object 14 | } 15 | #' @rdname deSet 16 | setMethod("deSet", 17 | signature = signature(object = "ExpressionSet"), 18 | function(object, 19 | full.model, 20 | null.model, 21 | individual = NULL) { 22 | deObj <- as(object, "deSet") 23 | # Input checks 24 | if (!is.null(individual)) { 25 | if (length(individual) != ncol(exprs(object))) { 26 | stop("ind must be the same length as the number of arrays") 27 | } 28 | } 29 | if (missing(full.model) || missing(null.model)) { 30 | stop("provide both full and null models") 31 | } 32 | createSet(deObj, 33 | nMod = null.model, 34 | fMod= full.model, 35 | ind = individual) 36 | }) 37 | -------------------------------------------------------------------------------- /R/deFit-methods.R: -------------------------------------------------------------------------------- 1 | #' @rdname summary 2 | setMethod("summary", 3 | signature=signature(object="deFit"), 4 | function(object) { 5 | cat('\n'); cat('deFit Summary', '\n', '\n') 6 | # cat('Models:', '\n') 7 | # print(object@fitted.models) 8 | cat('fit.full:', '\n') 9 | print(signif(object@fit.full[(1:min(3, nrow(object@fit.full))), ]), digits=3) 10 | cat('\nfit.null:', '\n') 11 | print(signif(object@fit.null[(1:min(3, nrow(object@fit.null))), ]), digits=3) 12 | cat('\nres.full:', '\n') 13 | print(signif(object@res.full[(1:min(3, nrow(object@res.full))), ]), digits=3) 14 | cat('\nres.null:', '\n') 15 | print(signif(object@res.null[(1:min(3, nrow(object@res.null))), ]), digits=3) 16 | cat('\nbeta.coef:', '\n') 17 | print(signif(object@beta.coef[(1:min(3, nrow(object@beta.coef))), ]), digits=3) 18 | cat('\nstat.type:', '\n') 19 | print(object@stat.type) 20 | }) 21 | #'@rdname show 22 | 
setMethod("show", 23 | signature=signature(object="deFit"), 24 | function(object) { 25 | cat('\n'); cat('deFit Summary', '\n', '\n') 26 | # cat('Models:', '\n') 27 | # print(object@fitted.models) 28 | cat('fit.full:', '\n') 29 | print(signif(object@fit.full[(1:min(2, nrow(object@fit.full))), ]), digits=3) 30 | cat('\nfit.null:', '\n') 31 | print(signif(object@fit.null[(1:min(2, nrow(object@fit.null))), ]), digits=3) 32 | cat('\nres.full:', '\n') 33 | print(signif(object@res.full[(1:min(2, nrow(object@res.full))), ]), digits=3) 34 | cat('\nres.null:', '\n') 35 | print(signif(object@res.null[(1:min(2, nrow(object@res.null))), ]), digits=3) 36 | cat('\nbeta.coef:', '\n') 37 | print(signif(object@beta.coef[(1:min(5, nrow(object@beta.coef))), ]), digits=3) 38 | cat('\nstat.type:', '\n') 39 | print(object@stat.type) 40 | }) 41 | -------------------------------------------------------------------------------- /R/deSet-methods.R: -------------------------------------------------------------------------------- 1 | #' @rdname fit_models 2 | setMethod("fit_models", 3 | "deSet", 4 | function(object, stat.type = c("lrt", "odp"), weights = NULL) { 5 | # Initializations (weighted fits are delegated to fit_wmodels) 6 | if (!is.null(weights)) return(fit_wmodels(object, stat.type = stat.type, w = weights)) 7 | stat.var <- match.arg(stat.type, c("lrt", "odp")) 8 | exprsData <- exprs(object) 9 | 10 | null.matrix <- object@null.matrix 11 | full.matrix <- object@full.matrix 12 | # If individuals are recorded, subtract the per-individual fit (Hi) from the data and from both design matrices 13 | if (length(object@individual) != 0) { 14 | ind.matrix <- model.matrix(~-1 + as.factor(object@individual)) 15 | Hi <- projMatrix(ind.matrix) 16 | fitInd <- t(Hi %*% t(exprsData)) 17 | exprsData <- exprsData - fitInd 18 | full.matrix <- full.matrix - Hi %*% full.matrix 19 | null.matrix <- null.matrix - Hi %*% null.matrix 20 | full.matrix <- rm.zero.cols(full.matrix) 21 | null.matrix <- rm.zero.cols(null.matrix) 22 | } 23 | # Fitted exprsData and statistics under null model and full
model 24 | H.null <- projMatrix(null.matrix) 25 | fitNull <- t(H.null %*% t(exprsData)) 26 | resNull <- exprsData - fitNull 27 | if (stat.var == "odp") { # odp: the full model is fit to the null-model residuals 28 | full.matrix <- full.matrix - H.null %*% full.matrix 29 | full.matrix <- rm.zero.cols(full.matrix) 30 | H.full <- projMatrix(full.matrix) 31 | B.coef <- resNull %*% full.matrix %*% ginv(t(full.matrix) %*% full.matrix) 32 | dHFull <- diag(H.full) 33 | fitFull <- t(H.full %*% t(resNull)) 34 | resFull <- resNull - fitFull 35 | } else { # lrt: the full model is fit to the expression data directly 36 | H.full <- projMatrix(full.matrix) 37 | dHFull <- diag(H.full) 38 | B.coef <- exprsData %*% full.matrix %*% ginv(t(full.matrix) %*% full.matrix) 39 | fitFull <- t(H.full %*% t(exprsData)) 40 | resFull <- exprsData - fitFull 41 | } 42 | efObj <- new("deFit", fit.full = fitFull, fit.null = fitNull, # dH.full repeats diag(H.full) on every row 43 | dH.full = matrix(dHFull, nrow = nrow(resFull), ncol = length(dHFull), byrow = TRUE), res.full = resFull, 44 | res.null = resNull, beta.coef = B.coef, 45 | stat.type = stat.var) 46 | return(efObj) 47 | }) 48 | 49 | #' @rdname odp 50 | setMethod("odp", 51 | signature = signature(object = "deSet", de.fit = "missing"), 52 | function(object, de.fit, odp.parms = NULL, weights = NULL, bs.its = 100, 53 | n.mods = 50, seed = NULL, verbose = TRUE, ...) { 54 | de.fit <- fit_models(object, 55 | stat.type = "odp", weights = weights) 56 | results <- odp(object, de.fit, 57 | odp.parms = odp.parms, 58 | n.mods = n.mods, 59 | bs.its = bs.its, 60 | seed = seed, 61 | verbose = verbose, ...) 62 | return(results) 63 | }) 64 | 65 | #' @rdname odp 66 | setMethod("odp", 67 | signature = signature(object = "deSet", de.fit = "deFit"), 68 | function(object, de.fit, odp.parms = NULL, weights = NULL, bs.its = 100, 69 | n.mods = 50, seed = NULL, verbose = TRUE, ...)
{ 70 | if (!is.null(seed)) { 71 | set.seed(seed) 72 | } 73 | if (is.null(odp.parms)) { 74 | odp.parms <- kl_clust(object, de.fit = de.fit, 75 | n.mods = n.mods) 76 | } else if (sum(!(names(odp.parms) %in% c("mu.full", "sig.full", 77 | "mu.null", "sig.null", 78 | "n.per.mod", 79 | "clustMembers"))) != 0) { 80 | stop("Not a correct ODP parameter list. See kl_clust documentation") 81 | } 82 | odp.stat <- odpStat(n.res = de.fit@res.null, 83 | clustParms = odp.parms) 84 | null.stat <- bootstrap(object = object, 85 | obs.fit = de.fit, 86 | clustParms = odp.parms, 87 | bs.its = bs.its, 88 | verbose = verbose) 89 | pval <- empPvals(stat = odp.stat, 90 | stat0 = null.stat) 91 | qval <- qvalue(pval, ...) 92 | qval$stat0 <- null.stat 93 | qval$stat <- odp.stat 94 | qvalueObj(object) <- qval 95 | return(object) 96 | }) 97 | 98 | #' @rdname lrt 99 | setMethod("lrt", 100 | signature = signature(object = "deSet", de.fit = "missing"), 101 | function(object, de.fit, nullDistn = c("normal", "bootstrap"), weights = NULL, 102 | bs.its = 100, seed = NULL, verbose = TRUE, mod.F = FALSE, ...) { 103 | de.fit <- fit_models(object, 104 | stat.type = "lrt", weights = weights) 105 | results <- lrt(object, 106 | de.fit = de.fit, 107 | nullDistn = nullDistn, 108 | bs.its = bs.its, 109 | seed = seed, 110 | verbose = verbose, 111 | mod.F = mod.F, ...) 112 | return(results) 113 | }) 114 | 115 | #' @rdname lrt 116 | setMethod("lrt", 117 | signature = signature(object = "deSet", de.fit = "deFit"), 118 | function(object, de.fit, nullDistn = c("normal", "bootstrap"), weights = NULL, 119 | bs.its = 100, seed = NULL, verbose = TRUE, mod.F = FALSE, ...) 
{ 120 | # Initilizations 121 | nFull <- ncol(object@full.matrix) 122 | nNull <- ncol(object@null.matrix) 123 | n <- ncol(object) 124 | m <- nrow(object) 125 | post.var <- out<- NULL 126 | if (!is.null(seed)) { 127 | set.seed(seed) 128 | } 129 | nullDistn <- match.arg(nullDistn, c("normal", "bootstrap")) 130 | # lrt observed stat 131 | if (mod.F) { 132 | df_full <- n - nFull 133 | var_full <- rowSums(de.fit@res.full ^ 2) / df_full 134 | out <- squeezeVar(var_full, df_full, covariate = rowMeans(exprs(object))) 135 | post.var <- out$var.post 136 | prior.df <- out$df.prior 137 | df2 = (n - nFull) + prior.df 138 | } 139 | stat <- lrtStat(resNull = de.fit@res.null, 140 | resFull = de.fit@res.full, 141 | post.var = post.var) 142 | # If nullDistn is normal then return p-values from F-test else 143 | # return empirical p-values from qvalue package 144 | if (nullDistn == "normal") { 145 | if (mod.F) { 146 | df1 <- nFull - nNull 147 | stat = stat / df1 148 | } else { 149 | df1 = nFull - nNull 150 | df2 = n - nFull 151 | stat = stat * df2 / df1 152 | } 153 | pval <- pf(q = stat, df1 = df1, df2 = df2, lower.tail = FALSE) 154 | qval <- qvalue(pval, ...) 155 | qval$stat <- stat 156 | qval$df2 <- df2 157 | qval$df1 <- df1 158 | qvalueObj(object) <- qval 159 | return(object) 160 | } else { 161 | null.stat <- bootstrap(object = object, 162 | obs.fit = de.fit, 163 | bs.its = bs.its, 164 | verbose = verbose, 165 | mod.F = mod.F, 166 | post.var = out) 167 | pval <- empPvals(stat = stat, 168 | stat0 = null.stat) 169 | qval <- qvalue(pval, ...) 
170 | qval$stat0 <- null.stat 171 | qval$stat <- stat 172 | qvalueObj(object) <- qval 173 | return(object) 174 | } 175 | }) 176 | 177 | #' @rdname kl_clust 178 | setMethod("kl_clust", 179 | signature = signature(object = "deSet", de.fit = "missing"), 180 | function(object, de.fit, n.mods = 50) { 181 | de.fit <- fit_models(object, stat.type = "odp") 182 | results <- kl_clust(object, de.fit, 183 | n.mods = n.mods) 184 | return(results) 185 | }) 186 | 187 | #' @rdname kl_clust 188 | setMethod("kl_clust", 189 | signature = signature(object = "deSet", de.fit = "deFit"), 190 | function(object, de.fit, n.mods = 50) { 191 | nf <- mod.df(object@full.matrix) 192 | nn <- mod.df(object@null.matrix) 193 | mod.member <- klmod(de.fit, nf = nf, 194 | n.mods = n.mods) 195 | return(mod.parms(de.fit, nf = nf, nn = nn, 196 | clMembers = mod.member)) 197 | }) 198 | 199 | #' @rdname summary 200 | setMethod("summary", 201 | signature = signature(object="deSet"), 202 | function(object, ...) { 203 | cat('\n') 204 | cat('ExpressionSet Summary', '\n', '\n') 205 | tmp <- as(object, "ExpressionSet") 206 | print(tmp) 207 | cat('\n') 208 | cat('de Analysis Summary', '\n', '\n') 209 | cat('Total number of arrays:', ncol(exprs(object)), '\n') 210 | cat('Total number of probes:', nrow(exprs(object)), '\n', '\n') 211 | cat('Biological variables:', '\n') 212 | cat('\tNull Model:') 213 | print(nullModel(object)) 214 | cat('\n\tFull Model:') 215 | print(fullModel(object)) 216 | cat('\n') 217 | if (length(object@individual) != 0) { 218 | cat('Individuals:', '\n') 219 | ind <- as.numeric(object@individual) 220 | print(individual(object)) 221 | cat('\n') 222 | } 223 | cat('.......', '\n', '\n') 224 | if (!is.null(object@qvalueObj$pvalues)) { 225 | cuts <- c(0.0001, 0.001, 0.01, 0.025, 0.05, 0.10, 1) 226 | digits <- getOption("digits") 227 | cat("\nStatistical significance summary:\n") 228 | cat("pi0:", format(object@qvalueObj$pi0, digits = digits), 229 | "\n", sep = "\t") 230 | cat("\n") 231 | 
cat("Cumulative number of significant calls:\n") 232 | cat("\n") 233 | counts <- sapply(cuts, function(x) c("p-value" = sum(object@qvalueObj$pvalues < x), 234 | "q-value" = sum(object@qvalueObj$qvalues < x), 235 | "local fdr" = sum(object@qvalueObj$lfdr < x))) 236 | colnames(counts) <- paste("<", cuts, sep = "") 237 | print(counts) 238 | cat("\n") 239 | } 240 | }) 241 | #' @rdname show 242 | setMethod("show", 243 | signature = signature(object="deSet"), 244 | function(object) { 245 | cat('\n') 246 | cat('ExpressionSet Summary', '\n', '\n') 247 | tmp <- as(object, "ExpressionSet") 248 | print(tmp) 249 | cat('\n') 250 | cat('de Analysis Summary', '\n', '\n') 251 | cat('Total number of arrays:', ncol(exprs(object)), '\n') 252 | cat('Total number of probes:', nrow(exprs(object)), '\n', '\n') 253 | cat('Biological variables:', '\n') 254 | cat('\tNull Model: ') 255 | print(nullModel(object)) 256 | cat('\tFull Model: ') 257 | print(fullModel(object)) 258 | cat('\n') 259 | if (length(object@individual) != 0) { 260 | cat('Individuals:', '\n') 261 | ind <- as.numeric(object@individual) 262 | print(matrix(apply(((1:length(ind)) * t((ind))), 2, sum), 263 | nrow=1)) 264 | cat('\n') 265 | } 266 | cat('Expression data:', '\n') 267 | print(signif(exprs(object)[(1:min(5, nrow(exprs(object)))), ]), 268 | digits = 3) 269 | cat('.......','\n','\n') 270 | if (!is.null(object@qvalueObj$pvalues)) { 271 | cuts <- c(0.0001, 0.001, 0.01, 0.025, 0.05, 0.10, 1) 272 | digits <- getOption("digits") 273 | cat("\nStatistical significance summary:\n") 274 | cat("pi0:", format(object@qvalueObj$pi0, digits = digits), "\n", 275 | sep = "\t") 276 | cat("\n") 277 | cat("Cumulative number of significant calls:\n") 278 | cat("\n") 279 | counts <- sapply(cuts, function(x) c("p-value" = sum(object@qvalueObj$pvalues < x), 280 | "q-value" = sum(object@qvalueObj$qvalues < x), 281 | "local fdr" = sum(object@qvalueObj$lfdr < x))) 282 | colnames(counts) <- paste("<", cuts, sep="") 283 | print(counts) 284 | 
cat("\n") 285 | } 286 | }) 287 | 288 | #' @rdname apply_qvalue 289 | setMethod("apply_qvalue", 290 | signature = signature(object="deSet"), 291 | function(object, ...) { 292 | if (length(object@qvalueObj) == 0) { 293 | stop("qvalueObj is empty- need to run either odp or lrt") 294 | } 295 | qvalueObj(object) <- qvalue(object@qvalueObj$pvalues, ...) 296 | validObject(object) 297 | object 298 | }) 299 | 300 | #' @rdname apply_sva 301 | setMethod("apply_sva", 302 | signature = signature(object="deSet"), 303 | function(object, ...) { 304 | full.matrix <- object@full.matrix 305 | null.matrix <- object@null.matrix 306 | sv.sva <- sva(exprs(object), 307 | mod0 = null.matrix, 308 | mod = full.matrix, ...)$sv 309 | colnames(sv.sva) <- paste("SV", 1:ncol(sv.sva), sep="") 310 | pData(object) <- cbind(pData(object), sv.sva) 311 | fullModel(object) <- as.formula(paste("~", 312 | paste(c(colnames(sv.sva), 313 | attr(terms(fullModel(object)), 314 | "term.labels")), 315 | collapse=" + "), 316 | sep="")) 317 | nullModel(object) <- as.formula(paste("~",paste(c(colnames(sv.sva), 318 | attr(terms(nullModel(object)), 319 | "term.labels")), 320 | collapse=" + "), 321 | sep="")) 322 | validObject(object) 323 | object 324 | }) 325 | 326 | #' @rdname apply_snm 327 | setMethod("apply_snm", 328 | signature = signature(object="deSet"), 329 | function(object, int.var=NULL, ...) 
{ 330 | full.matrix <- object@full.matrix 331 | null.matrix <- object@null.matrix 332 | full.matrix <- full.matrix - projMatrix(null.matrix) %*% full.matrix 333 | full.matrix <- as.matrix(rm.zero.cols(full.matrix)) 334 | if(is.null(int.var)) { 335 | int.var <- 1:ncol(exprs(object)) 336 | warning("Setting int.var=1:n where n is number of samples.") 337 | } 338 | exprs(object) <- snm(exprs(object), 339 | bio.var = full.matrix, 340 | adj.var = null.matrix, 341 | int.var = int.var, ...)$norm.dat 342 | validObject(object) 343 | object 344 | }) 345 | 346 | 347 | #' @rdname apply_jackstraw 348 | setMethod("apply_jackstraw", 349 | signature = signature(object="deSet"), 350 | function(object, r1 = NULL, r = NULL, s = NULL, B = NULL, 351 | covariate = NULL, verbose = TRUE, seed = NULL) { 352 | dat <- exprs(object) 353 | js <- jackstraw::jackstraw_pca(dat, r1 = r1, r = r, s = s, B = B, 354 | covariate = covariate, verbose = verbose, seed = seed) 355 | return(js) 356 | }) 357 | -------------------------------------------------------------------------------- /R/edge.R: -------------------------------------------------------------------------------- 1 | #' @title 2 | #' Extraction of Differential Gene Expression 3 | #' 4 | #' @description 5 | #' The edge package implements methods for carrying out differential 6 | #' expression analyses of genome-wide gene expression studies. Significance 7 | #' testing using the optimal discovery procedure and generalized likelihood 8 | #' ratio tests (equivalent to F-tests and t-tests) are implemented for general study 9 | #' designs. Special functions are available to facilitate the analysis of 10 | #' common study designs, including time course experiments. Other packages 11 | #' such as snm, sva, and qvalue are integrated in edge to provide a wide range 12 | #' of tools for gene expression analysis. 
13 | #' 14 | #' @examples 15 | #' \dontrun{ 16 | #' browseVignettes("edge") 17 | #' } 18 | #' @name edge 19 | #' @author John Storey, Jeffrey Leek, Andrew Bass 20 | #' @docType package 21 | #' @import Biobase methods splines sva snm qvalue MASS 22 | #' @useDynLib edge odpScoreCluster kldistance 23 | NULL 24 | 25 | #' @title Gene expression dataset from Calvano et al. (2005) Nature 26 | #' 27 | #' @description 28 | #' The data provide gene expression measurements in an endotoxin study where 29 | #' four subjects were given endotoxin and four subjects were given a placebo. 30 | #' Blood samples were collected and leukocytes were isolated from the samples 31 | #' before infusion and at times 2, 4, 6, 9, 24 hours. 32 | #' 33 | #' @usage data(endotoxin) 34 | #' @format 35 | #' \itemize{ 36 | #' \item endoexpr: A 500 rows by 46 columns data frame containing expression 37 | #' values. 38 | #' \item class: A vector of length 46 containing information about which 39 | #' individuals were given endotoxin. 40 | #' \item ind: A vector of length 46 providing indexing measurements for each 41 | #' individual in the experiment. 42 | #' \item time: A vector of length 46 indicating time measurements. 43 | #' } 44 | #' 45 | #' @note 46 | #' The data is a random subset of 500 genes from the full dataset. To 47 | #' download the full data set, go to \url{http://genomine.org/edge/}. 48 | #' 49 | #' @references 50 | #' Storey JD, Xiao W, Leek JT, Tompkins RG, and Davis RW. (2005) Significance 51 | #' analysis of time course microarray experiments. PNAS, 102: 12837-12842. 
\cr 52 | #' \url{http://www.pnas.org/content/102/36/12837.full} 53 | #' 54 | #' @examples 55 | #' library(splines) 56 | #' # import data 57 | #' data(endotoxin) 58 | #' ind <- endotoxin$ind 59 | #' class <- endotoxin$class 60 | #' time <- endotoxin$time 61 | #' endoexpr <- endotoxin$endoexpr 62 | #' cov <- data.frame(individual = ind, time = time, class = class) 63 | #' 64 | #' # formulate null and full models in experiment 65 | #' # note: interaction term is a way of taking into account group effects 66 | #' mNull <- ~ns(time, df=4, intercept = FALSE) + class 67 | #' mFull <- ~ns(time, df=4, intercept = FALSE) + 68 | #' ns(time, df=4, intercept = FALSE):class + class 69 | #' 70 | #' # create deSet object 71 | #' de_obj <- build_models(endoexpr, cov = cov, full.model = mFull, 72 | #' null.model = mNull, ind = ind) 73 | #' 74 | #' # Perform ODP/lrt statistic to determine significant genes in study 75 | #' de_odp <- odp(de_obj, bs.its = 10) 76 | #' de_lrt <- lrt(de_obj, nullDistn = "bootstrap", bs.its = 10) 77 | #' 78 | #' # summarize significance results 79 | #' summary(de_odp) 80 | #' @name endotoxin 81 | #' @return endotoxin dataset 82 | #' @docType data 83 | #' @keywords datasets 84 | NULL 85 | 86 | #' @title Gene expression dataset from Rodwell et al. (2004) 87 | #' 88 | #' @usage 89 | #' data(kidney) 90 | #' 91 | #' @description 92 | #' Gene expression measurements from kidney samples were obtained from 72 93 | #' human subjects ranging in age from 27 to 92 years. Only one array was 94 | #' obtained per individual, and the age and sex of each individual were 95 | #' recorded. 96 | #' 97 | #' @format 98 | #' \itemize{ 99 | #' \item kidcov: A 133 rows by 6 columns data frame detailing the study 100 | #' design. 101 | #' \item kidexpr: A 500 rows by 133 columns matrix of gene expression values, 102 | #' where each row corresponds to a different probe-set and each column to a 103 | #' different tissue sample.
104 | #' \item age: A vector of length 133 giving the age of each sample. 105 | #' \item sex: A vector of length 133 giving the sex of each sample. 106 | #' } 107 | #' @note 108 | #' These data are a random subset of 500 probe-sets from the total number of 109 | #' probe-sets in the original data set. To download the full data set, go to 110 | #' \url{http://genomine.org/edge/}. The \code{age} and \code{sex} are contained 111 | #' in the \code{kidcov} data frame. 112 | #' 113 | #' @references 114 | #' Storey JD, Xiao W, Leek JT, Tompkins RG, and Davis RW. (2005) Significance 115 | #' analysis of time course microarray experiments. PNAS, 102: 12837-12842. \cr 116 | #' \url{http://www.pnas.org/content/102/36/12837.full} 117 | #' 118 | #' @examples 119 | #' # import data 120 | #' data(kidney) 121 | #' sex <- kidney$sex 122 | #' age <- kidney$age 123 | #' kidexpr <- kidney$kidexpr 124 | #' 125 | #' # create model 126 | #' de_obj <- build_study(data = kidexpr, adj.var = sex, tme = age, 127 | #' sampling = "timecourse", basis.df = 4) 128 | #' 129 | #' # use the ODP/lrt method to determine significant genes 130 | #' de_odp <- odp(de_obj, bs.its=10) 131 | #' de_lrt <- lrt(de_obj, nullDistn = "bootstrap", bs.its = 10) 132 | #' 133 | #' # summarize significance results 134 | #' summary(de_odp) 135 | #' @name kidney 136 | #' @return kidney dataset 137 | #' @docType data 138 | #' @keywords datasets 139 | NULL 140 | 141 | #' @title Gene expression dataset from Idaghdour et al. (2008) 142 | #' 143 | #' @usage 144 | #' data(gibson) 145 | #' 146 | #' @description 147 | #' The data provide gene expression measurements in peripheral blood leukocyte 148 | #' samples from three Moroccan groups leading distinct ways of life: 149 | #' desert nomadic (DESERT), mountain agrarian (VILLAGE), and coastal urban 150 | #' (AGADIR). 151 | #' 152 | #' @format 153 | #' \itemize{ 154 | #' \item batch: Batches in experiment. 155 | #' \item location: Environment/lifestyle of Moroccan Amazigh groups.
156 | #' \item gender: Sex of individuals. 157 | #' \item gibexpr: A 500 rows by 46 columns matrix of gene expression values. 158 | #' } 159 | #' 160 | #' @note 161 | #' These data are a random subset of 500 genes from the total number of genes 162 | #' in the original data set. To download the full data set, go to 163 | #' \url{http://genomine.org/de/}. 164 | #' 165 | #' @references 166 | #' Idaghdour Y, Storey JD, Jadallah S, and Gibson G. (2008) A genome-wide gene 167 | #' expression signature of lifestyle in peripheral blood of Moroccan Amazighs. 168 | #' PLoS Genetics, 4: e1000052. 169 | #' 170 | #' @examples 171 | #' # import 172 | #' data(gibson) 173 | #' batch <- gibson$batch 174 | #' gender <- gibson$gender 175 | #' location <- gibson$location 176 | #' gibexpr <- gibson$gibexpr 177 | #' cov <- data.frame(Batch = batch, Gender = gender, 178 | #' Location = location) 179 | #' 180 | #' # create deSet for experiment- static experiment 181 | #' mNull <- ~Gender + Batch 182 | #' mFull <- ~Gender + Batch + Location 183 | #' 184 | #' # create deSet object 185 | #' de_obj <- build_models(gibexpr, cov = cov, full.model = mFull, 186 | #' null.model = mNull) 187 | #' 188 | #' # Perform ODP/lrt statistic to determine significant genes in study 189 | #' de_odp <- odp(de_obj, bs.its = 10) 190 | #' de_lrt <- lrt(de_obj, nullDistn = "bootstrap", bs.its = 10) 191 | #' 192 | #' # summarize significance results 193 | #' summary(de_odp) 194 | #' @name gibson 195 | #' @return gibson dataset 196 | #' @docType data 197 | #' @keywords datasets 198 | NULL 199 | -------------------------------------------------------------------------------- /R/form_models.R: -------------------------------------------------------------------------------- 1 | #' Formulates the experimental models 2 | #' 3 | #' \code{build_study} generates the full and null models for users unfamiliar 4 | #' with building models in R. There are two types of experimental designs: 5 | #' static and time-course. 
#' For more details, refer to the vignette.
#'
#' @param data \code{matrix}: gene expression data (rows are genes, columns are
#' samples).
#' @param sampling \code{string}: type of study. Either "static" or
#' "timecourse". Default is "static". Not used when \code{bio.var} is
#' supplied.
#' @param grp \code{factor}: group assignment in the study (for K-class
#' studies). Required for "static" sampling, optional otherwise. Not used
#' when \code{bio.var} is supplied.
#' @param tme \code{vector}: time variable in a time course study. Required
#' for "timecourse" sampling. Not used when \code{bio.var} is supplied.
#' @param ind \code{factor}: individual factor for repeated observations of the
#' same individuals. Optional.
#' @param bio.var \code{matrix}: biological variables. Optional. When
#' supplied, the full model contains \code{adj.var} and \code{bio.var} and
#' the null model contains \code{adj.var} only.
#' @param basis.df \code{numeric}: degrees of freedom of the basis for time
#' course study. Default is 2.
#' @param basis.type \code{string}: either "ncs" (natural cubic spline, built
#' with \code{ns}) or "poly" (polynomial spline, built with \code{bs}) basis
#' for time course study. Default is "ncs".
#' @param adj.var \code{matrix}: adjustment variables. Optional.
#'
#' @return \code{\linkS4class{deSet}} object
#'
#' @examples
#' # create ExpressionSet object from kidney dataset
#' library(splines)
#' data(kidney)
#' age <- kidney$age
#' sex <- kidney$sex
#' kidexpr <- kidney$kidexpr
#'
#' # create deSet object from data
#' de_obj <- build_study(data = kidexpr, adj.var = sex, tme = age,
#' sampling = "timecourse", basis.df = 4)
#' @seealso \code{\linkS4class{deSet}}, \code{\link{build_models}}
#' @author John Storey, Andy Bass
#' @export
build_study <- function(data, grp = NULL, adj.var = NULL, bio.var = NULL,
                        tme = NULL, ind = NULL,
                        sampling = c("static", "timecourse"), basis.df = 2,
                        basis.type = c("ncs", "poly")) {
  if (!is.matrix(data)) {
    stop("data must be a matrix")
  }
  n <- ncol(data)
  # Each supplied covariate is turned into a data frame so that its column
  # names can be used as formula terms below.
  if (!is.null(tme)) {
    if (is.matrix(tme) || is.vector(tme)) {
      tme <- data.frame(tme)
    } else {
      stop("tme must be a matrix")
    }
  }
  if (!is.null(adj.var)) {
    if (is.matrix(adj.var) || is.vector(adj.var) || is.factor(adj.var)) {
      adj.var <- data.frame(adj.var)
    } else {
      stop("adj.var must be a matrix")
    }
  }
  if (!is.null(bio.var)) {
    if (is.matrix(bio.var) || is.vector(bio.var) || is.factor(bio.var)) {
      bio.var <- data.frame(bio.var)
    } else {
      stop("bio.var must be a matrix")
    }
    # Create models: the biological variables are the terms of interest and
    # the adjustment variables (if any) make up the null model.
    if (is.null(adj.var)) {
      pdat <- data.frame(bio.var)
      fmod <- paste("~", paste(names(pdat), collapse = " + "))
      nmod <- "~1"
    } else {
      pdat <- data.frame(adj.var, bio.var)
      fmod <- paste("~", paste(names(pdat), collapse = " + "))
      nmod <- paste("~", paste(names(adj.var), collapse = " + "))
    }
  } else {
    sampling <- match.arg(sampling, choices = c("static", "timecourse"))
    if (!is.null(grp)) {
      if (is.factor(grp)) {
        grp <- data.frame(grp = as.factor(grp))
      } else {
        stop("grp must be a factor")
      }
    } else {
      if (sampling == "static") {
        stop("grp variable cannot be missing for static sampling.")
      }
      # Time course without groups: a single constant group.
      grp <- data.frame(grp = rep(1, n))
    }
    g <- nrow(unique(grp))
    if (sampling == "static") {
      if (g == 1) {
        stop("grp must have more than one unique value for static sampling.")
      }
      if (is.null(adj.var)) {
        pdat <- data.frame(grp)
        nmod <- "~1"
        fmod <- paste("~", paste(names(pdat), collapse = " + "))
      } else {
        pdat <- data.frame(adj.var, grp)
        fmod <- paste("~", paste(names(pdat), collapse = " + "))
        nmod <- paste("~", paste(names(adj.var), collapse = " + "))
      }
    } else {
      # sampling == "timecourse"
      if (is.null(tme)) {
        # Without this check the failure surfaced as the unrelated
        # "Only one time variable is allowed" error below.
        stop("tme variable cannot be missing for timecourse sampling.")
      }
      basis.type <- match.arg(basis.type)
      varName <- colnames(data.frame(tme))
      if (length(varName) != 1) stop("Only one time variable is allowed. See ?deSet for information on how to create complicated models")
      # Spline term for the time variable, written as formula text.
      basis.fun <- switch(basis.type, ncs = "ns", poly = "bs")
      time.basis <- paste(basis.fun, "(", varName, ", df=", basis.df,
                          ", intercept=FALSE)", sep = "")
      if (g == 1) {
        # time course with no groups
        if (is.null(adj.var)) {
          pdat <- data.frame(tme)
          nmod <- "~1"
          fmod <- paste("~", time.basis)
        } else {
          pdat <- data.frame(adj.var, tme)
          fmod <- paste("~", paste(names(adj.var), collapse = " + "), "+",
                        time.basis)
          nmod <- paste("~", paste(names(adj.var), collapse = " + "))
        }
      } else {
        # time course with groups: the null model has the covariates and the
        # time basis; the full model adds the group-by-time interaction.
        # The first column of pdat is the time variable and the last column
        # is the group factor.
        if (is.null(adj.var)) {
          pdat <- data.frame(tme, grp)
        } else {
          pdat <- data.frame(tme, adj.var, grp)
        }
        covars <- paste("~", paste(names(pdat)[-1], collapse = " + "))
        interaction <- paste("(",
                             paste(names(pdat)[ncol(pdat)], collapse = " + ",
                                   sep = ""),
                             ")", ":", time.basis)
        nmod <- paste(covars, "+", time.basis)
        fmod <- paste(covars, "+", time.basis, "+", interaction)
      }
    }
  }
  rownames(pdat) <- colnames(data)
  exp_set <- ExpressionSet(as.matrix(data), AnnotatedDataFrame(pdat))
  edgeObj <- deSet(exp_set, full.model = as.formula(fmod),
                   null.model = as.formula(nmod), individual = ind)
  return(edgeObj)
}

#' Generate a deSet object with full and null models
#'
#' \code{build_models} creates a \code{\link{deSet}} object. The user inputs
#' the full and null models.
#'
#' @param data \code{matrix}: gene expression data.
#' @param cov \code{data.frame}: the covariates in the study.
#' @param full.model \code{formula}: the adjustment and the biological
#' variables of interest.
#' @param null.model \code{formula}: the adjustment variables. When
#' \code{NULL}, the intercept-only model \code{~1} is used.
#' @param ind \code{factor}: individuals sampled in the study. Default is
#' NULL. Optional.
#'
#' @return \code{\linkS4class{deSet}} object
#'
#' @examples
#' # create ExpressionSet object from kidney dataset
#' library(splines)
#' data(kidney)
#' age <- kidney$age
#' sex <- kidney$sex
#' kidexpr <- kidney$kidexpr
#' cov <- data.frame(sex = sex, age = age)
#'
#' # create models
#' null.model <- ~sex
#' full.model <- ~sex + ns(age, df=4)
#'
#' # create deSet object from data
#' de_obj <- build_models(data = kidexpr, cov = cov, null.model = null.model,
#' full.model = full.model)
#' @seealso \code{\linkS4class{deSet}}, \code{\link{build_study}}
#' @author John Storey, Andy Bass
#' @export
build_models <- function(data, cov, full.model = NULL, null.model = NULL,
                         ind = NULL) {
  # Guard clauses, checked in the order data -> cov -> full model.
  if (!is.matrix(data)) {
    stop("data must be a matrix")
  }
  if (!is.data.frame(cov)) {
    stop("cov must be a data frame")
  }
  if (is.null(full.model)) {
    stop("need an alternative model")
  }
  # A missing null model falls back to the intercept-only model.
  if (is.null(null.model)) {
    null.model <- ~1
  }
  models.ok <- is(full.model, "formula") && is(null.model, "formula")
  if (!models.ok) {
    stop("alternative and null models must be formatted as a formula")
  }

  # Wrap the data and covariates in an ExpressionSet and attach the models.
  deSet(ExpressionSet(data, AnnotatedDataFrame(cov)),
        full.model = full.model, null.model = null.model,
        individual = ind)
}

# --------------------------------------------------------------------------------
# /R/getMethods.R:
# --------------------------------------------------------------------------------
# Slot accessors: each method returns the named slot of its object unchanged.

#' @rdname sType
setMethod("sType",
          signature = signature(object = "deFit"),
          function(object) {
            object@stat.type
          })

#' @rdname betaCoef
setMethod("betaCoef",
          signature = signature(object = "deFit"),
          function(object) {
            object@beta.coef
          })
#' @rdname resFull
setMethod("resFull",
          signature = signature(object = "deFit"),
          function(object) {
            object@res.full
          })
#' @rdname resNull
setMethod("resNull",
          signature = signature(object = "deFit"),
          function(object) {
            object@res.null
          })
#' @rdname fitFull
setMethod("fitFull",
          signature = signature(object = "deFit"),
          function(object) {
            object@fit.full
          })
#' @rdname fitNull
setMethod("fitNull",
          signature = signature(object = "deFit"),
          function(object) {
            object@fit.null
          })
#' @rdname fullModel
setMethod("fullModel",
          signature = signature(object = "deSet"),
          function(object) {
            object@full.model
          })
#' @rdname nullModel
setMethod("nullModel",
          signature = signature(object = "deSet"),
          function(object) {
            object@null.model
          })
#' @rdname fullMatrix
setMethod("fullMatrix",
          signature = signature(object = "deSet"),
          function(object) {
            object@full.matrix
          })
#' @rdname nullMatrix
setMethod("nullMatrix",
          signature = signature(object = "deSet"),
          function(object) {
            object@null.matrix
          })
#' @rdname individual
setMethod("individual",
          signature = signature(object = "deSet"),
          function(object) {
            object@individual
          })
#' @rdname qvalueObj
setMethod("qvalueObj",
          signature = signature(object = "deSet"),
          function(object) {
            object@qvalueObj
          })

# --------------------------------------------------------------------------------
# /R/kl_clust-functions.R:
# --------------------------------------------------------------------------------
# Assign every row (gene) of a deFit object to a cluster by iterating
# between (1) assigning each gene to the centre with the smallest value
# returned by kl() and (2) recomputing each centre as the mean fitted
# profile and mean variance of its genes.
#
# de.fit  deFit object; the fit.full and res.full slots are used.
# nf      degrees of freedom of the full model (variance denominator n - nf).
# n.mods  number of initial clusters; clusters that end up empty are dropped.
#
# Returns a factor with one cluster label per gene. When there are no more
# genes than n.mods, every gene forms its own cluster.
klmod <- function(de.fit, nf, n.mods = 50) {
  m <- nrow(de.fit@fit.full)
  n <- ncol(de.fit@fit.full)
  if (m <= n.mods) {
    return(as.factor(seq_len(m)))
  }
  sigma2 <- rowSums(de.fit@res.full ^ 2) / (n - nf)
  # Initial centres: n.mods genes drawn at random.
  int.center <- sample(x = m, size = n.mods, replace = FALSE)
  center.fitFull <- de.fit@fit.full[int.center, , drop = FALSE]
  center.var <- sigma2[int.center]

  eps <- 0.1
  mod.member <- NULL
  KL.cutoff <- 1

  pos.center.fitFull <- center.fitFull
  pos.center.var <- center.var
  # The gene-level fits never change, so flatten them once.
  temp.fitFull <- as.vector(t(de.fit@fit.full))
  while (KL.cutoff > eps) {
    pre.center.fitFull <- pos.center.fitFull
    pre.center.var <- pos.center.var

    temp.center.fitFull <- as.vector(t(center.fitFull))

    # kl() returns one value per (gene, centre) pair; reshape it to a
    # genes x centres matrix and pick the closest centre for every gene.
    kldd <- t(matrix(kl(temp.center.fitFull, temp.fitFull, center.var,
                        sigma2, n = n), ncol = m))
    mod.member <- apply(kldd, 1, function(x) which.min(x))

    # First of all, we check whether there is any cluster that does not
    # include any gene. For this case, we exclude this cluster from the
    # original clusters. Therefore, it reduces the number of clusters
    occupied <- sort(unique(mod.member))
    notempty <- seq_len(n.mods) %in% occupied
    center.fitFull <- center.fitFull[notempty, , drop = FALSE]
    center.var <- center.var[notempty]
    # n.mods must track the number of clusters that REMAIN. It used to be
    # set to the number of empty clusters, which made the next iteration's
    # `notempty` mask the wrong length (and silently recycled).
    n.mods <- sum(notempty)

    # Once the number of clusters were decided, we need to find new centers
    # for each cluster.
    # Average the mean and variance over genes included in each cluster;
    # row l of the centres corresponds to the l-th non-empty cluster.
    for (l in seq_along(occupied)) {
      members <- mod.member == occupied[l]
      center.fitFull[l, ] <- colMeans(de.fit@fit.full[members, , drop = FALSE])
      center.var[l] <- sum(sigma2[members]) / sum(members)
    }

    pos.center.fitFull <- center.fitFull
    pos.center.var <- center.var
    if (length(pos.center.var) != length(pre.center.var)) {
      # if the n.mods is reduced, force another iteration
      KL.cutoff <- 1
    } else {
      # Largest change between the previous and the updated centres.
      res2 <- rowSums((pos.center.fitFull - pre.center.fitFull) ^ 2)
      normconst <- 1 / pos.center.var + 1 / pre.center.var
      centerconst <- n * ((pos.center.var / pre.center.var +
                             pre.center.var / pos.center.var) / 2 - 1)
      KL.cutoff <- max(res2 / normconst + centerconst)
    }
  }
  return(as.factor(mod.member))
}

# Summarise each cluster of a deFit object.
#
# de.fit     deFit object; fit.full, res.full and res.null are used.
# nf, nn     degrees of freedom of the full and null models.
# clMembers  cluster label of every gene; the loop below expects the
#            labels to be 1, ..., number of clusters.
#
# Returns a list with the per-cluster mean full fit (mu.full), an all-zero
# matrix of the same shape (mu.null), the square roots of the mean full and
# null variances (sig.full, sig.null), the cluster sizes (n.per.mod) and the
# membership that was passed in (clustMembers).
mod.parms <- function(de.fit, nf, nn, clMembers) {
  # Initializations
  n <- ncol(de.fit@res.full)
  varFull <- rowSums(de.fit@res.full ^ 2) / (n - nf)
  varNull <- rowSums(de.fit@res.null ^ 2) / (n - nn)
  mod.membership <- clMembers
  n.mods <- length(unique(mod.membership))

  mod.fitFull <- matrix(nrow = n.mods, ncol = n)
  n.per.mod <- vector(length = n.mods)
  mod.varNull <- vector(length = n.mods)
  mod.varFull <- vector(length = n.mods)
  # Calculate statistics (variance and mean) for each cluster
  for (i in seq_len(n.mods)) {
    members <- mod.membership == i
    n.per.mod[i] <- sum(members)
    # drop = FALSE lets single-gene clusters go through colMeans() as well.
    mod.fitFull[i, ] <- colMeans(de.fit@fit.full[members, , drop = FALSE])
    mod.varNull[i] <- mean(varNull[members])
    mod.varFull[i] <- mean(varFull[members])
  }
  mod.fitNull <- 0 * mod.fitFull
  # Assign slots
  return(list(mu.full = mod.fitFull, sig.full = sqrt(mod.varFull),
              mu.null = mod.fitNull, sig.null = sqrt(mod.varNull),
              n.per.mod = n.per.mod, clustMembers = clMembers))
}

# Thin wrapper around the compiled "kldistance" routine.
#
# temp.center.fitFull, temp.fitFull  flattened centre fits and gene fits.
# center.var, sigma2                 centre variances and gene variances.
# n                                  number of columns of the fit matrices.
#
# Returns the `kldd` output of the routine, a numeric vector of length
# length(sigma2) * length(center.var).
kl <- function(temp.center.fitFull, temp.fitFull, center.var, sigma2, n) {
  # Initializations
  m <- length(sigma2)
  n.cluster <- length(center.var)
  # C function to calculate kl distance
  kldd <- .C("kldistance",
             centerFit = as.double(temp.center.fitFull),
             centerVar = as.double(center.var),
             fit = as.double(temp.fitFull),
             var = as.double(sigma2),
             m = as.integer(m),
             nc = as.integer(n.cluster),
             n = as.integer(n),
             kldd = double(m * n.cluster))$kldd
  return(kldd)
}

# Trace of x (x'x)^{-1} x' for a design matrix x. If the computation fails
# (for example solve() on a singular x'x) the "try-error" object produced by
# try() is returned silently instead of a number.
mod.df <- function(x) {
  df <- try(sum(diag(x %*% solve(t(x) %*% x) %*% t(x))), silent = TRUE)
  df
}

# --------------------------------------------------------------------------------
# /R/lrt-functions.R:
# --------------------------------------------------------------------------------
# Row-wise statistic from the null and full residual matrices:
# (RSS.null - RSS.full) divided by RSS.full, or by `post.var` when it is
# supplied.
lrtStat <- function(resNull, resFull, post.var = NULL) {
  rss.full <- rowSums(resFull ^ 2)
  rss.null <- rowSums(resNull ^ 2)

  # F-statistic
  denom <- if (is.null(post.var)) rss.full else post.var
  stat <- (rss.null - rss.full) / denom
  return(stat)
}

# EMPIRICAL
# BAYES SQUEEZING OF VARIANCES

# Empirical Bayes posterior variances
# Gordon Smyth
# 2 March 2004. Last modified 2 Dec 2013.
#
# Args:
#   var: vector of sample variances
#   df: residual degrees of freedom, a scalar or one value per variance
#   covariate: optional covariate forwarded to fitFDist()
#   winsor.tail.p: kept in the signature; not used by this implementation
#
# Returns:
#   list with df.prior, var.prior and the squeezed variances var.post
squeezeVar <- function(var, df, covariate = NULL, winsor.tail.p = c(0.05, 0.1)) {
  n <- length(var)
  if (n == 0) stop("var is empty")
  if (n == 1) return(list(var.post = var, var.prior = var, df.prior = 0))
  if (length(df) == 1) {
    df <- rep.int(df, n)
  } else {
    if (length(df) != n) stop("lengths differ")
  }

  # Estimate prior var and df
  fit <- fitFDist(var, df1 = df, covariate = covariate)

  # Prior var as returned by fitFDist (scalar, or vector with a covariate)
  var.prior <- fit$scale

  # fitFDist() below only returns df2; df2.shrunk is honoured if present
  df.prior <- fit$df2.shrunk
  if (is.null(df.prior)) df.prior <- fit$df2

  # Check estimated prior df
  if (is.null(df.prior) || any(is.na(df.prior))) {
    stop("Could not estimate prior df")
  }

  # Squeeze the posterior variances
  df.total <- df + df.prior
  var[df == 0] <- 0 # guard against missing or infinite values
  Infdf <- df.prior == Inf
  if (any(Infdf)) {
    # Infinite prior df: the posterior variance is the prior variance
    var.post <- rep(var.prior, length.out = n)
    i <- which(!Infdf)
    if (length(i)) {
      if (is.null(covariate)) {
        s02 <- var.prior
      } else {
        s02 <- var.prior[i]
      }
      var.post[i] <- (df[i] * var[i] + df.prior[i] * s02) / df.total[i]
    }
  } else {
    var.post <- (df * var + df.prior * var.prior) / df.total
  }

  list(df.prior = df.prior, var.prior = var.prior, var.post = var.post)
}

# Moment estimation of the parameters of a scaled F-distribution
# The first degrees of freedom are given
# Gordon Smyth and Belinda Phipson
# 8 Sept 2002. Last revised 27 Oct 2012.
#
# Args:
#   x: vector of variances
#   df1: first degrees of freedom, one value per element of x
#   covariate: optional covariate; when given, the mean of the log-variances
#              is modelled with a natural spline in the covariate
#
# Returns:
#   list with scale (prior variance) and df2 (prior degrees of freedom);
#   both are NA when no usable observation remains
fitFDist <- function(x, df1, covariate = NULL) {
  # Check covariate
  if (!is.null(covariate)) {
    if (length(covariate) != length(x)) stop("covariate and x must be of same length")
    if (any(is.na(covariate))) stop("NA covariate values not allowed")
    isfin <- is.finite(covariate)
    if (!all(isfin)) {
      if (!any(isfin)) {
        covariate <- sign(covariate)
      } else {
        # Move infinite values just outside the finite range
        r <- range(covariate[isfin])
        covariate[covariate == -Inf] <- r[1] - 1
        covariate[covariate == Inf] <- r[2] + 1
      }
    }
    splinedf <- min(4, length(unique(covariate)))
    if (splinedf < 2) covariate <- NULL
  }
  # Remove missing or infinite values and zero degrees of freedom
  ok <- is.finite(x) & is.finite(df1) & (x > -1e-15) & (df1 > 1e-15)
  notallok <- !all(ok)
  if (notallok) {
    x <- x[ok]
    df1 <- df1[ok]
    if (!is.null(covariate)) {
      covariate2 <- covariate[!ok]
      covariate <- covariate[ok]
    }
  }
  n <- length(x)
  if (n == 0) return(list(scale = NA, df2 = NA))

  # Avoid exactly zero values
  x <- pmax(x, 0)
  m <- median(x)
  if (m == 0) {
    warning("More than half of residual variances are exactly zero: eBayes unreliable")
    m <- 1
  } else {
    if (any(x == 0)) warning("Zero sample variances detected, have been offset", call. = FALSE)
  }
  x <- pmax(x, 1e-5 * m)

  # Better to work on with log(F)
  z <- log(x)
  e <- z - digamma(df1 / 2) + log(df1 / 2)

  if (is.null(covariate)) {
    emean <- mean(e)
    evar <- sum((e - emean)^2) / (n - 1)
  } else {
    if (!requireNamespace("splines", quietly = TRUE)) stop("splines package required but is not available")
    design <- try(splines::ns(covariate, df = splinedf, intercept = TRUE), silent = TRUE)
    if (inherits(design, "try-error")) stop("Problem with covariate")
    fit <- lm.fit(design, e)
    if (notallok) {
      design2 <- predict(design, newx = covariate2)
      emean <- rep.int(0, n + length(covariate2))
      emean[ok] <- fit$fitted
      emean[!ok] <- design2 %*% fit$coefficients
    } else {
      emean <- fit$fitted
    }
    # Residual variance of the spline fit: RSS / residual df. The previous
    # mean(fit$residuals[-(1:fit$rank)]^2) discarded the residuals of the
    # first `rank` observations, so the estimate depended on input order.
    evar <- sum(fit$residuals^2) / (n - fit$rank)
  }
  evar <- evar - mean(trigamma(df1 / 2))
  if (evar > 0) {
    df2 <- 2 * trigammaInverse(evar)
    s20 <- exp(emean + digamma(df2 / 2) - log(df2 / 2))
  } else {
    df2 <- Inf
    s20 <- exp(emean)
  }
  list(scale = s20, df2 = df2)
}

# Solve trigamma(y) = x for y
# Gordon Smyth
# 8 Sept 2002. Last revised 12 March 2004.
#
# Args:
#   x: numeric vector
#
# Returns:
#   numeric vector y of the same length; NA stays NA, negative input gives
#   NaN with a warning
trigammaInverse <- function(x) {
  # Non-numeric or zero length input
  if (!is.numeric(x)) stop("Non-numeric argument to mathematical function")
  if (length(x) == 0) return(numeric(0))

  # Treat out-of-range values as special cases; each case fills in its own
  # elements and recurses on the remaining ones
  omit <- is.na(x)
  if (any(omit)) {
    y <- x
    if (any(!omit)) y[!omit] <- Recall(x[!omit])
    return(y)
  }
  omit <- (x < 0)
  if (any(omit)) {
    y <- x
    y[omit] <- NaN
    warning("NaNs produced")
    if (any(!omit)) y[!omit] <- Recall(x[!omit])
    return(y)
  }
  omit <- (x > 1e7)
  if (any(omit)) {
    y <- x
    y[omit] <- 1 / sqrt(x[omit])
    if (any(!omit)) y[!omit] <- Recall(x[!omit])
    return(y)
  }
  omit <- (x < 1e-6)
  if (any(omit)) {
    y <- x
    y[omit] <- 1 / x[omit]
    if (any(!omit)) y[!omit] <- Recall(x[!omit])
    return(y)
  }

  # Newton's method
  # 1/trigamma(y) is convex, nearly linear and strictly > y-0.5,
  # so iteration to solve 1/x = 1/trigamma is monotonically convergent
  y <- 0.5 + 1 / x
  iter <- 0
  repeat {
    iter <- iter + 1
    tri <- trigamma(y)
    dif <- tri * (1 - tri / x) / psigamma(y, deriv = 2)
    y <- y + dif
    if (max(-dif / y) < 1e-8) break
    if (iter > 50) {
      warning("Iteration limit exceeded")
      break
    }
  }
  y
}

-------------------------------------------------------------------------------- /R/misc.R:
--------------------------------------------------------------------------------
# Build the matrix of bootstrap null statistics.
#
# Args:
#   object: deSet-like S4 object (slots full.matrix and individual are read,
#           exprs(object) is overwritten with simulated null data)
#   obs.fit: fit of the observed data (slots res.full and stat.type are read)
#   clustParms: cluster parameters forwarded to odpStat() for the ODP
#   bs.its: number of bootstrap iterations
#   verbose: print a progress counter?
#   mod.F: kept in the signature; not used in this body
#   post.var: optional list with df.prior and var.prior; when given, the
#             lrt denominator is the squeezed variance of each null fit
#
# Returns:
#   matrix with one row per gene and one column per iteration
bootstrap <- function(object, obs.fit, clustParms = NULL, bs.its = 100,
                      verbose = TRUE, mod.F = FALSE, post.var = NULL) {
  n.probes <- nrow(obs.fit@res.full)
  nf <- mod.df(object@full.matrix)
  null.stat <- matrix(nrow = n.probes,
                      ncol = bs.its)
  sType <- obs.fit@stat.type
  # seq_len() so that bs.its = 0 returns an empty matrix instead of failing
  for (i in seq_len(bs.its)) {
    if (verbose) {
      cat("\r", "Null iteration: ", i)
      if (i == bs.its) cat("\n")
    }
    exprs(object) <- null(obs.fit = obs.fit, nf = nf,
                          ind = object@individual)
    null.fit <- fit_models(object,
                           stat.type = sType)
    if (sType == "lrt") {
      if (!is.null(post.var)) {
        nFull <- ncol(object@full.matrix)
        n <- ncol(object)
        df_full <- n - nFull
        var_full <- rowSums(null.fit@res.full ^ 2) / df_full
        pv <- (df_full * var_full + post.var$df.prior * post.var$var.prior) /
          (df_full + post.var$df.prior)
      } else {
        pv <- NULL
      }
      null.stat[, i] <- lrtStat(resNull = null.fit@res.null,
                                resFull = null.fit@res.full,
                                post.var = pv)
    } else {
      null.stat[, i] <- odpStat(n.res = null.fit@res.null,
                                clustParms = clustParms)
    }
  }
  return(null.stat)
}

# Rescale every row of x to standard deviation sig while keeping its mean.
rescale <- function(x, sig) {
  means <- rowMeans(x)
  n <- ncol(x)
  # sample standard deviation of each row (n - 1 denominator)
  rowsds <- sqrt((rowMeans(x ^ 2) - means ^ 2) * n / (n - 1))
  ret <- (x - means) * sig / rowsds + means
  return(ret)
}

# Simulate one null data set: resample (with replacement) the columns of the
# leverage-adjusted full-model residuals and add them to the null fit.
#
# Args:
#   obs.fit: observed fit (slots stat.type, res.full, dH.full, fit.null)
#   nf: full-model degrees of freedom (used for the non-"lrt" rescaling)
#   ind: individual factor; the adjustment for individuals is skipped when
#        its first element is NA
null <- function(obs.fit, nf, ind) {
  stat.var <- obs.fit@stat.type
  n <- ncol(obs.fit@res.full)
  if (sum(!is.na(ind[1])) > 0) {
    ind <- model.matrix(~-1 + as.factor(ind))
    wts <- sqrt(1 - diag(ind %*% solve(t(ind) %*% ind) %*% t(ind)))
  } else {
    ind <- NULL
    wts <- rep(1, n)
  }
  wts <- t(t(sqrt(1 - obs.fit@dH.full)) * wts)
  res.full <- obs.fit@res.full * wts ^ (-1)
  # Random mix columns of residuals from full model
  vv <- sample(1:n, replace = TRUE)
  bs.res <- res.full[, vv]
  # Add random residuals to null data
  if (stat.var == "lrt") {
    null.dat <- obs.fit@fit.null + bs.res
  } else {
    sig1 <- sqrt(rowSums(obs.fit@res.full ^ 2) / (n - nf))
    bs.res <- rescale(x = bs.res,
                      sig = sig1)
    null.dat <- obs.fit@fit.null + bs.res
  }
  return(null.dat)
}

# Trace of the projection matrix of x; a "try-error" object is returned when
# the computation fails.
mod.df <- function(x) {
  df <- try(sum(diag(x %*% solve(t(x) %*% x) %*% t(x))), silent = TRUE)
  return(df)
}

# Fill the model slots of a deSet: stores both formulas, builds the
# corresponding model matrices (column names removed), stores the individual
# factor and validates the object. `grp` is accepted but not used here.
createSet <- function(object, nMod=NULL, fMod=NULL, ind=NULL, grp=factor(NA)) {
  # Create deSet
  # require(splines)
  object@null.model <- nMod
  object@full.model <- fMod
  mmf <- model.matrix(object = fMod, data = object)
  mmn <- model.matrix(object = nMod, data = object)
  colnames(mmf) <- NULL
  colnames(mmn) <- NULL
  object@null.matrix <- mmn
  object@full.matrix <- mmf
  object@individual <- as.factor(ind)
  validObject(object)
  object
}

# Drop the columns of x whose absolute column sum is not above eps.
# drop = FALSE: the result stays a matrix even when a single column is left
# (a bare vector is rejected by lm.wfit()).
rm.zero.cols <- function(x, eps = 10e-12) {
  return(x[, colSums(abs(x)) > eps, drop = FALSE])
}


# Projection ("hat") matrix of x, using the generalized inverse ginv().
projMatrix <- function(x) {
  H <- x %*% ginv(t(x) %*% x) %*% t(x)
  H
}
-------------------------------------------------------------------------------- /R/odp-functions.R: --------------------------------------------------------------------------------
#' @useDynLib edge odpScoreCluster
odp.score <- function(s.dat.cl, mu, sigma, null, m, n, cluster) {
  # Determines ODP score
  #
  # Args:
  #   s.dat.cl: Matrix of fitted data by full model
  #   mu: Vector means of clusters
  #   sigma: Vector of sd of clusters
  #   null: Boolean whether NULL model or not
  #   m: Number of genes
  #   n: Number of probes/arrays
  #   cluster: Vector of the number of members in each cluster
  #
  # Returns:
  #   scr: Vector of ODP score of each gene
  # Initializations
  p <- length(sigma)

  # Call to C file to compute ODP score
  scr <- .C("odpScoreCluster",
            sumDat = as.double(s.dat.cl),
            mu = as.double(mu),
            sigma = as.double(sigma),
            m = as.integer(m),
            n = as.integer(n),
            p = as.integer(p),
            null = as.integer(null),
            cluster = as.integer(cluster),
            scr = double(m))$scr

  return(scr)
}

odpStat <- function(n.res, clustParms) {
  # Determines ODP statistic
  #
  # Args:
  #   n.res: null residuals
  #   clustParms: clustering parameters
  #
  # Returns:
  #   vector with one ODP statistic per row of n.res
  # Probabilities of alt and null distributions
  s.dat1 <- c(t(n.res), t(clustParms$mu.full))
  s.dat0 <- c(t(n.res), t(clustParms$mu.null))
  cl.den <- odp.score(s.dat0,
                      mu = rep(0, length(clustParms$sig.null)),
                      sigma = clustParms$sig.null,
                      null = TRUE,
                      m = nrow(n.res),
                      n = ncol(n.res),
                      cluster = clustParms$n.per.mod)
  cl.num <- odp.score(s.dat1,
                      mu = rowSums(clustParms$mu.full ^ 2),
                      sigma = clustParms$sig.full,
                      null = FALSE,
                      m = nrow(n.res),
                      n = ncol(n.res),
                      cluster = clustParms$n.per.mod)

  # ODP statistic
  odp.stat <- 2 * cl.num / (cl.den + cl.num)
  return(odp.stat)
}
-------------------------------------------------------------------------------- /R/setMethods.R: --------------------------------------------------------------------------------
# Replacement methods for deSet: each one stores the new value in its slot
# and validates the object before returning it.
#' @rdname individual
setReplaceMethod("individual",
                 signature = signature(object = "deSet"),
                 function(object, value) {
                   object@individual <- value
                   validObject(object)
                   object
                 })
#' @rdname qvalueObj
setReplaceMethod("qvalueObj",
                 signature = signature(object = "deSet"),
                 function(object, value) {
                   object@qvalueObj <- value
                   validObject(object)
                   object
                 })
# Replacing a model formula also rebuilds the matching model matrix.
#' @rdname fullModel
setReplaceMethod("fullModel",
                 signature = signature(object = "deSet"),
                 function(object, value) {
                   object@full.model <- value
                   fullMatrix(object) <- model.matrix(object = value, data = object)
                   validObject(object)
                   object
                 })
#' @rdname nullModel
setReplaceMethod("nullModel",
                 signature = signature(object = "deSet"),
                 function(object, value) {
                   object@null.model <- value
                   nullMatrix(object) <- model.matrix(object = value, data = object)
                   validObject(object)
                   object
                 })
#' @rdname fullMatrix
setReplaceMethod("fullMatrix",
                 signature = signature(object = "deSet"),
                 function(object, value) {
                   object@full.matrix <- value
                   validObject(object)
                   object
                 })
#' @rdname nullMatrix
setReplaceMethod("nullMatrix",
                 signature = signature(object = "deSet"),
                 function(object, value) {
                   object@null.matrix <- value
                   validObject(object)
                   object
                 })
-------------------------------------------------------------------------------- /R/wls.R: --------------------------------------------------------------------------------
# Weighted least squares fit of the null and full models, gene by gene.
#
# Args:
#   object: deSet-like S4 object (exprs(), null.matrix, full.matrix and
#           individual are read)
#   w: matrix of weights with the dimensions of exprs(object); NULL means
#      unit weights
#   stat.type: "lrt" or "odp"
#
# Returns:
#   deFit object with the fitted values, weighted residuals and the diagonal
#   of the full-model projection matrix; beta.coef is a 1-row NA placeholder
fit_wmodels <- function(object, w = NULL, stat.type = c("lrt", "odp")) {
  exprsData <- exprs(object)
  n <- ncol(exprsData)
  nr <- nrow(exprsData)
  stat.var <- match.arg(stat.type, c("lrt", "odp"))
  # The advertised default w = NULL could not be fitted; use unit weights
  if (is.null(w)) {
    w <- matrix(1, nrow = nr, ncol = n)
  }
  null.matrix <- object@null.matrix
  full.matrix <- object@full.matrix
  if (length(object@individual) != 0) {
    # Project the individual effects out of the data and of both designs
    ind.matrix <- model.matrix(~-1 + as.factor(object@individual))
    Hi <- projMatrix(ind.matrix)
    fitInd <- t(Hi %*% t(exprsData))
    exprsData <- exprsData - fitInd
    full.matrix <- full.matrix - Hi %*% full.matrix
    null.matrix <- null.matrix - Hi %*% null.matrix
    full.matrix <- rm.zero.cols(full.matrix)
    null.matrix <- rm.zero.cols(null.matrix)
  }
  fitFull <- fitNull <- resNull <- resFull <- dHFull <- matrix(nrow = nr, ncol = n)
  # Coefficients are not computed here; defined up front so that it also
  # exists when there are no rows to loop over
  B.coef <- matrix(NA, ncol = n)
  for (i in seq_len(nr)) {
    wlm_null <- lm.wfit(x = null.matrix, y = exprsData[i, ], w = w[i, ])
    fitNull[i, ] <- wlm_null$fitted.values
    resNull[i, ] <- wlm_null$residuals * sqrt(wlm_null$weights)
    if (stat.var != "odp") {
      wlm_full <- lm.wfit(x = full.matrix, y = exprsData[i, ], w = w[i, ])
      dHFull[i, ] <- diag(projMatrix(sqrt(w[i, ]) * full.matrix))# double check
      fitFull[i, ] <- wlm_full$fitted.values
      resFull[i, ] <- wlm_full$residuals * sqrt(wlm_full$weights)
    } else {
      # ODP: fit the part of the weighted full design that is orthogonal to
      # the weighted null design to the weighted null residuals
      w_sqrt <- sqrt(w[i, ])
      f.matrix.scaled <- full.matrix * w_sqrt
      H.null <- projMatrix(null.matrix * w_sqrt)
      f.matrix.scaled <- f.matrix.scaled - H.null %*% f.matrix.scaled
      f.matrix.scaled <- rm.zero.cols(f.matrix.scaled)
      H.full <- projMatrix(f.matrix.scaled)
      res.n <- wlm_null$residuals * w_sqrt
      dHFull[i, ] <- diag(H.full)
      fitFull[i, ] <- H.full %*% res.n
      resFull[i, ] <- res.n - fitFull[i, ]
    }
  }
  efObj <- new("deFit", fit.full = fitFull, fit.null = fitNull,
               dH.full = dHFull, res.full = resFull,
               res.null = resNull, beta.coef = B.coef,
               stat.type = stat.var)
  return(efObj)
}
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | edge: Extraction of Differential Gene Expression 3 | ==== 4 | 5 | Introduction 6 | ------ 7 | The edge package implements methods for carrying out differential 8 | expression analyses of genome-wide gene expression studies. Significance 9 | testing using the optimal discovery procedure and generalized likelihood 10 | ratio tests (equivalent to F-tests and t-tests) are implemented for general study 11 | designs. Special functions are available to facilitate the analysis of 12 | common study designs, including time course experiments.
Other packages 13 | such as [snm](http://www.bioconductor.org/packages/release/bioc/html/snm.html), [sva](http://www.bioconductor.org/packages/release/bioc/html/sva.html), and [qvalue](https://github.com/jdstorey/qvalue) are integrated in edge to provide a wide range 14 | of tools for gene expression analysis. 15 | 16 | 17 | ### Installation and Documentation 18 | 19 | To install the Bioconductor release version, open R and type: 20 | ```R 21 | source("http://bioconductor.org/biocLite.R") 22 | biocLite("edge") 23 | ``` 24 | 25 | To install the development version, open R and type: 26 | ```R 27 | install.packages("devtools") 28 | library("devtools") 29 | install_github(c("jdstorey/qvalue","jdstorey/edge"), build_vignettes = TRUE) 30 | ``` 31 | 32 | Instructions on using edge can be viewed by typing: 33 | ```R 34 | library("edge") 35 | browseVignettes("edge") 36 | ``` 37 | 38 | ### Main functions 39 | * `build_models` 40 | * `build_study` 41 | * `odp` 42 | * `lrt` 43 | * `fit_models` 44 | * `kl_clust` 45 | * `apply_sva` 46 | * `apply_snm` 47 | * `apply_qvalue` 48 | 49 | ### Quick start guide 50 | 51 | To get started, first load the kidney dataset included in the package: 52 | ```R 53 | library(edge) 54 | data(kidney) 55 | names(kidney) 56 | ``` 57 | The kidney study is interested in determining differentially expressed genes with respect to age in kidney tissue. The `age` variable is the age of the subjects and the `sex` variable is whether the subjects were male or female. The expression values for the genes are contained in the `kidexpr` variable. 58 | ```R 59 | kidexpr <- kidney$kidexpr 60 | age <- kidney$age 61 | sex <- kidney$sex 62 | ``` 63 | 64 | Once the data has been loaded, the user has two options to create the experimental models: `build_models` or `build_study`. 
If the experiment models are unknown to the user, `build_study` can be used to create the models: 65 | ```R 66 | edge_obj <- build_study(data = kidexpr, adj.var = sex, tme = age, sampling = "timecourse") 67 | full_model <- fullModel(edge_obj) 68 | null_model <- nullModel(edge_obj) 69 | ``` 70 | 71 | The variable `sampling` describes the type of experiment performed, `adj.var` is the adjustment variable and `tme` is the time variable in the study. If the experiment is more complex then type `?build_study` for additional arguments. 72 | 73 | If the alternative and null models are known to the user then `build_models` can be used to make a deSet object: 74 | ```R 75 | library(splines) 76 | cov <- data.frame(sex = sex, age = age) 77 | null_model <- ~sex 78 | full_model <- ~sex + ns(age, df=4) 79 | edge_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model, full.model = full_model) 80 | ``` 81 | 82 | The `cov` is a data frame of covariates, the `null.model` is the null model and the `full.model` is the alternative model. The input `cov` is a data frame with the column names the same as the variables in the alternative and null models. Once the models have been generated, it is often useful to normalize the gene expression matrix using `apply_snm` and/or adjust for unmodelled variables using `apply_sva`. 
83 | ```R 84 | edge_norm <- apply_snm(edge_obj, int.var=1:ncol(exprs(edge_obj)), diagnose=FALSE) 85 | edge_sva <- apply_sva(edge_norm) 86 | 87 | ``` 88 | 89 | The `odp` or `lrt` function can be used on `edge_sva` to implement either the optimal discovery procedure or the likelihood ratio test, respectively: 90 | ```R 91 | # optimal discovery procedure 92 | edge_odp <- odp(edge_sva, bs.its = 30, verbose=FALSE) 93 | # likelihood ratio test 94 | edge_lrt <- lrt(edge_sva) 95 | ``` 96 | 97 | To access the estimated proportion of null p-values, the p-values, the q-values and the local false discovery rates for each gene, use the function `qvalueObj`: 98 | ```R 99 | qval_obj <- qvalueObj(edge_odp) 100 | qvals <- qval_obj$qvalues 101 | pvals <- qval_obj$pvalues 102 | lfdr <- qval_obj$lfdr 103 | pi0 <- qval_obj$pi0 104 | ``` 105 | 106 | See the vignette for more detailed explanations of the edge package. 107 | 108 | -------------------------------------------------------------------------------- /data/endotoxin.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/StoreyLab/edge/5f973def65bc536b90d46b78e0a0ef849a81caa3/data/endotoxin.rda -------------------------------------------------------------------------------- /data/gibson.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/StoreyLab/edge/5f973def65bc536b90d46b78e0a0ef849a81caa3/data/gibson.rda -------------------------------------------------------------------------------- /data/kidney.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/StoreyLab/edge/5f973def65bc536b90d46b78e0a0ef849a81caa3/data/kidney.rda -------------------------------------------------------------------------------- /inst/NEWS: -------------------------------------------------------------------------------- 1 | edge 2.1.1: 2 | - Moderated F-test has been
added for likelihood ratio test 3 | - Weights can be inputted into odp/lrt which allows it to work for RNA-Seq experiments with low samples 4 | - added function apply_jackstraw 5 | - fixed bug in build_study 6 | 7 | edge 2.0.0: 8 | 9 | The edge package was first released in 2005 and described in the publication: 10 | 11 | Jeffrey T. Leek, Eva Monsen, Alan R. Dabney, and John D. Storey. Edge: 12 | extraction and analysis of differential gene expression. Bioinformatics, 13 | 22(4):507–508, 2006. 14 | http://bioinformatics.oxfordjournals.org/content/22/4/507.abstract 15 | 16 | It was an independently released R package by the John Storey Lab, which 17 | included multi-threading and a graphical user interface. However, edge has been 18 | updated and will now be made available through Bioconductor; edge >=2.0.0 is the 19 | new version released through Bioconductor. 20 | -------------------------------------------------------------------------------- /man/apply_jackstraw.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/AllGenerics.R, R/deSet-methods.R 3 | \docType{methods} 4 | \name{apply_jackstraw} 5 | \alias{apply_jackstraw} 6 | \alias{apply_jackstraw,deSet-method} 7 | \title{Non-Parametric Jackstraw for Principal Component Analysis (PCA)} 8 | \usage{ 9 | apply_jackstraw(object, PC = NULL, r = NULL, s = NULL, B = NULL, 10 | covariate = NULL, verbose = TRUE, seed = NULL) 11 | 12 | \S4method{apply_jackstraw}{deSet}(object, PC = NULL, r = NULL, s = NULL, 13 | B = NULL, covariate = NULL, verbose = TRUE, seed = NULL) 14 | } 15 | \arguments{ 16 | \item{object}{\code{S4 object}: \code{\linkS4class{deSet}}} 17 | 18 | \item{PC}{a numeric vector of principal components of interest. 
Choose a subset of r significant PCs to be used.} 19 | 20 | \item{r}{a number (a positive integer) of significant principal components.} 21 | 22 | \item{s}{a number (a positive integer) of synthetic null variables. Out of m variables, s variables are independently permuted.} 23 | 24 | \item{B}{a number (a positive integer) of resampling iterations. There will be a total of s*B null statistics.} 25 | 26 | \item{covariate}{a data matrix of covariates with corresponding n observations.} 27 | 28 | \item{verbose}{a logical indicator as to whether to print the progress.} 29 | 30 | \item{seed}{a seed for the random number generator.} 31 | } 32 | \value{ 33 | \code{apply_jackstraw} returns a \code{list} containing the following 34 | slots: 35 | \itemize{ 36 | \item{\code{p.value} the m p-values of association tests between variables 37 | and their principal components} 38 | \item{\code{obs.stat} the observed F-test statistics} 39 | \item{\code{null.stat} the s*B null F-test statistics} 40 | } 41 | } 42 | \description{ 43 | Estimates statistical significance of association between variables and 44 | their principal components (PCs). 45 | } 46 | \details{ 47 | This function computes m p-values of linear association between m variables 48 | and their PCs. Its resampling strategy accounts for the over-fitting 49 | characteristics due to direct computation of PCs from the observed data 50 | and protects against an anti-conservative bias. 51 | 52 | Provide the \code{\linkS4class{deSet}}, 53 | with m variables as rows and n observations as columns. Given that there are 54 | r significant PCs, this function tests for linear association between m 55 | variables and their r PCs. 56 | 57 | You could specify a subset of significant PCs 58 | that you are interested in (PC). If PC is given, then this function computes 59 | statistical significance of association between m variables and PC, while 60 | adjusting for other PCs (i.e., significant PCs that are not your interest).
61 | For example, if you want to identify variables associated with 1st and 2nd 62 | PCs, when your data contains three significant PCs, set r=3 and PC=c(1,2). 63 | 64 | Please take a careful look at your data and use appropriate graphical and 65 | statistical criteria to determine a number of significant PCs, r. The number 66 | of significant PCs depends on the data structure and the context. In a case 67 | when you fail to specify r, it will be estimated from a permutation test 68 | (Buja and Eyuboglu, 1992) using a function \code{\link{permutationPA}}. 69 | 70 | If s is not supplied, s is set to about 10% of m variables. If B is not 71 | supplied, B is set to m*10/s. 72 | } 73 | \examples{ 74 | library(splines) 75 | data(kidney) 76 | age <- kidney$age 77 | sex <- kidney$sex 78 | kidexpr <- kidney$kidexpr 79 | cov <- data.frame(sex = sex, age = age) 80 | # create models 81 | null_model <- ~sex 82 | full_model <- ~sex + ns(age, df = 4) 83 | # create deSet object from data 84 | de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model, 85 | full.model = full_model) 86 | ## apply the jackstraw 87 | out = apply_jackstraw(de_obj, PC=1, r=1) 88 | ## Use optional arguments 89 | ## For example, set s and B for a balance between speed of the algorithm and accuracy of p-values 90 | ## out = apply_jackstraw(dat, PC=1, r=1, s=10, B=1000, seed=5678) 91 | } 92 | \author{ 93 | Neo Christopher Chung \email{nc@princeton.edu} 94 | } 95 | \references{ 96 | Chung and Storey (2013) Statistical Significance of 97 | Variables Driving Systematic Variation in 98 | High-Dimensional Data. 
arXiv:1308.6013 [stat.ME] 99 | \url{http://arxiv.org/abs/1308.6013} 100 | 101 | More information available at \url{http://ncc.name/} 102 | } 103 | \seealso{ 104 | \code{\link{permutationPA}} 105 | } 106 | 107 | -------------------------------------------------------------------------------- /man/apply_qvalue.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/AllGenerics.R, R/deSet-methods.R 3 | \docType{methods} 4 | \name{apply_qvalue} 5 | \alias{apply_qvalue} 6 | \alias{apply_qvalue,deSet-method} 7 | \title{Estimate the q-values for a given set of p-values} 8 | \usage{ 9 | apply_qvalue(object, ...) 10 | 11 | \S4method{apply_qvalue}{deSet}(object, ...) 12 | } 13 | \arguments{ 14 | \item{object}{\code{S4 object}: \code{\linkS4class{deSet}}} 15 | 16 | \item{...}{Additional arguments for \code{\link{qvalue}}} 17 | } 18 | \value{ 19 | \code{\linkS4class{deSet}} object with slots updated by \code{\link{qvalue}} 20 | calculations. 21 | } 22 | \description{ 23 | Runs \code{\link{qvalue}} on a \code{\linkS4class{deSet}} object. 24 | } 25 | \examples{ 26 | # import data 27 | library(splines) 28 | data(kidney) 29 | age <- kidney$age 30 | sex <- kidney$sex 31 | kidexpr <- kidney$kidexpr 32 | cov <- data.frame(sex = sex, age = age) 33 | 34 | # create models 35 | null_model <- ~sex 36 | full_model <- ~sex + ns(age, df = 4) 37 | 38 | # create deSet object from data 39 | de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model, 40 | full.model = full_model) 41 | 42 | # Run lrt (or odp) and apply_qvalue 43 | de_lrt <- lrt(de_obj) 44 | de_lrt <- apply_qvalue(de_lrt, fdr.level = 0.05, 45 | pi0.method = "bootstrap", adj=1.2) 46 | summary(de_lrt) 47 | } 48 | \author{ 49 | John Storey, Andrew Bass 50 | } 51 | \references{ 52 | Storey JD and Tibshirani R. (2003) Statistical significance for 53 | genome-wide studies. 
Proceedings of the National Academy of Sciences, 54 | 100: 9440-9445 55 | } 56 | \seealso{ 57 | \code{\linkS4class{deSet}}, \code{\link{odp}} and 58 | \code{\link{lrt}} 59 | } 60 | 61 | -------------------------------------------------------------------------------- /man/apply_snm.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/AllGenerics.R, R/deSet-methods.R 3 | \docType{methods} 4 | \name{apply_snm} 5 | \alias{apply_snm} 6 | \alias{apply_snm,deSet-method} 7 | \title{Supervised normalization of data in edge} 8 | \usage{ 9 | apply_snm(object, int.var = NULL, ...) 10 | 11 | \S4method{apply_snm}{deSet}(object, int.var = NULL, ...) 12 | } 13 | \arguments{ 14 | \item{object}{\code{S4 object}: \code{\linkS4class{deSet}}} 15 | 16 | \item{int.var}{\code{data frame}: intensity-dependent effects (see 17 | \code{\link{snm}} for details)} 18 | 19 | \item{...}{Additional arguments for \code{\link{snm}}} 20 | } 21 | \value{ 22 | \code{apply_snm} returns a \code{\linkS4class{deSet}} object where 23 | assayData (the expression data) that has been passed to apply_snm is replaced 24 | with the normalized data that \code{\link{snm}} returns. Specifically, 25 | \code{exprs(object)} is replaced by \code{$norm.dat} from \code{\link{snm}}, 26 | where \code{object} is the \code{\link{deSet}} object. 27 | } 28 | \description{ 29 | Runs \code{snm} on a deSet object based on the null and full models in 30 | \code{\linkS4class{deSet}}. See \code{\link{snm}} for additional details 31 | on the algorithm. 
32 | } 33 | \examples{ 34 | # simulate data 35 | library(snm) 36 | singleChannel <- sim.singleChannel(12345) 37 | data <- singleChannel$raw.data 38 | 39 | # create deSet object using build_models (can use ExpressionSet see manual) 40 | cov <- data.frame(grp = singleChannel$bio.var[,2]) 41 | full_model <- ~grp 42 | null_model <- ~1 43 | 44 | # create deSet object using build_models 45 | de_obj <- build_models(data = data, cov = cov, full.model = full_model, 46 | null.model = null_model) 47 | 48 | # run snm using intensity-dependent adjustment variable 49 | de_snm <- apply_snm(de_obj, int.var = singleChannel$int.var, 50 | verbose = FALSE, num.iter = 1) 51 | } 52 | \author{ 53 | John Storey, Andrew Bass 54 | } 55 | \references{ 56 | Mecham BH, Nelson PS, Storey JD. Supervised normalization of microarrays. 57 | Bioinformatics 2010;26:1308-1315. 58 | } 59 | \seealso{ 60 | \code{\linkS4class{deSet}}, \code{\link{odp}} and 61 | \code{\link{lrt}} 62 | } 63 | 64 | -------------------------------------------------------------------------------- /man/apply_sva.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/AllGenerics.R, R/deSet-methods.R 3 | \docType{methods} 4 | \name{apply_sva} 5 | \alias{apply_sva} 6 | \alias{apply_sva,deSet-method} 7 | \title{Estimate surrogate variables} 8 | \usage{ 9 | apply_sva(object, ...) 10 | 11 | \S4method{apply_sva}{deSet}(object, ...) 12 | } 13 | \arguments{ 14 | \item{object}{\code{S4 object}: \code{\linkS4class{deSet}}} 15 | 16 | \item{...}{Additional arguments for \code{\link{sva}}} 17 | } 18 | \value{ 19 | \code{\linkS4class{deSet}} object where the surrogate variables 20 | estimated by \code{\link{sva}} are added to the full model and null model 21 | matrices. 22 | } 23 | \description{ 24 | Runs \code{\link{sva}} on the null and full models in 25 | \code{\linkS4class{deSet}}.
See \code{\link{sva}} for additional details. 26 | } 27 | \examples{ 28 | # import data 29 | library(splines) 30 | data(kidney) 31 | age <- kidney$age 32 | sex <- kidney$sex 33 | kidexpr <- kidney$kidexpr 34 | cov <- data.frame(sex = sex, age = age) 35 | 36 | # create models 37 | null_model <- ~sex 38 | full_model <- ~sex + ns(age, df = 4) 39 | 40 | # create deSet object from data 41 | de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model, 42 | full.model = full_model) 43 | 44 | # run surrogate variable analysis 45 | de_sva <- apply_sva(de_obj) 46 | 47 | # run odp/lrt with surrogate variables added 48 | de_odp <- odp(de_sva, bs.its = 30) 49 | summary(de_odp) 50 | } 51 | \author{ 52 | John Storey, Jeffrey Leek, Andrew Bass 53 | } 54 | \references{ 55 | Leek JT, Storey JD (2007) Capturing Heterogeneity in Gene Expression 56 | Studies by Surrogate Variable Analysis. PLoS Genet 3(9): e161. 57 | doi:10.1371/journal.pgen.0030161 58 | 59 | Leek JT and Storey JD. (2008) A general framework for multiple testing 60 | dependence. Proceedings of the National Academy of Sciences, 105: 18718- 61 | 18723. 62 | } 63 | \seealso{ 64 | \code{\linkS4class{deSet}}, \code{\link{odp}} and 65 | \code{\link{lrt}} 66 | } 67 | 68 | -------------------------------------------------------------------------------- /man/betaCoef.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/AllGenerics.R, R/getMethods.R 3 | \docType{methods} 4 | \name{betaCoef} 5 | \alias{betaCoef} 6 | \alias{betaCoef,deFit-method} 7 | \title{Regression coefficients from full model fit} 8 | \usage{ 9 | betaCoef(object) 10 | 11 | \S4method{betaCoef}{deFit}(object) 12 | } 13 | \arguments{ 14 | \item{object}{\code{S4 object}: \code{\linkS4class{deFit}}} 15 | } 16 | \value{ 17 | \code{betaCoef} returns the regression coefficients for the full 18 | model fit. 
19 | } 20 | \description{ 21 | Access the full model fitted coefficients of a 22 | \code{\linkS4class{deFit}} object. 23 | } 24 | \examples{ 25 | # import data 26 | library(splines) 27 | data(kidney) 28 | age <- kidney$age 29 | sex <- kidney$sex 30 | kidexpr <- kidney$kidexpr 31 | cov <- data.frame(sex = sex, age = age) 32 | 33 | # create models 34 | null_model <- ~sex 35 | full_model <- ~sex + ns(age, df = 4) 36 | 37 | # create deSet object from data 38 | de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model, 39 | full.model = full_model) 40 | 41 | # run fit_models to get model fits 42 | de_fit <- fit_models(de_obj) 43 | 44 | # extract beta coefficients 45 | beta <- betaCoef(de_fit) 46 | } 47 | \author{ 48 | John Storey, Andrew Bass 49 | } 50 | \seealso{ 51 | \code{\link{fit_models}} 52 | } 53 | 54 | -------------------------------------------------------------------------------- /man/build_models.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/form_models.R 3 | \name{build_models} 4 | \alias{build_models} 5 | \title{Generate a deSet object with full and null models} 6 | \usage{ 7 | build_models(data, cov, full.model = NULL, null.model = NULL, ind = NULL) 8 | } 9 | \arguments{ 10 | \item{data}{\code{matrix}: gene expression data.} 11 | 12 | \item{cov}{\code{data.frame}: the covariates in the study.} 13 | 14 | \item{full.model}{\code{formula}: the adjustment and the biological 15 | variables of interest.} 16 | 17 | \item{null.model}{\code{formula}: the adjustment variables.} 18 | 19 | \item{ind}{\code{factor}: individuals sampled in the study. Default is 20 | NULL. Optional.} 21 | } 22 | \value{ 23 | \code{\linkS4class{deSet}} object 24 | } 25 | \description{ 26 | \code{build_models} creates a \code{\link{deSet}} object. The user inputs 27 | the full and null models. 
28 | } 29 | \examples{ 30 | # import data from kidney dataset 31 | library(splines) 32 | data(kidney) 33 | age <- kidney$age 34 | sex <- kidney$sex 35 | kidexpr <- kidney$kidexpr 36 | cov <- data.frame(sex = sex, age = age) 37 | 38 | # create models 39 | null.model <- ~sex 40 | full.model <- ~sex + ns(age, df=4) 41 | 42 | # create deSet object from data 43 | de_obj <- build_models(data = kidexpr, cov = cov, null.model = null.model, 44 | full.model = full.model) 45 | } 46 | \author{ 47 | John Storey, Andy Bass 48 | } 49 | \seealso{ 50 | \code{\linkS4class{deSet}}, \code{\link{build_study}} 51 | } 52 | 53 | -------------------------------------------------------------------------------- /man/build_study.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/form_models.R 3 | \name{build_study} 4 | \alias{build_study} 5 | \title{Formulates the experimental models} 6 | \usage{ 7 | build_study(data, grp = NULL, adj.var = NULL, bio.var = NULL, 8 | tme = NULL, ind = NULL, sampling = c("static", "timecourse"), 9 | basis.df = 2, basis.type = c("ncs", "poly")) 10 | } 11 | \arguments{ 12 | \item{data}{\code{matrix}: gene expression data (rows are genes, columns are 13 | samples).} 14 | 15 | \item{grp}{\code{vector}: group assignment in the study (for K-class 16 | studies). Optional.} 17 | 18 | \item{adj.var}{\code{matrix}: adjustment variables. Optional.} 19 | 20 | \item{bio.var}{\code{matrix}: biological variables. Optional.} 21 | 22 | \item{tme}{\code{vector}: time variable in a time course study. Optional.} 23 | 24 | \item{ind}{\code{factor}: individual factor for repeated observations of the 25 | same individuals. Optional.} 26 | 27 | \item{sampling}{\code{string}: type of study. Either "static" or 28 | "timecourse".
Default is "static".} 29 | 30 | \item{basis.df}{\code{numeric}: degrees of freedom of the basis for time 31 | course study. Default is 2.} 32 | 33 | \item{basis.type}{\code{string}: either "ncs" (natural cubic spline) or "poly" 34 | (polynomial spline) basis for time course study. Default is "ncs".} 35 | } 36 | \value{ 37 | \code{\linkS4class{deSet}} object 38 | } 39 | \description{ 40 | \code{build_study} generates the full and null models for users unfamiliar 41 | with building models in R. There are two types of experimental designs: 42 | static and time-course. For more details, refer to the vignette. 43 | } 44 | \examples{ 45 | # import data from kidney dataset 46 | library(splines) 47 | data(kidney) 48 | age <- kidney$age 49 | sex <- kidney$sex 50 | kidexpr <- kidney$kidexpr 51 | 52 | # create deSet object from data 53 | de_obj <- build_study(data = kidexpr, adj.var = sex, tme = age, 54 | sampling = "timecourse", basis.df = 4) 55 | } 56 | \author{ 57 | John Storey, Andy Bass 58 | } 59 | \seealso{ 60 | \code{\linkS4class{deSet}}, \code{\link{build_models}} 61 | } 62 | 63 | -------------------------------------------------------------------------------- /man/deFit-class.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/AllClasses.R 3 | \docType{class} 4 | \name{deFit-class} 5 | \alias{deFit-class} 6 | \title{The differential expression class for the model fits} 7 | \description{ 8 | Object returned from \code{\link{fit_models}} containing information 9 | regarding the model fits for the experiment.
10 | } 11 | \section{Slots}{ 12 | 13 | \describe{ 14 | \item{\code{fit.full}}{\code{matrix}: containing fitted values for the full model.} 15 | 16 | \item{\code{fit.null}}{\code{matrix}: containing fitted values for the null model.} 17 | 18 | \item{\code{res.full}}{\code{matrix}: the residuals of the full model.} 19 | 20 | \item{\code{res.null}}{\code{matrix}: the residuals of the null model.} 21 | 22 | \item{\code{dH.full}}{\code{vector}: contains diagonal elements in the projection 23 | matrix for the full model.} 24 | 25 | \item{\code{beta.coef}}{\code{matrix}: fitted coefficients for the full model.} 26 | 27 | \item{\code{stat.type}}{\code{string}: information on the statistic of interest. 28 | Currently, the only options are ``lrt'' and ``odp''.} 29 | }} 30 | \section{Methods}{ 31 | 32 | \describe{ 33 | \item{\code{fitNull(deFit)}}{Access fitted data from null model.} 34 | \item{\code{fitFull(deFit)}}{Access fitted data from full model.} 35 | \item{\code{resNull(deFit)}}{Access residuals from null model fit.} 36 | \item{\code{resFull(deFit)}}{Access residuals from full model fit.} 37 | \item{\code{betaCoef(deFit)}}{Access beta coefficients in linear model.} 38 | \item{\code{sType(deFit)}}{Access statistic type of model fitting utilized 39 | in function.} 40 | } 41 | } 42 | \author{ 43 | John Storey, Jeffrey Leek, Andrew Bass 44 | } 45 | 46 | -------------------------------------------------------------------------------- /man/deSet-class.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/AllClasses.R 3 | \docType{class} 4 | \name{deSet-class} 5 | \alias{deSet-class} 6 | \title{The differential expression class (deSet)} 7 | \description{ 8 | The \code{deSet} class extends the \code{\link{ExpressionSet}} class. 
9 | While the \code{ExpressionSet} class contains information about the 10 | experiment, the \code{deSet} class contains both experimental information and 11 | additional information relevant for differential expression analysis, 12 | explained below in Slots. 13 | } 14 | \section{Slots}{ 15 | 16 | \describe{ 17 | \item{\code{null.model}}{\code{formula}: contains the adjustment variables in the 18 | experiment. The null model is used for comparison when fitting the 19 | full model.} 20 | 21 | \item{\code{full.model}}{\code{formula}: contains the adjustment variables and the 22 | biological variables of interest.} 23 | 24 | \item{\code{null.matrix}}{\code{matrix}: the null model as a matrix.} 25 | 26 | \item{\code{full.matrix}}{\code{matrix}: the full model as a matrix.} 27 | 28 | \item{\code{individual}}{\code{factor}: contains information on which sample 29 | is from which individual in the experiment.} 30 | 31 | \item{\code{qvalueObj}}{\code{S3 object}: containing \code{qvalue} object. 32 | See \code{\link{qvalue}} for additional details.} 33 | }} 34 | \note{ 35 | See \code{\link{ExpressionSet}} for additional slot information. 
36 | } 37 | \section{Methods}{ 38 | 39 | \describe{ 40 | \item{\code{as(ExpressionSet, "deSet")}}{Coerce objects of 41 | \code{ExpressionSet} to \code{deSet}.} 42 | \item{\code{lrt(deSet, ...)}}{Performs a generalized likelihood ratio test 43 | using the full and null models.} 44 | \item{\code{odp(deSet, ...)}}{Performs the optimal discovery procedure, 45 | which is a new approach for optimally performing many hypothesis tests in 46 | a high-dimensional study.} 47 | \item{\code{kl_clust(deSet, ...)}}{An implementation of mODP that assigns 48 | genes to modules based on the Kullback-Leibler distance.} 49 | \item{\code{fit_models(deSet, ...)}}{Fits a linear model to each gene by 50 | method of least squares.} 51 | \item{\code{apply_qvalue(deSet, ...)}}{Applies \code{\link{qvalue}} 52 | function.} 53 | \item{\code{apply_snm(deSet, ...)}}{Applies supervised normalization of 54 | microarrays (\code{\link{snm}}) on gene expression data.} 55 | \item{\code{apply_sva(deSet, ...)}}{Applies surrogate variable analysis 56 | (\code{\link{sva}}).} 57 | \item{\code{fullMatrix(deSet)}}{Access and set full matrix.} 58 | \item{\code{nullMatrix(deSet)}}{Access and set null matrix.} 59 | \item{\code{fullModel(deSet)}}{Access and set full model.} 60 | \item{\code{nullModel(deSet)}}{Access and set null model.} 61 | \item{\code{individual(deSet)}}{Access and set individual slot.} 62 | \item{\code{qvalueObj(deSet)}}{Access \code{qvalue} object.
63 | See \code{\link{qvalue}}.} 64 | \item{\code{validObject(deSet)}}{Check validity of \code{deSet} object.} 65 | } 66 | } 67 | \author{ 68 | John Storey, Jeffrey Leek, Andrew Bass 69 | } 70 | 71 | -------------------------------------------------------------------------------- /man/deSet.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/AllGenerics.R, R/ExpressionSet-methods.R 3 | \docType{methods} 4 | \name{deSet} 5 | \alias{deSet} 6 | \alias{deSet,ExpressionSet-method} 7 | \title{Create a deSet object from an ExpressionSet} 8 | \usage{ 9 | deSet(object, full.model, null.model, individual = NULL) 10 | 11 | \S4method{deSet}{ExpressionSet}(object, full.model, null.model, 12 | individual = NULL) 13 | } 14 | \arguments{ 15 | \item{object}{\code{S4 object}: \code{\link{ExpressionSet}}} 16 | 17 | \item{full.model}{\code{formula}: full model containing both the 18 | adjustment and the biological variables for the experiment.} 19 | 20 | \item{null.model}{\code{formula}: null model containing the adjustment 21 | variables for the experiment.} 22 | 23 | \item{individual}{\code{factor}: information on repeated samples in 24 | experiment.} 25 | } 26 | \value{ 27 | \code{\linkS4class{deSet}} object 28 | } 29 | \description{ 30 | Creates a \code{\linkS4class{deSet}} object that extends the 31 | \code{\link{ExpressionSet}} object. 32 | } 33 | \note{ 34 | It is essential that the null and full models have the same variables 35 | as the ExpressionSet phenoData column names.
36 | } 37 | \examples{ 38 | # import data 39 | library(splines) 40 | data(kidney) 41 | age <- kidney$age 42 | sex <- kidney$sex 43 | kidexpr <- kidney$kidexpr 44 | cov <- data.frame(sex = sex, age = age) 45 | pDat <- as(cov, "AnnotatedDataFrame") 46 | exp_set <- ExpressionSet(assayData = kidexpr, phenoData = pDat) 47 | 48 | # create models 49 | null_model <- ~sex 50 | full_model <- ~sex + ns(age, df = 4) 51 | 52 | # create deSet object from data 53 | de_obj <- deSet(exp_set, null.model = null_model, 54 | full.model = full_model) 55 | 56 | # optionally add individuals to experiment, in this case there are 36 57 | # individuals that were sampled twice 58 | indSamples <- as.factor(rep(1:36, each = 2)) 59 | de_obj <- deSet(exp_set, null.model = null_model, 60 | full.model = full_model, ind = indSamples) 61 | summary(de_obj) 62 | } 63 | \author{ 64 | John Storey, Andrew Bass 65 | } 66 | \seealso{ 67 | \code{\linkS4class{deSet}}, \code{\link{odp}} and 68 | \code{\link{lrt}} 69 | } 70 | 71 | -------------------------------------------------------------------------------- /man/edge.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/edge.R 3 | \docType{package} 4 | \name{edge} 5 | \alias{edge} 6 | \alias{edge-package} 7 | \title{Extraction of Differential Gene Expression} 8 | \description{ 9 | The edge package implements methods for carrying out differential 10 | expression analyses of genome-wide gene expression studies. Significance 11 | testing using the optimal discovery procedure and generalized likelihood 12 | ratio tests (equivalent to F-tests and t-tests) are implemented for general study 13 | designs. Special functions are available to facilitate the analysis of 14 | common study designs, including time course experiments. 
Other packages 15 | such as snm, sva, and qvalue are integrated in edge to provide a wide range 16 | of tools for gene expression analysis. 17 | } 18 | \examples{ 19 | \dontrun{ 20 | browseVignettes("edge") 21 | } 22 | } 23 | \author{ 24 | John Storey, Jeffrey Leek, Andrew Bass 25 | } 26 | 27 | -------------------------------------------------------------------------------- /man/endotoxin.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/edge.R 3 | \docType{data} 4 | \name{endotoxin} 5 | \alias{endotoxin} 6 | \title{Gene expression dataset from Calvano et al. (2005) Nature} 7 | \format{\itemize{ 8 | \item endoexpr: A 500 rows by 46 columns data frame containing expression 9 | values. 10 | \item class: A vector of length 46 containing information about which 11 | individuals were given endotoxin. 12 | \item ind: A vector of length 46 providing indexing measurements for each 13 | individual in the experiment. 14 | \item time: A vector of length 46 indicating time measurements. 15 | }} 16 | \usage{ 17 | data(endotoxin) 18 | } 19 | \value{ 20 | endotoxin dataset 21 | } 22 | \description{ 23 | The data provide gene expression measurements in an endotoxin study where 24 | four subjects were given endotoxin and four subjects were given a placebo. 25 | Blood samples were collected and leukocytes were isolated from the samples 26 | before infusion and at times 2, 4, 6, 9, 24 hours. 27 | } 28 | \note{ 29 | The data is a random subset of 500 genes from the full dataset. To 30 | download the full data set, go to \url{http://genomine.org/edge/}. 
31 | } 32 | \examples{ 33 | library(splines) 34 | # import data 35 | data(endotoxin) 36 | ind <- endotoxin$ind 37 | class <- endotoxin$class 38 | time <- endotoxin$time 39 | endoexpr <- endotoxin$endoexpr 40 | cov <- data.frame(individual = ind, time = time, class = class) 41 | 42 | # formulate null and full models in experiment 43 | # note: interaction term is a way of taking into account group effects 44 | mNull <- ~ns(time, df=4, intercept = FALSE) + class 45 | mFull <- ~ns(time, df=4, intercept = FALSE) + 46 | ns(time, df=4, intercept = FALSE):class + class 47 | 48 | # create deSet object 49 | de_obj <- build_models(endoexpr, cov = cov, full.model = mFull, 50 | null.model = mNull, ind = ind) 51 | 52 | # Perform ODP/lrt statistic to determine significant genes in study 53 | de_odp <- odp(de_obj, bs.its = 10) 54 | de_lrt <- lrt(de_obj, nullDistn = "bootstrap", bs.its = 10) 55 | 56 | # summarize significance results 57 | summary(de_odp) 58 | } 59 | \references{ 60 | Storey JD, Xiao W, Leek JT, Tompkins RG, and Davis RW. (2005) Significance 61 | analysis of time course microarray experiments. PNAS, 102: 12837-12842. \cr 62 | \url{http://www.pnas.org/content/102/36/12837.full} 63 | } 64 | \keyword{datasets} 65 | 66 | -------------------------------------------------------------------------------- /man/fitFull.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/AllGenerics.R, R/getMethods.R 3 | \docType{methods} 4 | \name{fitFull} 5 | \alias{fitFull} 6 | \alias{fitFull,deFit-method} 7 | \title{Fitted data from the full model} 8 | \usage{ 9 | fitFull(object) 10 | 11 | \S4method{fitFull}{deFit}(object) 12 | } 13 | \arguments{ 14 | \item{object}{\code{S4 object}: \code{\linkS4class{deFit}}} 15 | } 16 | \value{ 17 | \code{fitFull} returns a matrix of fitted values from full model.
18 | } 19 | \description{ 20 | Access the fitted data from the full model in a 21 | \code{\linkS4class{deFit}} object. 22 | } 23 | \examples{ 24 | # import data 25 | library(splines) 26 | data(kidney) 27 | age <- kidney$age 28 | sex <- kidney$sex 29 | kidexpr <- kidney$kidexpr 30 | cov <- data.frame(sex = sex, age = age) 31 | 32 | # create models 33 | null_model <- ~sex 34 | full_model <- ~sex + ns(age, df = 4) 35 | 36 | # create deSet object from data 37 | de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model, 38 | full.model = full_model) 39 | 40 | # run fit_models to get model fits 41 | de_fit <- fit_models(de_obj) 42 | 43 | # extract fitted values for full model 44 | fitted_full <- fitFull(de_fit) 45 | } 46 | \author{ 47 | John Storey, Andrew Bass 48 | } 49 | \seealso{ 50 | \code{\link{fit_models}} 51 | } 52 | 53 | -------------------------------------------------------------------------------- /man/fitNull.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/AllGenerics.R, R/getMethods.R 3 | \docType{methods} 4 | \name{fitNull} 5 | \alias{fitNull} 6 | \alias{fitNull,deFit-method} 7 | \title{Fitted data from the null model} 8 | \usage{ 9 | fitNull(object) 10 | 11 | \S4method{fitNull}{deFit}(object) 12 | } 13 | \arguments{ 14 | \item{object}{\code{S4 object}: \code{\linkS4class{deFit}}} 15 | } 16 | \value{ 17 | \code{fitNull} returns a matrix of fitted values from null model. 18 | } 19 | \description{ 20 | Access the fitted data from the null model in a 21 | \code{\linkS4class{deFit}} object.
22 | } 23 | \examples{ 24 | # import data 25 | library(splines) 26 | data(kidney) 27 | age <- kidney$age 28 | sex <- kidney$sex 29 | kidexpr <- kidney$kidexpr 30 | cov <- data.frame(sex = sex, age = age) 31 | 32 | # create models 33 | null_model <- ~sex 34 | full_model <- ~sex + ns(age, df = 4) 35 | 36 | # create deSet object from data 37 | de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model, 38 | full.model = full_model) 39 | 40 | # run fit_models to get model fits 41 | de_fit <- fit_models(de_obj) 42 | 43 | # extract fitted values from null model 44 | fitted_null <- fitNull(de_fit) 45 | } 46 | \author{ 47 | John Storey, Andrew Bass 48 | } 49 | \seealso{ 50 | \code{\link{fit_models}} 51 | } 52 | 53 | -------------------------------------------------------------------------------- /man/fit_models.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/AllGenerics.R, R/deSet-methods.R 3 | \docType{methods} 4 | \name{fit_models} 5 | \alias{fit_models} 6 | \alias{fit_models,deSet-method} 7 | \title{Linear regression of the null and full models} 8 | \usage{ 9 | fit_models(object, stat.type = c("lrt", "odp"), weights = NULL) 10 | 11 | \S4method{fit_models}{deSet}(object, stat.type = c("lrt", "odp"), 12 | weights = NULL) 13 | } 14 | \arguments{ 15 | \item{object}{\code{S4 object}: \code{\linkS4class{deSet}}.} 16 | 17 | \item{stat.type}{\code{character}: type of statistic to be used. Either 18 | "lrt" or "odp". Default is "lrt".} 19 | 20 | \item{weights}{\code{matrix}: weights for each observation. Default is NULL.} 21 | } 22 | \value{ 23 | \code{\linkS4class{deFit}} object 24 | } 25 | \description{ 26 | \code{fit_models} fits a model matrix to each gene by using the least 27 | squares method. Model fits can be either statistic type "odp" (optimal 28 | discovery procedure) or "lrt" (likelihood ratio test). 
29 | } 30 | \details{ 31 | If "odp" method is implemented then the null model is removed from the full 32 | model (see Storey 2007). Otherwise, the statistic type has no effect on the 33 | model fit. 34 | } 35 | \note{ 36 | \code{fit_models} does not have to be called by the user to use 37 | \code{\link{odp}}, \code{\link{lrt}} or \code{\link{kl_clust}} as it is an 38 | optional input and is implemented in the methods. The 39 | \code{\linkS4class{deFit}} object can be created by the user if a different 40 | statistical implementation is required. 41 | } 42 | \examples{ 43 | # import data 44 | library(splines) 45 | data(kidney) 46 | age <- kidney$age 47 | sex <- kidney$sex 48 | kidexpr <- kidney$kidexpr 49 | cov <- data.frame(sex = sex, age = age) 50 | 51 | # create models 52 | null_model <- ~sex 53 | full_model <- ~sex + ns(age, df = 4) 54 | 55 | # create deSet object from data 56 | de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model, 57 | full.model = full_model) 58 | 59 | # retrieve statistics from linear regression for each gene 60 | fit_lrt <- fit_models(de_obj, stat.type = "lrt") # lrt method 61 | fit_odp <- fit_models(de_obj, stat.type = "odp") # odp method 62 | 63 | # summarize object 64 | summary(fit_odp) 65 | } 66 | \author{ 67 | John Storey 68 | } 69 | \references{ 70 | Storey JD. (2007) The optimal discovery procedure: A new approach to 71 | simultaneous significance testing. Journal of the Royal Statistical 72 | Society, Series B, 69: 347-368. 73 | 74 | Storey JD, Dai JY, and Leek JT. (2007) The optimal discovery procedure for 75 | large-scale significance testing, with applications to comparative 76 | microarray experiments. Biostatistics, 8: 414-432. 77 | 78 | Storey JD, Xiao W, Leek JT, Tompkins RG, and Davis RW. (2005) Significance 79 | analysis of time course microarray experiments. Proceedings of the National 80 | Academy of Sciences, 102: 12837-12842.
81 | } 82 | \seealso{ 83 | \code{\linkS4class{deFit}}, \code{\link{odp}} and 84 | \code{\link{lrt}} 85 | } 86 | 87 | -------------------------------------------------------------------------------- /man/fullMatrix.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/AllGenerics.R, R/getMethods.R, R/setMethods.R 3 | \docType{methods} 4 | \name{fullMatrix} 5 | \alias{fullMatrix} 6 | \alias{fullMatrix,deSet-method} 7 | \alias{fullMatrix<-} 8 | \alias{fullMatrix<-,deSet-method} 9 | \title{Matrix representation of full model} 10 | \usage{ 11 | fullMatrix(object) 12 | 13 | fullMatrix(object) <- value 14 | 15 | \S4method{fullMatrix}{deSet}(object) 16 | 17 | \S4method{fullMatrix}{deSet}(object) <- value 18 | } 19 | \arguments{ 20 | \item{object}{\code{S4 object}: \code{\linkS4class{deSet}}} 21 | 22 | \item{value}{\code{matrix}: full model matrix where the columns are the 23 | covariates and rows are observations} 24 | } 25 | \value{ 26 | \code{fullMatrix} returns the value of the full model matrix. 27 | } 28 | \description{ 29 | These generic functions access and set the full matrix for 30 | \code{\linkS4class{deSet}} object. 
31 | } 32 | \examples{ 33 | # import data 34 | library(splines) 35 | data(kidney) 36 | age <- kidney$age 37 | sex <- kidney$sex 38 | kidexpr <- kidney$kidexpr 39 | cov <- data.frame(sex = sex, age = age) 40 | 41 | # create models 42 | null_model <- ~sex 43 | full_model <- ~sex + ns(age, df = 4) 44 | 45 | # create deSet object from data 46 | de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model, 47 | full.model = full_model) 48 | 49 | # extract the full model equation as a matrix 50 | mat_full <- fullMatrix(de_obj) 51 | } 52 | \author{ 53 | Andrew Bass, John Storey 54 | } 55 | \seealso{ 56 | \code{\linkS4class{deSet}}, \code{\link{fullModel}} 57 | } 58 | 59 | -------------------------------------------------------------------------------- /man/fullModel.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/AllGenerics.R, R/getMethods.R, R/setMethods.R 3 | \docType{methods} 4 | \name{fullModel} 5 | \alias{fullModel} 6 | \alias{fullModel,deSet-method} 7 | \alias{fullModel<-} 8 | \alias{fullModel<-,deSet-method} 9 | \title{Full model equation} 10 | \usage{ 11 | fullModel(object) 12 | 13 | fullModel(object) <- value 14 | 15 | \S4method{fullModel}{deSet}(object) 16 | 17 | \S4method{fullModel}{deSet}(object) <- value 18 | } 19 | \arguments{ 20 | \item{object}{\code{S4 object}: \code{\linkS4class{deSet}}} 21 | 22 | \item{value}{\code{formula}: The experiment design for the full model.} 23 | } 24 | \value{ 25 | the formula for the full model. 26 | } 27 | \description{ 28 | These generic functions access and set the full model for 29 | \code{\linkS4class{deSet}} object. 
30 | } 31 | \examples{ 32 | # import data 33 | library(splines) 34 | data(kidney) 35 | age <- kidney$age 36 | sex <- kidney$sex 37 | kidexpr <- kidney$kidexpr 38 | cov <- data.frame(sex = sex, age = age) 39 | 40 | # create models 41 | null_model <- ~sex 42 | full_model <- ~sex + ns(age, df = 4) 43 | 44 | # create deSet object from data 45 | de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model, 46 | full.model = full_model) 47 | 48 | # extract out the full model equation 49 | mod_full <- fullModel(de_obj) 50 | 51 | # change the full model in the experiment 52 | fullModel(de_obj) <- ~sex + ns(age, df = 2) 53 | } 54 | \author{ 55 | John Storey, Andrew Bass 56 | } 57 | \seealso{ 58 | \code{\linkS4class{deSet}} 59 | } 60 | 61 | -------------------------------------------------------------------------------- /man/gibson.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/edge.R 3 | \docType{data} 4 | \name{gibson} 5 | \alias{gibson} 6 | \title{Gene expression dataset from Idaghdour et al. (2008)} 7 | \format{\itemize{ 8 | \item batch: Batches in experiment. 9 | \item location: Environment/lifestyle of Moroccan Amazigh groups. 10 | \item gender: Sex of individuals. 11 | \item gibexpr: A 500 rows by 46 columns matrix of gene expression values. 12 | }} 13 | \usage{ 14 | data(gibson) 15 | } 16 | \value{ 17 | gibson dataset 18 | } 19 | \description{ 20 | The data provide gene expression measurements in peripheral blood leukocyte 21 | samples from three Moroccan groups leading distinct ways of life: 22 | desert nomadic (DESERT), mountain agrarian (VILLAGE), and coastal urban 23 | (AGADIR). 24 | } 25 | \note{ 26 | These data are a random subset of 500 genes from the total number of genes 27 | in the original data set. To download the full data set, go to 28 | \url{http://genomine.org/de/}. 
29 | } 30 | \examples{ 31 | # import 32 | data(gibson) 33 | batch <- gibson$batch 34 | gender <- gibson$gender 35 | location <- gibson$location 36 | gibexpr <- gibson$gibexpr 37 | cov <- data.frame(Batch = batch, Gender = gender, 38 | Location = location) 39 | 40 | # create deSet for experiment- static experiment 41 | mNull <- ~Gender + Batch 42 | mFull <- ~Gender + Batch + Location 43 | 44 | # create deSet object 45 | de_obj <- build_models(gibexpr, cov = cov, full.model = mFull, 46 | null.model = mNull) 47 | 48 | # Perform ODP/lrt statistic to determine significant genes in study 49 | de_odp <- odp(de_obj, bs.its = 10) 50 | de_lrt <- lrt(de_obj, nullDistn = "bootstrap", bs.its = 10) 51 | 52 | # summarize significance results 53 | summary(de_odp) 54 | } 55 | \references{ 56 | Idaghdour Y, Storey JD, Jadallah S, and Gibson G. (2008) A genome-wide gene 57 | expression signature of lifestyle in peripheral blood of Moroccan Amazighs. 58 | PLoS Genetics, 4: e1000052. 59 | } 60 | \keyword{datasets} 61 | 62 | -------------------------------------------------------------------------------- /man/individual.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/AllGenerics.R, R/getMethods.R, R/setMethods.R 3 | \docType{methods} 4 | \name{individual} 5 | \alias{individual} 6 | \alias{individual,deSet-method} 7 | \alias{individual<-} 8 | \alias{individual<-,deSet-method} 9 | \title{Individuals sampled in experiment} 10 | \usage{ 11 | individual(object) 12 | 13 | individual(object) <- value 14 | 15 | \S4method{individual}{deSet}(object) 16 | 17 | \S4method{individual}{deSet}(object) <- value 18 | } 19 | \arguments{ 20 | \item{object}{\code{\linkS4class{deSet}}} 21 | 22 | \item{value}{\code{factor}: Identifies which samples correspond to which 23 | individuals. 
Important if the same individuals are sampled multiple times 24 | in a longitudinal fashion.} 25 | } 26 | \value{ 27 | \code{individual} returns information regarding distinct individuals 28 | sampled in the experiment. 29 | } 30 | \description{ 31 | These generic functions access and set the individual slot in 32 | \code{\linkS4class{deSet}}. 33 | } 34 | \examples{ 35 | library(splines) 36 | # import data 37 | data(endotoxin) 38 | ind <- endotoxin$ind 39 | time <- endotoxin$time 40 | class <- endotoxin$class 41 | endoexpr <- endotoxin$endoexpr 42 | cov <- data.frame(individual = ind, time = time, class = class) 43 | 44 | # create ExpressionSet object 45 | pDat <- as(cov, "AnnotatedDataFrame") 46 | exp_set <- ExpressionSet(assayData = endoexpr, phenoData = pDat) 47 | 48 | # formulate null and full models in experiment 49 | # note: interaction term is a way of taking into account group effects 50 | mNull <- ~ns(time, df=4, intercept = FALSE) 51 | mFull <- ~ns(time, df=4, intercept = FALSE) + 52 | ns(time, df=4, intercept = FALSE):class + class 53 | 54 | # create deSet object 55 | de_obj <- deSet(exp_set, full.model = mFull, null.model = mNull, 56 | individual = ind) 57 | 58 | # extract out the individuals factor 59 | ind_exp <- individual(de_obj) 60 | } 61 | \author{ 62 | John Storey, Andrew Bass 63 | } 64 | \seealso{ 65 | \code{\linkS4class{deSet}} 66 | } 67 | 68 | -------------------------------------------------------------------------------- /man/kidney.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/edge.R 3 | \docType{data} 4 | \name{kidney} 5 | \alias{kidney} 6 | \title{Gene expression dataset from Rodwell et al. (2004)} 7 | \format{\itemize{ 8 | \item kidcov: A 133 rows by 6 columns data frame detailing the study 9 | design.
10 | \item kidexpr: A 500 rows by 133 columns matrix of gene expression values, 11 | where each row corresponds to a different probe-set and each column to a 12 | different tissue sample. 13 | \item age: A vector of length 133 giving the age of each sample. 14 | \item sex: A vector of length 133 giving the sex of each sample. 15 | }} 16 | \usage{ 17 | data(kidney) 18 | } 19 | \value{ 20 | kidney dataset 21 | } 22 | \description{ 23 | Gene expression measurements from kidney samples were obtained from 72 24 | human subjects ranging in age from 27 to 92 years. Only one array was 25 | obtained per individual, and the age and sex of each individual were 26 | recorded. 27 | } 28 | \note{ 29 | These data are a random subset of 500 probe-sets from the total number of 30 | probe-sets in the original data set. To download the full data set, go to 31 | \url{http://genomine.org/edge/}. The \code{age} and \code{sex} are contained 32 | in \code{kidcov} data frame. 33 | } 34 | \examples{ 35 | # import data 36 | data(kidney) 37 | sex <- kidney$sex 38 | age <- kidney$age 39 | kidexpr <- kidney$kidexpr 40 | 41 | # create model 42 | de_obj <- build_study(data = kidexpr, adj.var = sex, tme = age, 43 | sampling = "timecourse", basis.df = 4) 44 | 45 | # use the ODP/lrt method to determine significant genes 46 | de_odp <- odp(de_obj, bs.its=10) 47 | de_lrt <- lrt(de_obj, nullDistn = "bootstrap", bs.its = 10) 48 | 49 | # summarize significance results 50 | summary(de_odp) 51 | } 52 | \references{ 53 | Storey JD, Xiao W, Leek JT, Tompkins RG, and Davis RW. (2005) Significance 54 | analysis of time course microarray experiments. PNAS, 102: 12837-12842. 
\cr 55 | \url{http://www.pnas.org/content/100/16/9440.full} 56 | } 57 | \keyword{datasets} 58 | 59 | -------------------------------------------------------------------------------- /man/kl_clust.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/AllGenerics.R, R/deSet-methods.R 3 | \docType{methods} 4 | \name{kl_clust} 5 | \alias{kl_clust} 6 | \alias{kl_clust,deSet,deFit-method} 7 | \alias{kl_clust,deSet,missing-method} 8 | \title{Modular optimal discovery procedure (mODP)} 9 | \usage{ 10 | kl_clust(object, de.fit = NULL, n.mods = 50) 11 | 12 | \S4method{kl_clust}{deSet,missing}(object, de.fit = NULL, n.mods = 50) 13 | 14 | \S4method{kl_clust}{deSet,deFit}(object, de.fit = NULL, n.mods = 50) 15 | } 16 | \arguments{ 17 | \item{object}{\code{S4 object}: \code{\linkS4class{deSet}}.} 18 | 19 | \item{de.fit}{\code{S4 object}: \code{\linkS4class{deFit}}.} 20 | 21 | \item{n.mods}{\code{integer}: number of modules (i.e., clusters).} 22 | } 23 | \value{ 24 | A list with the following slots: 25 | \itemize{ 26 | \item {mu.full: cluster averaged fitted values from full model.} 27 | \item {mu.null: cluster averaged fitted values from null model.} 28 | \item {sig.full: cluster standard deviations from full model.} 29 | \item {sig.null: cluster standard deviations from null model.} 30 | \item {n.per.mod: total members in each cluster.} 31 | \item {clustMembers: cluster membership for each gene.} 32 | } 33 | } 34 | \description{ 35 | \code{kl_clust} is an implementation of mODP that assigns genes to modules 36 | based on the Kullback-Leibler distance. 37 | } 38 | \details{ 39 | mODP utilizes a k-means clustering algorithm where genes are 40 | assigned to a cluster based on the Kullback-Leibler distance. Each gene is 41 | assigned a module-average parameter to calculate the ODP score (See Woo, 42 | Leek and Storey 2010 for more details).
The mODP and full ODP produce nearly 43 | exact results but mODP has the advantage of being computationally 44 | faster. 45 | } 46 | \note{ 47 | The results are generally insensitive to the number of modules after a 48 | certain threshold of about n.mods>=50 in our experience. It is recommended 49 | that users experiment with the number of modules. If the number of modules 50 | is equal to the number of genes then the original ODP is implemented. 51 | } 52 | \examples{ 53 | # import data 54 | library(splines) 55 | data(kidney) 56 | age <- kidney$age 57 | sex <- kidney$sex 58 | kidexpr <- kidney$kidexpr 59 | cov <- data.frame(sex = sex, age = age) 60 | 61 | # create models 62 | null_model <- ~sex 63 | full_model <- ~sex + ns(age, df = 4) 64 | 65 | # create deSet object from data 66 | de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model, 67 | full.model = full_model) 68 | 69 | # mODP method 70 | de_clust <- kl_clust(de_obj) 71 | 72 | # change the number of clusters 73 | de_clust <- kl_clust(de_obj, n.mods = 10) 74 | 75 | # input a deFit object 76 | de_fit <- fit_models(de_obj, stat.type = "odp") 77 | de_clust <- kl_clust(de_obj, de.fit = de_fit) 78 | } 79 | \author{ 80 | John Storey, Jeffrey Leek 81 | } 82 | \references{ 83 | Storey JD. (2007) The optimal discovery procedure: A new approach to 84 | simultaneous significance testing. Journal of the Royal Statistical 85 | Society, Series B, 69: 347-368. 86 | 87 | Storey JD, Dai JY, and Leek JT. (2007) The optimal discovery procedure for 88 | large-scale significance testing, with applications to comparative 89 | microarray experiments. Biostatistics, 8: 414-432. 90 | 91 | Woo S, Leek JT, Storey JD (2010) A computationally efficient modular optimal 92 | discovery procedure. Bioinformatics, 27(4): 509-515. 
93 | } 94 | \seealso{ 95 | \code{\link{odp}}, \code{\link{fit_models}} 96 | } 97 | 98 | -------------------------------------------------------------------------------- /man/lrt.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/AllGenerics.R, R/deSet-methods.R 3 | \docType{methods} 4 | \name{lrt} 5 | \alias{lrt} 6 | \alias{lrt,deSet,deFit-method} 7 | \alias{lrt,deSet,missing-method} 8 | \title{Performs F-test (likelihood ratio test using Normal likelihood)} 9 | \usage{ 10 | lrt(object, de.fit, nullDistn = c("normal", "bootstrap"), weights = NULL, 11 | bs.its = 100, seed = NULL, verbose = TRUE, mod.F = FALSE, ...) 12 | 13 | \S4method{lrt}{deSet,missing}(object, de.fit, nullDistn = c("normal", 14 | "bootstrap"), weights = NULL, bs.its = 100, seed = NULL, 15 | verbose = TRUE, mod.F = FALSE, ...) 16 | 17 | \S4method{lrt}{deSet,deFit}(object, de.fit, nullDistn = c("normal", 18 | "bootstrap"), weights = NULL, bs.its = 100, seed = NULL, 19 | verbose = TRUE, mod.F = FALSE, ...) 20 | } 21 | \arguments{ 22 | \item{object}{\code{S4 object}: \code{\linkS4class{deSet}}.} 23 | 24 | \item{de.fit}{\code{S4 object}: \code{\linkS4class{deFit}}. Optional.} 25 | 26 | \item{nullDistn}{\code{character}: either "normal" or "bootstrap", If 27 | "normal" then the p-values are calculated using the F distribution. If 28 | "bootstrap" then a bootstrap algorithm is implemented to simulate 29 | statistics from the null distribution. In the "bootstrap" case, empirical 30 | p-values are calculated using the observed and null statistics (see 31 | \code{\link{empPvals}}). Default is "normal".} 32 | 33 | \item{weights}{\code{matrix}: weights for each observation. Default is NULL.} 34 | 35 | \item{bs.its}{\code{integer}: number of null statistics generated (only 36 | applicable for "bootstrap" method). 
Default is 100.} 37 | 38 | \item{seed}{\code{integer}: set the seed value. Default is NULL.} 39 | 40 | \item{verbose}{\code{boolean}: print iterations for bootstrap method. 41 | Default is TRUE.} 42 | 43 | \item{mod.F}{\code{boolean}: Moderated F-test, recommended for experiments 44 | with a small sample size. Default is FALSE.} 45 | 46 | \item{...}{Additional arguments for \code{\link{apply_qvalue}} and 47 | \code{\link{empPvals}} function.} 48 | } 49 | \value{ 50 | \code{\linkS4class{deSet}} object 51 | } 52 | \description{ 53 | \code{lrt} performs a generalized likelihood ratio test using the full and 54 | null models. 55 | } 56 | \details{ 57 | \code{lrt} fits the full and null models to each gene using the 58 | function \code{\link{fit_models}} and then performs a likelihood ratio test. 59 | The user has the option to calculate p-values using a Normal distribution 60 | assumption or through a bootstrap algorithm. If \code{nullDistn} is 61 | "bootstrap" then empirical p-values will be determined from the 62 | \code{\link{qvalue}} package (see \code{\link{empPvals}}).
63 | } 64 | \examples{ 65 | # import data 66 | library(splines) 67 | data(kidney) 68 | age <- kidney$age 69 | sex <- kidney$sex 70 | kidexpr <- kidney$kidexpr 71 | cov <- data.frame(sex = sex, age = age) 72 | 73 | # create models 74 | null_model <- ~sex 75 | full_model <- ~sex + ns(age, df = 4) 76 | 77 | # create deSet object from data 78 | de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model, 79 | full.model = full_model) 80 | 81 | # lrt method 82 | de_lrt <- lrt(de_obj, nullDistn = "normal") 83 | 84 | # to generate p-values from bootstrap 85 | de_lrt <- lrt(de_obj, nullDistn = "bootstrap", bs.its = 30) 86 | 87 | # input a deFit object directly 88 | de_fit <- fit_models(de_obj, stat.type = "lrt") 89 | de_lrt <- lrt(de_obj, de.fit = de_fit) 90 | 91 | # summarize object 92 | summary(de_lrt) 93 | } 94 | \author{ 95 | John Storey, Andrew Bass 96 | } 97 | \references{ 98 | Storey JD, Xiao W, Leek JT, Tompkins RG, and Davis RW. (2005) Significance 99 | analysis of time course microarray experiments. Proceedings of the National 100 | Academy of Sciences, 102: 12837-12842. 
101 | 102 | \url{http://en.wikipedia.org/wiki/Likelihood-ratio_test} 103 | } 104 | \seealso{ 105 | \code{\linkS4class{deSet}}, \code{\link{build_models}}, 106 | \code{\link{odp}} 107 | } 108 | 109 | -------------------------------------------------------------------------------- /man/nullMatrix.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/AllGenerics.R, R/getMethods.R, R/setMethods.R 3 | \docType{methods} 4 | \name{nullMatrix} 5 | \alias{nullMatrix} 6 | \alias{nullMatrix,deSet-method} 7 | \alias{nullMatrix<-} 8 | \alias{nullMatrix<-,deSet-method} 9 | \title{Matrix representation of null model} 10 | \usage{ 11 | nullMatrix(object) 12 | 13 | nullMatrix(object) <- value 14 | 15 | \S4method{nullMatrix}{deSet}(object) 16 | 17 | \S4method{nullMatrix}{deSet}(object) <- value 18 | } 19 | \arguments{ 20 | \item{object}{\code{S4 object}: \code{\linkS4class{deSet}}} 21 | 22 | \item{value}{\code{matrix}: null model matrix where columns are covariates 23 | and rows are observations} 24 | } 25 | \value{ 26 | \code{nullMatrix} returns the value of the null model matrix. 27 | } 28 | \description{ 29 | These generic functions access and set the null matrix for 30 | \code{\linkS4class{deSet}} object. 
31 | } 32 | \examples{ 33 | # import data 34 | library(splines) 35 | data(kidney) 36 | age <- kidney$age 37 | sex <- kidney$sex 38 | kidexpr <- kidney$kidexpr 39 | cov <- data.frame(sex = sex, age = age) 40 | 41 | # create models 42 | null_model <- ~sex 43 | full_model <- ~sex + ns(age, df = 4) 44 | 45 | # create deSet object from data 46 | de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model, 47 | full.model = full_model) 48 | 49 | # extract the null model as a matrix 50 | mat_null <- nullMatrix(de_obj) 51 | } 52 | \author{ 53 | John Storey, Andrew Bass 54 | } 55 | \seealso{ 56 | \code{\linkS4class{deSet}}, \code{\link{fullModel}} and 57 | \code{\link{nullModel}} 58 | } 59 | 60 | -------------------------------------------------------------------------------- /man/nullModel.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/AllGenerics.R, R/getMethods.R, R/setMethods.R 3 | \docType{methods} 4 | \name{nullModel} 5 | \alias{nullModel} 6 | \alias{nullModel,deSet-method} 7 | \alias{nullModel<-} 8 | \alias{nullModel<-,deSet-method} 9 | \title{Null model equation from deSet object} 10 | \usage{ 11 | nullModel(object) 12 | 13 | nullModel(object) <- value 14 | 15 | \S4method{nullModel}{deSet}(object) 16 | 17 | \S4method{nullModel}{deSet}(object) <- value 18 | } 19 | \arguments{ 20 | \item{object}{\code{S4 object}: \code{\linkS4class{deSet}}} 21 | 22 | \item{value}{\code{formula}: The experiment design for the null model.} 23 | } 24 | \value{ 25 | \code{nullModel} returns the formula for the null model. 26 | } 27 | \description{ 28 | These generic functions access and set the null model for 29 | \code{\linkS4class{deSet}} object.
30 | } 31 | \examples{ 32 | # import data 33 | library(splines) 34 | data(kidney) 35 | age <- kidney$age 36 | sex <- kidney$sex 37 | kidexpr <- kidney$kidexpr 38 | cov <- data.frame(sex = sex, age = age) 39 | 40 | # create models 41 | null_model <- ~sex 42 | full_model <- ~sex + ns(age, df = 4) 43 | 44 | # create deSet object from data 45 | de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model, 46 | full.model = full_model) 47 | 48 | # extract the null model equation 49 | mod_null <- nullModel(de_obj) 50 | 51 | # change null model in experiment but must update full model 52 | nullModel(de_obj) <- ~1 53 | fullModel(de_obj) <- ~1 + ns(age, df=4) 54 | } 55 | \author{ 56 | John Storey, Andrew Bass 57 | } 58 | \seealso{ 59 | \code{\linkS4class{deSet}} 60 | } 61 | \keyword{nullModel,} 62 | \keyword{nullModel<-} 63 | 64 | -------------------------------------------------------------------------------- /man/odp.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/AllGenerics.R, R/deSet-methods.R 3 | \docType{methods} 4 | \name{odp} 5 | \alias{odp} 6 | \alias{odp,deSet,deFit-method} 7 | \alias{odp,deSet,missing-method} 8 | \title{The optimal discovery procedure} 9 | \usage{ 10 | odp(object, de.fit, odp.parms = NULL, weights = NULL, bs.its = 100, 11 | n.mods = 50, seed = NULL, verbose = TRUE, ...) 12 | 13 | \S4method{odp}{deSet,missing}(object, de.fit, odp.parms = NULL, 14 | weights = NULL, bs.its = 100, n.mods = 50, seed = NULL, 15 | verbose = TRUE, ...) 16 | 17 | \S4method{odp}{deSet,deFit}(object, de.fit, odp.parms = NULL, 18 | weights = NULL, bs.its = 100, n.mods = 50, seed = NULL, 19 | verbose = TRUE, ...) 20 | } 21 | \arguments{ 22 | \item{object}{\code{S4 object}: \code{\linkS4class{deSet}}} 23 | 24 | \item{de.fit}{\code{S4 object}: \code{\linkS4class{deFit}}. 
Optional.} 25 | 26 | \item{odp.parms}{\code{list}: parameters for each cluster. See 27 | \code{\link{kl_clust}}.} 28 | 29 | \item{weights}{\code{matrix}: weights for each observation. Default is NULL.} 30 | 31 | \item{bs.its}{\code{numeric}: number of null bootstrap iterations. Default 32 | is 100.} 33 | 34 | \item{n.mods}{\code{integer}: number of clusters used in 35 | \code{\link{kl_clust}}. Default is 50.} 36 | 37 | \item{seed}{\code{integer}: set the seed value. Default is NULL.} 38 | 39 | \item{verbose}{\code{boolean}: print iterations for bootstrap method. 40 | Default is TRUE.} 41 | 42 | \item{...}{Additional arguments for \code{\link{qvalue}} and 43 | \code{\link{empPvals}}.} 44 | } 45 | \value{ 46 | \code{\linkS4class{deSet}} object 47 | } 48 | \description{ 49 | \code{odp} performs the optimal discovery procedure, which is a framework for 50 | optimally performing many hypothesis tests in a high-dimensional study. When 51 | testing whether a feature is significant, the optimal discovery procedure 52 | uses information across all features when testing for significance. 53 | } 54 | \details{ 55 | The full ODP estimator computationally grows quadratically with respect to 56 | the number of genes. This becomes computationally taxing at a certain point. 57 | Therefore, an alternative method called mODP is used which has been shown to 58 | provide results that are very similar. mODP utilizes a clustering algorithm 59 | where genes are assigned to a cluster based on the Kullback-Leibler distance. 60 | Each gene is assigned a module-average parameter to calculate the ODP score 61 | and it reduces the computation time to approximately linear (see Woo, Leek 62 | and Storey 2010). If the number of clusters is equal to the number of genes 63 | then the original ODP is implemented. Depending on the number of hypothesis 64 | tests, this can take some time.
65 | } 66 | \examples{ 67 | # import data 68 | library(splines) 69 | data(kidney) 70 | age <- kidney$age 71 | sex <- kidney$sex 72 | kidexpr <- kidney$kidexpr 73 | cov <- data.frame(sex = sex, age = age) 74 | 75 | # create models 76 | null_model <- ~sex 77 | full_model <- ~sex + ns(age, df = 4) 78 | 79 | # create deSet object from data 80 | de_obj <- build_models(data = kidexpr, cov = cov, 81 | null.model = null_model, full.model = full_model) 82 | 83 | # odp method 84 | de_odp <- odp(de_obj, bs.its = 30) 85 | 86 | # input a deFit object or ODP parameters ... not necessary 87 | de_fit <- fit_models(de_obj, stat.type = "odp") 88 | de_clust <- kl_clust(de_obj, n.mods = 10) 89 | de_odp <- odp(de_obj, de.fit = de_fit, odp.parms = de_clust, 90 | bs.its = 30) 91 | 92 | # summarize object 93 | summary(de_odp) 94 | } 95 | \author{ 96 | John Storey, Jeffrey Leek, Andrew Bass 97 | } 98 | \references{ 99 | Storey JD. (2007) The optimal discovery procedure: A new approach to 100 | simultaneous significance testing. Journal of the Royal Statistical 101 | Society, Series B, 69: 347-368. 102 | 103 | Storey JD, Dai JY, and Leek JT. (2007) The optimal discovery procedure for 104 | large-scale significance testing, with applications to comparative 105 | microarray experiments. Biostatistics, 8: 414-432. 106 | 107 | Woo S, Leek JT, Storey JD (2010) A computationally efficient modular 108 | optimal discovery procedure. Bioinformatics, 27(4): 509-515. 
109 | } 110 | \seealso{ 111 | \code{\link{kl_clust}}, \code{\link{build_models}} and 112 | \code{\linkS4class{deSet}} 113 | } 114 | 115 | -------------------------------------------------------------------------------- /man/qvalueObj.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/AllGenerics.R, R/getMethods.R, R/setMethods.R 3 | \docType{methods} 4 | \name{qvalueObj} 5 | \alias{qvalueObj} 6 | \alias{qvalueObj,deSet-method} 7 | \alias{qvalueObj<-} 8 | \alias{qvalueObj<-,deSet-method} 9 | \title{Access/set qvalue slot} 10 | \usage{ 11 | qvalueObj(object) 12 | 13 | qvalueObj(object) <- value 14 | 15 | \S4method{qvalueObj}{deSet}(object) 16 | 17 | \S4method{qvalueObj}{deSet}(object) <- value 18 | } 19 | \arguments{ 20 | \item{object}{\code{S4 object}: \code{\linkS4class{deSet}}} 21 | 22 | \item{value}{S3 \code{object}: \code{\link{qvalue}}} 23 | } 24 | \value{ 25 | \code{qvalueObj} returns a \code{\link{qvalue}} object. 26 | } 27 | \description{ 28 | These generic functions access and set the \code{qvalue} object in the 29 | \code{\linkS4class{deSet}} object. 
30 | } 31 | \examples{ 32 | # import data 33 | library(splines) 34 | library(qvalue) 35 | data(kidney) 36 | age <- kidney$age 37 | sex <- kidney$sex 38 | kidexpr <- kidney$kidexpr 39 | cov <- data.frame(sex = sex, age = age) 40 | 41 | # create models 42 | null_model <- ~sex 43 | full_model <- ~sex + ns(age, df = 4) 44 | 45 | # create deSet object from data 46 | de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model, 47 | full.model = full_model) 48 | 49 | # run the odp method 50 | de_odp <- odp(de_obj, bs.its = 20) 51 | 52 | # extract out significance results 53 | qval_obj <- qvalueObj(de_odp) 54 | 55 | # run qvalue and assign it to deSet slot 56 | pvals <- qval_obj$pvalues 57 | qval_new <- qvalue(pvals, pfdr = TRUE, fdr.level = 0.1) 58 | qvalueObj(de_odp) <- qval_new 59 | } 60 | \author{ 61 | John Storey, Andrew Bass 62 | } 63 | \seealso{ 64 | \code{\link{lrt}}, \code{\link{odp}} and 65 | \code{\linkS4class{deSet}} 66 | } 67 | 68 | -------------------------------------------------------------------------------- /man/resFull.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/AllGenerics.R, R/getMethods.R 3 | \docType{methods} 4 | \name{resFull} 5 | \alias{resFull} 6 | \alias{resFull,deFit-method} 7 | \title{Residuals of full model fit} 8 | \usage{ 9 | resFull(object) 10 | 11 | \S4method{resFull}{deFit}(object) 12 | } 13 | \arguments{ 14 | \item{object}{\code{S4 object}: \code{\linkS4class{deFit}}} 15 | } 16 | \value{ 17 | \code{resFull} returns a matrix of residuals from full model. 18 | } 19 | \description{ 20 | Access the fitted full model residuals in an \code{\linkS4class{deFit}} 21 | object. 
22 | } 23 | \examples{ 24 | # import data 25 | library(splines) 26 | data(kidney) 27 | age <- kidney$age 28 | sex <- kidney$sex 29 | kidexpr <- kidney$kidexpr 30 | cov <- data.frame(sex = sex, age = age) 31 | 32 | # create models 33 | null_model <- ~sex 34 | full_model <- ~sex + ns(age, df = 4) 35 | 36 | # create deSet object from data 37 | de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model, 38 | full.model = full_model) 39 | 40 | # run fit_models to get model fits 41 | de_fit <- fit_models(de_obj) 42 | 43 | # extract out the full residuals from the model fit 44 | res_full <- resFull(de_fit) 45 | } 46 | \author{ 47 | John Storey, Andrew Bass 48 | } 49 | \seealso{ 50 | \code{\link{fit_models}} 51 | } 52 | 53 | -------------------------------------------------------------------------------- /man/resNull.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/AllGenerics.R, R/getMethods.R 3 | \docType{methods} 4 | \name{resNull} 5 | \alias{resNull} 6 | \alias{resNull,deFit-method} 7 | \title{Residuals of null model fit} 8 | \usage{ 9 | resNull(object) 10 | 11 | \S4method{resNull}{deFit}(object) 12 | } 13 | \arguments{ 14 | \item{object}{\code{S4 object}: \code{\linkS4class{deFit}}} 15 | } 16 | \value{ 17 | \code{resNull} returns a matrix of residuals from null model. 18 | } 19 | \description{ 20 | Access the fitted null model residuals in an \code{\linkS4class{deFit}} 21 | object. 
22 | } 23 | \examples{ 24 | # import data 25 | library(splines) 26 | data(kidney) 27 | age <- kidney$age 28 | sex <- kidney$sex 29 | kidexpr <- kidney$kidexpr 30 | cov <- data.frame(sex = sex, age = age) 31 | 32 | # create models 33 | null_model <- ~sex 34 | full_model <- ~sex + ns(age, df = 4) 35 | 36 | # create deSet object from data 37 | de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model, 38 | full.model = full_model) 39 | 40 | # run fit_models to get model fits 41 | de_fit <- fit_models(de_obj) 42 | 43 | # extract out the null residuals from the model fits 44 | res_null <- resNull(de_fit) 45 | } 46 | \author{ 47 | John Storey, Andrew Bass 48 | } 49 | \seealso{ 50 | \code{\link{fit_models}} 51 | } 52 | 53 | -------------------------------------------------------------------------------- /man/sType.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/AllGenerics.R, R/getMethods.R 3 | \docType{methods} 4 | \name{sType} 5 | \alias{sType} 6 | \alias{sType,deFit-method} 7 | \title{Statistic type used in analysis} 8 | \usage{ 9 | sType(object) 10 | 11 | \S4method{sType}{deFit}(object) 12 | } 13 | \arguments{ 14 | \item{object}{\code{S4 object}: \code{\linkS4class{deFit}}} 15 | } 16 | \value{ 17 | \code{sType} returns the statistic type- either "odp" or "lrt". 18 | } 19 | \description{ 20 | Access the statistic type in a \code{\linkS4class{deFit}} object. Can 21 | either be the optimal discovery procedure (odp) or the likelihood ratio 22 | test (lrt). 
23 | } 24 | \examples{ 25 | # import data 26 | library(splines) 27 | data(kidney) 28 | age <- kidney$age 29 | sex <- kidney$sex 30 | kidexpr <- kidney$kidexpr 31 | cov <- data.frame(sex = sex, age = age) 32 | 33 | # create models 34 | null_model <- ~sex 35 | full_model <- ~sex + ns(age, df = 4) 36 | 37 | # create deSet object from data 38 | de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model, 39 | full.model = full_model) 40 | 41 | # run fit_models to get model fits 42 | de_fit <- fit_models(de_obj) 43 | 44 | # extract the statistic type of model fits 45 | stat_type <- sType(de_fit) 46 | } 47 | \author{ 48 | John Storey, Andrew Bass 49 | } 50 | \seealso{ 51 | \code{\link{fit_models}}, \code{\linkS4class{deFit}} and 52 | \code{\linkS4class{deSet}} 53 | } 54 | \keyword{sType} 55 | 56 | -------------------------------------------------------------------------------- /man/show.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/AllGenerics.R, R/deFit-methods.R, R/deSet-methods.R 3 | \docType{methods} 4 | \name{show} 5 | \alias{show} 6 | \alias{show,deFit-method} 7 | \alias{show,deSet-method} 8 | \title{Show function for deFit and deSet} 9 | \usage{ 10 | show(object) 11 | 12 | \S4method{show}{deFit}(object) 13 | 14 | \S4method{show}{deSet}(object) 15 | } 16 | \arguments{ 17 | \item{object}{\code{S4 object}: \code{\linkS4class{deSet}}} 18 | 19 | \item{\dots}{additional parameters} 20 | } 21 | \value{ 22 | Nothing of interest 23 | } 24 | \description{ 25 | Show function for \code{\linkS4class{deFit}} and \code{\linkS4class{deSet}} 26 | objects. 
27 | } 28 | \examples{ 29 | # import data 30 | library(splines) 31 | data(kidney) 32 | age <- kidney$age 33 | sex <- kidney$sex 34 | kidexpr <- kidney$kidexpr 35 | cov <- data.frame(sex = sex, age = age) 36 | 37 | # create models 38 | null_model <- ~sex 39 | full_model <- ~sex + ns(age, df = 4) 40 | 41 | # create deSet object from data 42 | de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model, 43 | full.model = full_model) 44 | 45 | # get summary 46 | summary(de_obj) 47 | 48 | # run odp and summarize 49 | de_odp <- odp(de_obj, bs.its= 20) 50 | de_odp 51 | } 52 | \author{ 53 | John Storey, Andrew Bass 54 | } 55 | 56 | -------------------------------------------------------------------------------- /man/summary.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/AllGenerics.R, R/deFit-methods.R, R/deSet-methods.R 3 | \docType{methods} 4 | \name{summary} 5 | \alias{summary} 6 | \alias{summary,deFit-method} 7 | \alias{summary,deSet-method} 8 | \title{Summary of deFit and deSet} 9 | \usage{ 10 | summary(object, ...) 11 | 12 | \S4method{summary}{deFit}(object) 13 | 14 | \S4method{summary}{deSet}(object, ...) 15 | } 16 | \arguments{ 17 | \item{object}{\code{S4 object}: \code{\linkS4class{deSet}}} 18 | 19 | \item{\dots}{additional parameters} 20 | } 21 | \value{ 22 | Summary of \code{\linkS4class{deSet}} object 23 | } 24 | \description{ 25 | Summary of \code{\linkS4class{deFit}} and \code{\linkS4class{deSet}} objects. 
26 | } 27 | \examples{ 28 | # import data 29 | library(splines) 30 | data(kidney) 31 | age <- kidney$age 32 | sex <- kidney$sex 33 | kidexpr <- kidney$kidexpr 34 | cov <- data.frame(sex = sex, age = age) 35 | 36 | # create models 37 | null_model <- ~sex 38 | full_model <- ~sex + ns(age, df = 4) 39 | 40 | # create deSet object from data 41 | de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model, 42 | full.model = full_model) 43 | 44 | # get summary 45 | summary(de_obj) 46 | 47 | # run odp and summarize 48 | de_odp <- odp(de_obj, bs.its= 20) 49 | summary(de_odp) 50 | } 51 | \author{ 52 | John Storey, Andrew Bass 53 | } 54 | \keyword{summary} 55 | 56 | -------------------------------------------------------------------------------- /src/edge-init.c: -------------------------------------------------------------------------------- 1 | #include "edgeKLODP.h" 2 | #include 3 | /* Argument type vectors for the two routines registered below: one entry per parameter of the matching C function, in declaration order (REALSXP for double pointers, INTSXP for int pointers). */ 4 | static R_NativePrimitiveArgType odpScoreCluster_t[] = { 5 | REALSXP, REALSXP, REALSXP, INTSXP, INTSXP, INTSXP, INTSXP, INTSXP, REALSXP 6 | }; 7 | 8 | static R_NativePrimitiveArgType kldistance_t[] = { 9 | REALSXP, REALSXP, REALSXP, REALSXP, INTSXP, INTSXP, INTSXP, REALSXP 10 | }; 11 | /* Table of entry points: name, function pointer, argument count, argument types. The counts (9 and 8) must equal the lengths of the type vectors above; the NULL row terminates the table. */ 12 | R_CMethodDef cMethods[] = { 13 | {"odpScoreCluster", (DL_FUNC) &odpScoreCluster, 9, odpScoreCluster_t}, 14 | {"kldistance", (DL_FUNC) &kldistance, 8, kldistance_t}, 15 | {NULL, NULL, 0} 16 | 17 | }; 18 | /* Registers cMethods with R for this DLL; the three NULL arguments mean no other routine tables are registered. */ 19 | void R_init_edge(DllInfo *info) { 20 | R_registerRoutines(info, cMethods, NULL, NULL, NULL); 21 | } -------------------------------------------------------------------------------- /src/edgeKLODP.c: -------------------------------------------------------------------------------- 1 | #include "edgeKLODP.h" 2 | 3 | /******************************************************************************************** 4 | functions for KLODP: 5 | odpScoreCluster: compute sum of normal densities to be used as numerator or denominator in score 6 | with c.member.
7 | ********************************************************************************************/ 8 | /* odpScoreCluster: fills scr[0..m-1]. first[i] is the sum of squares of row i of sumDat (n values per row). scr[i] accumulates, over g < p, cluster[g] times sigma[g]^(-n) times exp(-0.5 / sigma[g]^2 times q), where q = first[i] when null is nonzero and q = first[i] - middle[g] + mu[g] when null is 0. NOTE(review): in the null == 0 branch sumDat is also read at offset n*m + g*n, so it must hold p further rows of length n after the first m rows -- presumably the per-component means; confirm against the R caller. */ 9 | void odpScoreCluster(double *sumDat, double *mu, double *sigma, int *m, int *n, int *p, int *null, int *cluster, double *scr) { 10 | int i, j, g; 11 | double *first, *middle; 12 | 13 | /* first is always needed; middle is only allocated for the alternative component (null == 0) */ 14 | /* allocate memory */ 15 | first = vector(0, *m - 1); 16 | 17 | /* initialize to zero */ 18 | for(i = 0; i < *m; i++) 19 | first[i] = 0.0; 20 | 21 | if(*null == 0) { 22 | /* allocate memory */ 23 | middle = vector(0, *p - 1); 24 | 25 | /* initialize to zero */ 26 | for(i = 0; i < *p; i++) { 27 | middle[i] = 0.0; 28 | } 29 | } 30 | /* first[i] = squared norm of row i of sumDat */ 31 | for(i = 0; i < *m; i++) { 32 | for(j=0; j< *n ; j++){ 33 | first[i] += sumDat[j + i * *n]*sumDat[j + i * *n]; 34 | } 35 | } 36 | 37 | for(i = 0; i < *m; i++) { 38 | scr[i] = 0.0; 39 | 40 | for(g = 0; g < *p; g++) { /* g scans genes */ 41 | /* alternative component */ 42 | if(*null == 0) { 43 | /* middle[j] += 2 * sumDat[i + (l + 1) * *m] * mu[g + l * *m];*/ 44 | for(j=0; j< *n ; j++){ 45 | middle[g] += 2 * sumDat[j + i * *n]*sumDat[j + g * *n + *n * *m]; 46 | } 47 | /*last[g] += nGrp[l] * mu[g + l * *m] * mu[g + l * *m];*/ 48 | scr[i] += pow(1 / sigma[g], *n) * exp(-0.5 / sigma[g] / sigma[g] * (first[i] - middle[g] + mu[g])) * cluster[g]; 49 | } else /* null component */ 50 | scr[i] += pow(1 / sigma[g], *n) * exp(-0.5 / sigma[g] / sigma[g] * first[i]) * cluster[g]; 51 | } 52 | /* reset vectors to zero, if necessary: middle is accumulated per row i, so it is cleared before the next i */ 53 | if(*null == 0) { 54 | for(g = 0; g < *p; g++) { 55 | middle[g] = 0.0; 56 | } 57 | } 58 | 59 | } 60 | 61 | /* free memory, if necessary */ 62 | free_vector(first, 0, *m - 1); 63 | 64 | if(*null == 0) { 65 | free_vector(middle, 0, *p - 1); 66 | } 67 | } 68 | /* kldistance: for each of the m rows of fit (index i) and each of the nc rows of centerFit (index j), all of length n, stores in kldd[j + i times nc] the value sum times (1/centerVar[j] + 1/var[i]) over 2, plus n times (centerVar[j]/var[i] + var[i]/centerVar[j]) over 2, minus n, where sum is the squared Euclidean distance between the two rows. */ 69 | void kldistance(double *centerFit, double *centerVar, double *fit, double *var, int *m, int *nc, int *n, double *kldd) { 70 | int i, j, l; 71 | double sum; 72 | 73 | for(i = 0; i < *m; i++) { 74 | for(j = 0; j
< *nc; j++) { /* j scans clusters; l (inner loop) scans the n entries of each row */ 75 | kldd[j + i* *nc] = 0.0; 76 | sum = 0.0; 77 | for(l=0; l< *n ; l++){ 78 | sum += pow((centerFit[l + j* *n]-fit[l + i* *n]),2); 79 | } 80 | kldd[j + i* *nc] = (sum * (1 / centerVar[j] + 1 / var[i]))/2 + *n * (centerVar[j] / var[i] + var[i] / centerVar[j])/2 - *n; 81 | } 82 | } 83 | } 84 | 85 | 86 | 87 | /* quicksort routine: sorts w[low..high] in place, using w[low] as the pivot. NOTE(review): the upward scan tests lo < n before the ++lo, so when every element after the pivot is smaller it can read w[n]; if n is the array length that is one past the end -- confirm callers provide a sentinel or slack. */ 88 | void sortQK(int low, int high, int n, double *w) { 89 | if(low < high) { 90 | int lo = low, hi = high + 1; 91 | double elem = w[low]; 92 | for (;;) { 93 | while ((lo < n) && (w[++lo] < elem)); 94 | while ((hi >= 0) && (w[--hi] > elem)); 95 | if (lo < hi) swapQK(lo, hi, w); 96 | else break; 97 | } 98 | 99 | swapQK(low, hi, w); 100 | sortQK(low, hi - 1, n, w); 101 | sortQK(hi + 1, high, n, w); 102 | } 103 | } 104 | 105 | /* swap function for use with sortQK(): exchanges w[i] and w[j] */ 106 | void swapQK(int i, int j, double *w) { 107 | double tmp = w[i]; 108 | 109 | w[i] = w[j]; 110 | w[j] = tmp; 111 | } 112 | 113 | /* allocate an int vector with subscript range v[nl...nh]; NR_END extra elements are allocated in front and the returned pointer is offset so that v[nl] is valid. NOTE(review): a failed malloc is only reported via Rprintf and the offset pointer is still returned. */ 114 | int *ivector(int nl, int nh) { 115 | int *v; 116 | 117 | v = (int *) malloc((size_t)((nh - nl + 1 + NR_END) * sizeof(int))); 118 | if(!v) Rprintf("\n allocation failure in ivector()\n"); 119 | return v - nl + NR_END; 120 | } 121 | 122 | /* free an int vector allocated with ivector(); nl must be the value used at allocation (nh is unused) */ 123 | void free_ivector(int *v, int nl, int nh) { 124 | free((FREE_ARG) (v + nl - NR_END)); 125 | } 126 | 127 | /* allocate an int matrix with subscript ranges m[nrl...nrh][ncl...nch]; all rows live in one contiguous block addressed through a separate array of row pointers. NOTE(review): failed mallocs are only reported via Rprintf and execution continues. */ 128 | int **imatrix(int nrl, int nrh, int ncl, int nch) { 129 | int i, nrow = nrh - nrl + 1, ncol = nch - ncl + 1; 130 | int **m; 131 | 132 | /* allocate pointers to rows */ 133 | m = (int **) malloc((size_t)((nrow + NR_END) * sizeof(int*))); 134 | if(!m) Rprintf("%s", "allocation fialure\n"); 135 | 136 | m += NR_END; 137 | m -= nrl; 138 | 139 | /* allocate the data block and point the first row at it */ 140 | m[nrl] = (int *) malloc((size_t)((nrow * ncol + NR_END) * sizeof(int))); 141 | if(!m[nrl]) Rprintf("%s",
"allocation fialure\n"); 142 | m[nrl] += NR_END; 143 | m[nrl] -= ncl; 144 | /* remaining row pointers step through the block ncol elements at a time */ 145 | for(i = nrl + 1; i <= nrh; i++) m[i] = m[i - 1] + ncol; 146 | return m; 147 | } 148 | 149 | /* free int matrix allocated with imatrix(): releases the data block first, then the row-pointer array */ 150 | void free_imatrix(int **m, int nrl, int nrh, int ncl, int nch) { 151 | free((FREE_ARG) (m[nrl] + ncl - NR_END)); 152 | free((FREE_ARG) (m + nrl - NR_END)); 153 | } 154 | 155 | /* allocate a double matrix with subscript ranges m[nrl...nrh][ncl...nch]; all rows live in one contiguous block addressed through a separate array of row pointers. NOTE(review): failed mallocs are only reported via Rprintf and execution continues. */ 156 | double **matrix(int nrl, int nrh, int ncl, int nch) { 157 | int i, nrow = nrh - nrl + 1, ncol = nch - ncl + 1; 158 | double **m; 159 | 160 | /* allocate pointers to rows */ 161 | m = (double **) malloc((size_t)((nrow + NR_END) * sizeof(double*))); 162 | if(!m) Rprintf("%s", "allocation fialure\n"); 163 | 164 | m += NR_END; 165 | m -= nrl; 166 | 167 | /* allocate the data block and point the first row at it */ 168 | m[nrl] = (double *) malloc((size_t)((nrow * ncol + NR_END) * sizeof(double))); 169 | if(!m[nrl]) Rprintf("%s", "allocation fialure\n"); 170 | m[nrl] += NR_END; 171 | m[nrl] -= ncl; 172 | /* remaining row pointers step through the block ncol elements at a time */ 173 | for(i = nrl + 1; i <= nrh; i++) m[i] = m[i - 1] + ncol; 174 | return m; 175 | } 176 | 177 | /* free double matrix allocated with matrix(): releases the data block first, then the row-pointer array */ 178 | void free_matrix(double **m, int nrl, int nrh, int ncl, int nch) { 179 | free((FREE_ARG) (m[nrl] + ncl - NR_END)); 180 | free((FREE_ARG) (m + nrl - NR_END)); 181 | } 182 | 183 | /* allocate a double vector with subscript range v[nl...nh]; NR_END extra elements are allocated in front and the returned pointer is offset so that v[nl] is valid. NOTE(review): a failed malloc is only reported via Rprintf and the offset pointer is still returned. */ 184 | double *vector(int nl, int nh) { 185 | double *v; 186 | 187 | v = (double *) malloc((size_t) ((nh - nl + 1 + NR_END) * sizeof(double))); 188 | if(!v) Rprintf("\n allocation failure in vector()\n"); 189 | return v - nl + NR_END; 190 | } 191 | 192 | /* free double vector allocated with vector(); nl must be the value used at allocation (nh is unused) */ 193 | void free_vector(double *v, int nl, int nh) { 194 | free((FREE_ARG) (v + nl - NR_END)); 195 | } 196 | -------------------------------------------------------------------------------- /src/edgeKLODP.h:
-------------------------------------------------------------------------------- 1 | #include /* --------- printf */ 2 | #include /* -------- malloc(), free(), etc. */ 3 | #include /* ---------- log(), exp(), etc. */ 4 | #include /* ------------- R functions */ 5 | #include 6 | 7 | #define NR_END 1 8 | #define FREE_ARG void* 9 | 10 | /*********************************************************************** 11 | EDGE-specific functions 12 | ***********************************************************************/ 13 | void odpScoreCluster(double *, double *, double *, int *, int *, int *, int *, int *, double *); 14 | void kldistance(double *, double *, double *, double *, int *, int *, int *, double *); 15 | 16 | /*********************************************************************** 17 | utility functions 18 | ***********************************************************************/ 19 | void sortQK(int, int, int, double *); 20 | void swapQK(int, int, double *); 21 | double *vector(int, int); 22 | void free_vector(double *, int, int); 23 | int *ivector(int, int); 24 | void free_ivector(int *, int, int); 25 | double **matrix(int, int, int, int); 26 | void free_matrix(double **, int, int, int, int); 27 | int **imatrix(int, int, int, int); 28 | void free_imatrix(int **, int, int, int, int); 29 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(edge) 3 | 4 | test_check("edge") 5 | -------------------------------------------------------------------------------- /tests/testthat/test-edgeSet.R: -------------------------------------------------------------------------------- 1 | library(edge) 2 | context("deSet object methods") 3 | 4 | # create data composed of noise ----------------------------------------------- 5 | dat_noise <- matrix(rnorm(3000), ncol = 10) 6 | cov <- data.frame(grp = c(rep(1, 5), rep(0, 
5))) 7 | 8 | # make deSet object --------------------------------------------------------- 9 | de_obj <- build_models(dat_noise, cov = cov, full.model = ~1 + grp, 10 | null.model = ~1) 11 | 12 | de_obj <- lrt(de_obj) 13 | 14 | test_that("get methods", { 15 | expect_equal(fullModel(de_obj), ~1 + grp) 16 | expect_equal(nullModel(de_obj), ~1) 17 | expect_equal(individual(de_obj), factor()) 18 | 19 | expect_equal(class(qvalueObj(de_obj)), "qvalue") 20 | }) 21 | 22 | cov$new_grp <- 1:10 23 | pData(de_obj) <- cov 24 | nullModel(de_obj) <- ~1 + new_grp 25 | fullModel(de_obj) <- ~1 + grp + new_grp 26 | mat_full <- model.matrix(~1 + grp + new_grp, cov) 27 | mat_null <- model.matrix(~1 + new_grp, cov) 28 | individual(de_obj) <- as.factor(1:10) 29 | 30 | test_that("set methods", { 31 | expect_equal(fullModel(de_obj), ~1 + grp + new_grp) 32 | expect_equal(fullMatrix(de_obj), mat_full) 33 | expect_error(fullModel(de_obj) <- ~1 + DNE) 34 | 35 | expect_equal(nullModel(de_obj), ~1 + new_grp) 36 | expect_equal(nullMatrix(de_obj), mat_null) 37 | expect_error(nullModel(de_obj) <- ~1 + DNE) 38 | 39 | expect_equal(individual(de_obj), as.factor(1:10)) 40 | expect_error(individual(de_obj) <- 1:10) 41 | 42 | expect_error(qvalueObj(de_obj) <- 1:10) 43 | }) 44 | 45 | -------------------------------------------------------------------------------- /tests/testthat/test-modelCreation.R: -------------------------------------------------------------------------------- 1 | library(edge) 2 | library(splines) 3 | context("Model creation: deSet/build_models/build_study") 4 | 5 | ngenes <- 100 6 | nobs <- 20 7 | # create data composed of noise ----------------------------------------------- 8 | dat_noise <- matrix(rnorm(ngenes*nobs), ncol = nobs) 9 | cov <- data.frame(grp = c(rep(1, nobs/2), rep(0, nobs/2))) 10 | 11 | # edgeModel ------------------------------------------------------------------- 12 | de_objM <- build_models(dat_noise, cov = cov, full.model = ~1 + grp, null.model = ~1) 13 | de_objMi 
<- build_models(dat_noise, cov = cov, full.model = ~1 + grp, 14 | null.model = ~1, ind = factor(1:20)) 15 | 16 | # edgeStudy ------------------------------------------------------------------- 17 | de_objS <- build_study(dat_noise, grp = as.factor(cov$grp), sampling = "static") 18 | de_objSi <- build_study(dat_noise, grp = as.factor(cov$grp), sampling = "static", 19 | ind = factor(1:20)) 20 | adj <- rnorm(20) 21 | tme <- rnorm(20) 22 | cov$adj <- adj 23 | cov$tme <- tme 24 | de_objSit <- build_study(dat_noise, grp = as.factor(cov$grp), 25 | adj.var = adj, sampling = "timecourse", 26 | ind = factor(1:20), tme = tme) 27 | 28 | # deSet --------------------------------------------------------------------- 29 | exp_set <- ExpressionSet(assayData = dat_noise, 30 | phenoData = as(cov, "AnnotatedDataFrame")) 31 | de_objE <- deSet(exp_set, full.model = ~1 + grp, null.model = ~1) 32 | de_objEi <- deSet(exp_set, full.model = ~1 + grp, null.model = ~1, 33 | ind = factor(1:20)) 34 | 35 | 36 | test_that("build_models method", { 37 | expect_equal(fullModel(de_objM), ~1 + grp) 38 | expect_equal(nullModel(de_objM), ~1) 39 | 40 | expect_equivalent(fullMatrix(de_objM), model.matrix(~1 + grp, cov)) 41 | 42 | expect_equivalent(exprs(de_objM), dat_noise) 43 | 44 | expect_equivalent(individual(de_objMi), factor(1:20)) 45 | 46 | }) 47 | 48 | test_that("build_study method", { 49 | expect_equal(fullModel(de_objS), ~grp) 50 | expect_equal(nullModel(de_objS), ~1) 51 | 52 | expect_equivalent(fullMatrix(de_objS), model.matrix(~1 + grp, cov)) 53 | 54 | expect_equivalent(exprs(de_objS), dat_noise) 55 | 56 | expect_equivalent(individual(de_objSi), factor(1:20)) 57 | 58 | expect_equivalent(fullMatrix(de_objSit), model.matrix(~adj + grp + ns(tme, df=2, intercept=FALSE) + grp:ns(tme, df = 2, intercept=FALSE), cov)) 59 | 60 | }) 61 | 62 | test_that("deSet method", { 63 | expect_equal(fullModel(de_objE), ~1 + grp) 64 | expect_equal(nullModel(de_objE), ~1) 65 | 66 | 
expect_equivalent(fullMatrix(de_objE), model.matrix(~1 + grp, cov)) 67 | 68 | expect_equivalent(exprs(de_objE), dat_noise) 69 | 70 | expect_equivalent(individual(de_objEi), factor(1:20)) 71 | 72 | }) 73 | -------------------------------------------------------------------------------- /vignettes/edgecomp.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/StoreyLab/edge/5f973def65bc536b90d46b78e0a0ef849a81caa3/vignettes/edgecomp.pdf -------------------------------------------------------------------------------- /vignettes/edgerefs.bib: -------------------------------------------------------------------------------- 1 | 2 | @article{Storey:2002fc, 3 | Abstract = {Multiple-hypothesis testing involves guarding against much more complicated errors than single-hypothesis testing. Whereas we typically control the type I error rate for a single-hypothesis test, a compound error rate is controlled for multiple-hypothesis tests. For example, controlling the false discovery rate FDR traditionally involves intricate sequential p-value rejection methods based on the observed data. Whereas a sequential p-value method fixes the error rate and estimates its corresponding rejection region, we propose the opposite approach-we fix the rejection region and then estimate its corresponding error rate. This new approach offers increased applicability, accuracy and power. We apply the methodology to both the positive false discovery rate pFDR and FDR, and provide evidence for its benefits. It is shown that pFDR is probably the quantity of interest over FDR. Also discussed is the calculation of the q-value, the pFDR analogue of the p-value, which eliminates the need to set the error rate beforehand as is traditionally done. 
Some simple numerical examples are presented that show that this new approach can yield an increase of over eight times in power compared with the Benjamini-Hochberg FDR method.}, 4 | Address = {108 COWLEY RD, OXFORD OX4 1JF, OXON, ENGLAND}, 5 | Author = {Storey, JD}, 6 | Date-Added = {2011-10-30 22:26:25 -0400}, 7 | Date-Modified = {2011-10-30 22:26:25 -0400}, 8 | Isi = {000177425500009}, 9 | Isi-Recid = {126051257}, 10 | Isi-Ref-Recids = {112504863 90155838 115373815 122784094 87253760 119531800 126051258 126051259 119668320 112504865}, 11 | Journal = {Journal of the Royal Statistical Society Series B-Statistical Methodology}, 12 | Keywords = {false discovery rate; multiple comparisons; positive false discovery rate; p-values; q-values; sequential p-value methods; simultaneous inference}, 13 | Pages = {479--498}, 14 | Publisher = {BLACKWELL PUBL LTD}, 15 | Times-Cited = {1132}, 16 | Title = {A direct approach to false discovery rates}, 17 | Volume = {64}, 18 | Year = {2002}, 19 | } 20 | 21 | 22 | @article{Storey:2003il, 23 | Abstract = {With the increase in genomewide experiments and the sequencing of multiple genomes, the analysis of large data sets has become commonplace in biology. It is often the case that thousands of features in a genomewide data set are tested against some null hypothesis, where a number of features are expected to be significant. Here we propose an approach to measuring statistical significance in these genomewide studies based on the concept of the false discovery rate. This approach offers a sensible balance between the number of true and false positives that is automatically calibrated and easily interpreted. In doing so, a measure of statistical significance called the q value is associated with each tested feature. The q value is similar to the well known p value, except it is a measure of significance in terms of the false discovery rate rather than the false positive rate. 
Our approach avoids a flood of false positive results, while offering a more liberal criterion than what has been used in genome scans for linkage.}, 24 | Author = {Storey, John D and Tibshirani, Robert}, 25 | Date-Added = {2011-10-30 22:16:49 -0400}, 26 | Date-Modified = {2011-10-30 22:16:49 -0400}, 27 | Doi = {10.1073/pnas.1530509100}, 28 | Journal = {Proc Natl Acad Sci U S A}, 29 | Journal-Full = {Proceedings of the National Academy of Sciences of the United States of America}, 30 | Mesh = {Algorithms; Alternative Splicing; Animals; Binding Sites; Exons; Gene Expression Regulation; Genetic Linkage; Genetic Techniques; Genome; Humans; Oligonucleotide Array Sequence Analysis; Statistics as Topic; Transcription, Genetic}, 31 | Month = {Aug}, 32 | Number = {16}, 33 | Pages = {9440-5}, 34 | Pmc = {PMC170937}, 35 | Pmid = {12883005}, 36 | Pst = {ppublish}, 37 | Title = {Statistical significance for genomewide studies}, 38 | Volume = {100}, 39 | Year = {2003}, 40 | } 41 | 42 | 43 | @article{woo:leek:storey:2011, 44 | author = {Woo, Sangsoon and Leek, Jeffrey T. and Storey, John D.}, 45 | title = {A computationally efficient modular optimal discovery procedure}, 46 | volume = {27}, 47 | number = {4}, 48 | pages = {509-515}, 49 | year = {2011}, 50 | doi = {10.1093/bioinformatics/btq701}, 51 | abstract ={Motivation: It is well known that patterns of differential gene expression across biological conditions are often shared by many genes, particularly those within functional groups. Taking advantage of these patterns can lead to increased statistical power and biological clarity when testing for differential expression in a microarray experiment. The optimal discovery procedure (ODP), which maximizes the expected number of true positives for each fixed number of expected false positives, is a framework aimed at this goal. Storey et al. introduced an estimator of the ODP for identifying differentially expressed genes. 
However, their ODP estimator grows quadratically in computational time with respect to the number of genes. Reducing this computational burden is a key step in making the ODP practical for usage in a variety of high-throughput problems. Results: Here, we propose a new estimate of the ODP called the modular ODP (mODP). The existing ‘full ODP’ requires that the likelihood function for each gene be evaluated according to the parameter estimates for all genes. The mODP assigns genes to modules according to a Kullback–Leibler distance, and then evaluates the statistic only at the module-averaged parameter estimates. We show that the mODP is relatively insensitive to the choice of the number of modules, but dramatically reduces the computational complexity from quadratic to linear in the number of genes. We compare the full ODP algorithm and mODP on simulated data and gene expression data from a recent study of Moroccan Amazighs. The mODP and full ODP algorithm perform very similarly across a range of comparisons. Availability: The mODP methodology has been implemented into EDGE, a comprehensive gene expression analysis software package in R, available at http://genomine.org/edge/. Contact: jstorey@princeton.edu. Supplementary information: Supplementary data are available at Bioinformatics online.}, 52 | URL = {http://bioinformatics.oxfordjournals.org/content/27/4/509.abstract}, 53 | eprint = {http://bioinformatics.oxfordjournals.org/content/27/4/509.full.pdf+html}, 54 | journal = {Bioinformatics} 55 | } 56 | 57 | @article {storey:2007, 58 | author = {Storey, John D.}, 59 | title = {The optimal discovery procedure: a new approach to simultaneous significance testing}, 60 | journal = {Journal of the Royal Statistical Society: Series B (Statistical Methodology)}, 61 | volume = {69}, 62 | number = {3}, 63 | publisher = {Blackwell Publishing Ltd}, 64 | issn = {1467-9868}, 65 | url = {http://dx.doi.org/10.1111/j.1467-9868.2007.005592.x}, 66 | doi = 
{10.1111/j.1467-9868.2007.005592.x}, 67 | pages = {347--368}, 68 | keywords = {Classification, False discovery rate, Multiple-hypothesis testing, Optimal discovery procedure, q-value, Single-thresholding procedure}, 69 | year = {2007}, 70 | } 71 | 72 | @article {gibson:2008, 73 | author = {Idaghdour, Y and Storey, JD and Jadallah, SJ and Gibson, G }, 74 | title = {A Genome-Wide Gene Expression Signature of Environmental Geography in Leukocytes of Moroccan Amazighs}, 75 | journal = {PLoS Genetics}, 76 | volume = {4}, 77 | doi = {10.1371/journal.pgen.1000052} 78 | } 79 | 80 | @article{storey:etal:2007, 81 | author = {Storey, John D. and Dai, James Y. and Leek, Jeffrey T.}, 82 | title = {The optimal discovery procedure for large-scale significance testing, with applications to comparative microarray experiments}, 83 | volume = {8}, 84 | number = {2}, 85 | pages = {414-432}, 86 | year = {2007}, 87 | doi = {10.1093/biostatistics/kxl019}, 88 | abstract ={As much of the focus of genetics and molecular biology has shifted toward the systems level, it has become increasingly important to accurately extract biologically relevant signal from thousands of related measurements. The common property among these high-dimensional biological studies is that the measured features have a rich and largely unknown underlying structure. One example of much recent interest is identifying differentially expressed genes in comparative microarray experiments. We propose a new approach aimed at optimally performing many hypothesis tests in a high-dimensional study. This approach estimates the optimal discovery procedure (ODP), which has recently been introduced and theoretically shown to optimally perform multiple significance tests. Whereas existing procedures essentially use data from only one feature at a time, the ODP approach uses the relevant information from the entire data set when testing each feature. 
In particular, we propose a generally applicable estimate of the ODP for identifying differentially expressed genes in microarray experiments. This microarray method consistently shows favorable performance over five highly used existing methods. For example, in testing for differential expression between two breast cancer tumor types, the ODP provides increases from 72% to 185% in the number of genes called significant at a false discovery rate of 3%. Our proposed microarray method is freely available to academic users in the open-source, point-and-click EDGE software package.}, 89 | URL = {http://biostatistics.oxfordjournals.org/content/8/2/414.abstract}, 90 | eprint = {http://biostatistics.oxfordjournals.org/content/8/2/414.full.pdf+html}, 91 | journal = {Biostatistics} 92 | } 93 | 94 | 95 | 96 | @Article{storey:2005, 97 | author = "Storey, John D. and Xiao, Wenzhong and Leek, Jeffrey T. and Tompkins, Ronald G. and Davis, Ronald W.", 98 | title = "Significance analysis of time course microarray experiments", 99 | volume = "102", 100 | number = "36", 101 | pages = "12837-12842", 102 | year = "2005", 103 | doi = "10.1073/pnas.0504609102", 104 | abstract ="Characterizing the genome-wide dynamic regulation of gene expression is important and will be of much interest in the future. However, there is currently no established method for identifying differentially expressed genes in a time course study. Here we propose a significance method for analyzing time course microarray studies that can be applied to the typical types of comparisons and sampling schemes. This method is applied to two studies on humans. In one study, genes are identified that show differential expression over time in response to in vivo endotoxin administration. By using our method, 7,409 genes are called significant at a 1% false-discovery rate level, whereas several existing approaches fail to identify any genes. 
In another study, 417 genes are identified at a 10% false-discovery rate level that show expression changing with age in the kidney cortex. Here it is also shown that as many as 47% of the genes change with age in a manner more complex than simple exponential growth or decay. The methodology proposed here has been implemented in the freely distributed and open-source edge software package.", 105 | URL = "http://www.pnas.org/content/102/36/12837.abstract", 106 | eprint = "http://www.pnas.org/content/102/36/12837.full.pdf+html", 107 | journal = "Proceedings of the National Academy of Sciences of the United States of America" 108 | } 109 | 110 | 111 | @article{leek2005, 112 | author = {Leek, Jeffrey T. and Monsen, Eva and Dabney, Alan R. and Storey, John D.}, 113 | title = {EDGE: extraction and analysis of differential gene expression}, 114 | volume = {22}, 115 | number = {4}, 116 | pages = {507-508}, 117 | year = {2006}, 118 | doi = {10.1093/bioinformatics/btk005}, 119 | abstract ={Summary: EDGE (Extraction of Differential Gene Expression) is an open source, point-and-click software program for the significance analysis of DNA microarray experiments. EDGE can perform both standard and time course differential expression analysis. The functions are based on newly developed statistical theory and methods. This document introduces the EDGE software package. Availability: EDGE is freely available for non-commercial users. 
EDGE can be downloaded for Windows, Macintosh and Linux/UNIX from http://faculty.washington.edu/jstorey/edge. Contact: jtleek@u.washington.edu}, 120 | URL = {http://bioinformatics.oxfordjournals.org/content/22/4/507.abstract}, 121 | eprint = {http://bioinformatics.oxfordjournals.org/content/22/4/507.full.pdf+html}, 122 | journal = {Bioinformatics} 123 | } 124 | 125 | 126 | @article{hedenfalk:2001, 127 | author = {Hedenfalk, Ingrid and Duggan, David and Chen, Yidong and Radmacher, Michael and Bittner, Michael and Simon, Richard and Meltzer, Paul and Gusterson, Barry and Esteller, Manel and Raffeld, Mark and Yakhini, Zohar and Ben-Dor, Amir and Dougherty, Edward and Kononen, Juha and Bubendorf, Lukas and Fehrle, Wilfrid and Pittaluga, Stefania and Gruvberger, Sofia and Loman, Niklas and Johannsson, Oskar and Olsson, Håkan and Wilfond, Benjamin and Sauter, Guido and Kallioniemi, Olli-P. and Borg, Åke and Trent, Jeffrey}, 128 | title = {Gene-Expression Profiles in Hereditary Breast Cancer}, 129 | journal = {New England Journal of Medicine}, 130 | volume = {344}, 131 | number = {8}, 132 | pages = {539-548}, 133 | year = {2001}, 134 | doi = {10.1056/NEJM200102223440801}, 135 | note ={PMID: 11207349}, 136 | 137 | URL = { 138 | http://dx.doi.org/10.1056/NEJM200102223440801 139 | 140 | }, 141 | eprint = { 142 | http://dx.doi.org/10.1056/NEJM200102223440801 143 | 144 | } 145 | 146 | } 147 | 148 | 149 | @article{rodwell:2004, 150 | author = {Rodwell, Graham E. 
J AND Sonu, Rebecca AND Zahn, Jacob M AND Lund, James AND Wilhelmy, Julie AND Wang, Lingli AND Xiao, Wenzhong AND Mindrinos, Michael AND Crane, Emily AND Segal, Eran AND Myers, Bryan D AND Brooks, James D AND Davis, Ronald W AND Higgins, John AND Owen, Art B AND Kim, Stuart K}, 151 | journal = {PLoS Biol}, 152 | publisher = {Public Library of Science}, 153 | title = {A Transcriptional Profile of Aging in the Human Kidney}, 154 | year = {2004}, 155 | month = {11}, 156 | volume = {2}, 157 | pages = {e427}, 158 | number = {12}, 159 | doi = {10.1371/journal.pbio.0020427} 160 | } 161 | 162 | @Article{storey:2003, 163 | Author = "Storey, J. D.", 164 | Title = "The positive false discovery rate: A Bayesian interpretation and the q-value", 165 | Journal = "Annals of Statistics", 166 | Year = 2003, 167 | Volume = 31, 168 | Pages = "2013-2035", 169 | } 170 | 171 | @article{leek:2007, 172 | author = {Leek, Jeffrey T AND Storey, John D}, 173 | journal = {PLoS Genet}, 174 | publisher = {Public Library of Science}, 175 | title = {Capturing Heterogeneity in Gene Expression Studies by Surrogate Variable Analysis}, 176 | year = {2007}, 177 | month = {09}, 178 | volume = {3}, 179 | pages = {e161}, 180 | abstract = {Author Summary: In scientific and medical studies, great care must be taken when collecting data to understand the relationship between two variables, such as a drug and its effect on a disease. In any given study there will be many other variables at play, such as the effects of age and sex on the disease. We show that in studies where the expression levels of thousands of genes are measured at once, these issues become surprisingly critical. Due to the complexity of our genomes, environment, and demographic features, there are many sources of variation when analyzing gene expression levels. In any given study, it is impossible to measure every single variable that may be influencing how our genes are expressed. 
Despite this, we show that by considering all expression levels simultaneously, one can actually recover the effects of these important missed variables and essentially produce an analysis as if all relevant variables were included. As opposed to traditional studies, the massive amount of data available in this setting is what makes the method, called surrogate variable analysis, possible. We hypothesize that surrogate variable analysis will be useful in many large-scale gene expression studies.}, 181 | number = {9}, 182 | doi = {10.1371/journal.pgen.0030161} 183 | } 184 | 185 | 186 | @article{Leek:2008qf, 187 | Abstract = {We develop a general framework for performing large-scale significance testing in the presence of arbitrarily strong dependence. We derive a low-dimensional set of random vectors, called a dependence kernel, that fully captures the dependence structure in an observed high-dimensional dataset. This result shows a surprising reversal of the "curse of dimensionality" in the high-dimensional hypothesis testing setting. We show theoretically that conditioning on a dependence kernel is sufficient to render statistical tests independent regardless of the level of dependence in the observed data. 
This framework for multiple testing dependence has implications in a variety of common multiple testing problems, such as in gene expression studies, brain imaging, and spatial epidemiology.}, 188 | Author = {Leek, Jeffrey T and Storey, John D}, 189 | Date-Added = {2011-10-30 22:16:12 -0400}, 190 | Date-Modified = {2011-10-30 22:16:12 -0400}, 191 | Doi = {10.1073/pnas.0808709105}, 192 | Journal = {Proc Natl Acad Sci U S A}, 193 | Journal-Full = {Proceedings of the National Academy of Sciences of the United States of America}, 194 | Mesh = {Algorithms; Computer Simulation; Models, Statistical; Software; Statistics as Topic}, 195 | Month = {Dec}, 196 | Number = {48}, 197 | Pages = {18718-23}, 198 | Pmc = {PMC2586646}, 199 | Pmid = {19033188}, 200 | Pst = {ppublish}, 201 | Title = {A general framework for multiple testing dependence}, 202 | Volume = {105}, 203 | Year = {2008}, 204 | Bdsk-Url-1 = {http://dx.doi.org/10.1073/pnas.0808709105} 205 | } 206 | 207 | 208 | @article{mecham:2010, 209 | author = {Mecham, Brigham H. and Nelson, Peter S. and Storey, John D.}, 210 | title = {Supervised normalization of microarrays}, 211 | volume = {26}, 212 | number = {10}, 213 | pages = {1308-1315}, 214 | year = {2010}, 215 | doi = {10.1093/bioinformatics/btq118}, 216 | abstract ={Motivation: A major challenge in utilizing microarray technologies to measure nucleic acid abundances is ‘normalization’, the goal of which is to separate biologically meaningful signal from other confounding sources of signal, often due to unavoidable technical factors. It is intuitively clear that true biological signal and confounding factors need to be simultaneously considered when performing normalization. 
However, the most popular normalization approaches do not utilize what is known about the study, both in terms of the biological variables of interest and the known technical factors in the study, such as batch or array processing date. Results: We show here that failing to include all study-specific biological and technical variables when performing normalization leads to biased downstream analyses. We propose a general normalization framework that fits a study-specific model employing every known variable that is relevant to the expression study. The proposed method is generally applicable to the full range of existing probe designs, as well as to both single-channel and dual-channel arrays. We show through real and simulated examples that the method has favorable operating characteristics in comparison to some of the most highly used normalization methods. Availability: An R package called snm implementing the methodology will be made available from Bioconductor (http://bioconductor.org). Contact: jstorey@princeton.edu. Supplementary information: Supplementary data are available at Bioinformatics online.}, 217 | URL = {http://bioinformatics.oxfordjournals.org/content/26/10/1308.abstract}, 218 | eprint = {http://bioinformatics.oxfordjournals.org/content/26/10/1308.full.pdf+html}, 219 | journal = {Bioinformatics} 220 | } 221 | 222 | @article{calvano:2005, 223 | author = {Calvano, SE and Xiao, W and Richards, DR and Felciano, RM and Baker, HV and Cho, RJ and Chen, RO and Brownstein, BH and Cobb, JP and Tschoeke, SK and Miller-Graziano, C and Moldawer, LL and Mindrinos, MN and Davis, RW and Tompkins, RG and Lowry, SF}, 224 | title = {A network-based analysis of systemic inflammation in humans}, 225 | volume = {437}, 226 | pages = {1032-1037}, 227 | year = {2005}, 228 | doi = {10.1038/nature03985}, 229 | URL = {http://www.nature.com/nature/journal/v437/n7061/full/nature03985.html}, 230 | journal = {Nature} 231 | } 
--------------------------------------------------------------------------------