├── DESCRIPTION
├── LICENSE
├── NAMESPACE
├── R
    ├── AllClasses.R
    ├── AllGenerics.R
    ├── ExpressionSet-methods.R
    ├── deFit-methods.R
    ├── deSet-methods.R
    ├── edge.R
    ├── form_models.R
    ├── getMethods.R
    ├── kl_clust-functions.R
    ├── lrt-functions.R
    ├── misc.R
    ├── odp-functions.R
    ├── setMethods.R
    └── wls.R
├── README.md
├── data
    ├── endotoxin.rda
    ├── gibson.rda
    └── kidney.rda
├── inst
    └── NEWS
├── man
    ├── apply_jackstraw.Rd
    ├── apply_qvalue.Rd
    ├── apply_snm.Rd
    ├── apply_sva.Rd
    ├── betaCoef.Rd
    ├── build_models.Rd
    ├── build_study.Rd
    ├── deFit-class.Rd
    ├── deSet-class.Rd
    ├── deSet.Rd
    ├── edge.Rd
    ├── endotoxin.Rd
    ├── fitFull.Rd
    ├── fitNull.Rd
    ├── fit_models.Rd
    ├── fullMatrix.Rd
    ├── fullModel.Rd
    ├── gibson.Rd
    ├── individual.Rd
    ├── kidney.Rd
    ├── kl_clust.Rd
    ├── lrt.Rd
    ├── nullMatrix.Rd
    ├── nullModel.Rd
    ├── odp.Rd
    ├── qvalueObj.Rd
    ├── resFull.Rd
    ├── resNull.Rd
    ├── sType.Rd
    ├── show.Rd
    └── summary.Rd
├── src
    ├── edge-init.c
    ├── edgeKLODP.c
    └── edgeKLODP.h
├── tests
    ├── testthat.R
    └── testthat
    │   ├── test-edgeSet.R
    │   └── test-modelCreation.R
└── vignettes
    ├── edge.Rnw
    ├── edgecomp.pdf
    └── edgerefs.bib


/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: edge
 2 | Type: Package
 3 | Title: Extraction of Differential Gene Expression
 4 | Date: 2015-04-15
 5 | Version: 2.5.3
 6 | Author: John D. Storey, Jeffrey T. Leek and Andrew J. Bass
 7 | Maintainer: John D. Storey <jstorey@princeton.edu>, Andrew J. Bass <ajbass@princeton.edu>
 8 | biocViews: MultipleComparison, DifferentialExpression, TimeCourse,
 9 |     Regression, GeneExpression, DataImport
10 | Description: The edge package implements methods for carrying out differential
11 |     expression analyses of genome-wide gene expression studies. Significance
12 |     testing using the optimal discovery procedure and generalized likelihood
13 |     ratio tests (equivalent to F-tests and t-tests) are implemented for general study
14 |     designs. Special functions are available to facilitate the analysis of
15 |     common study designs, including time course experiments. Other packages
16 |     such as snm, sva, and qvalue are integrated in edge to provide a wide range
17 |     of tools for gene expression analysis.
18 | VignetteBuilder: knitr
19 | Imports:
20 |     methods,
21 |     splines,
22 |     sva,
23 |     snm,
24 |     jackstraw,
25 |     qvalue(>= 1.99.0),
26 |     MASS
27 | Suggests:
28 |     testthat,
29 |     knitr,
30 |     ggplot2,
31 |     reshape2
32 | Depends:
33 |     R(>= 3.1.0),
34 |     Biobase
35 | URL: https://github.com/StoreyLab/edge
36 | BugReports: https://github.com/StoreyLab/edge/issues
37 | LazyData: true
38 | License: MIT + file LICENSE
39 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | YEAR: 2005-2015
2 | COPYRIGHT HOLDER: John D. Storey
3 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2 (4.1.1): do not edit by hand
 2 | 
 3 | export("fullMatrix<-")
 4 | export("fullModel<-")
 5 | export("individual<-")
 6 | export("nullMatrix<-")
 7 | export("nullModel<-")
 8 | export("qvalueObj<-")
 9 | export(apply_jackstraw)
10 | export(apply_qvalue)
11 | export(apply_snm)
12 | export(apply_sva)
13 | export(betaCoef)
14 | export(build_models)
15 | export(build_study)
16 | export(deSet)
17 | export(fitFull)
18 | export(fitNull)
19 | export(fullMatrix)
20 | export(fullModel)
21 | export(individual)
22 | export(lrt)
23 | export(nullMatrix)
24 | export(odp)
25 | export(qvalueObj)
26 | export(resFull)
27 | export(resNull)
28 | export(show)
29 | export(summary)
30 | exportClasses(deFit)
31 | exportClasses(deSet)
32 | exportMethods(fit_models)
33 | exportMethods(kl_clust)
34 | exportMethods(nullModel)
35 | exportMethods(sType)
36 | import(Biobase)
37 | import(MASS)
38 | import(methods)
39 | import(qvalue)
40 | import(snm)
41 | import(splines)
42 | import(sva)
43 | import(jackstraw)
44 | useDynLib(edge,kldistance)
45 | useDynLib(edge,odpScoreCluster)
46 | 


--------------------------------------------------------------------------------
/R/AllClasses.R:
--------------------------------------------------------------------------------
  1 | # Allows to set qvalue to S4 slot
  2 | setOldClass("qvalue")
  3 | 
  4 | deSetCheck <- function(object) {
  5 |   # Validity method for the deSet class.
  6 |   #
  7 |   # object: the deSet instance under validation.
  8 |   # Returns TRUE when every check passes, otherwise a character vector
  9 |   # holding one message per detected problem.
 10 |   tol <- 10e-8  # singular values below this flag a near-singular design
 11 | 
 12 |   # List-like (placeholder) models with no individuals are accepted as-is;
 13 |   # this allows easy conversion of an ExpressionSet using function 'as'
 14 |   if (is.list(object@null.model) && is.list(object@full.model) &&
 15 |         length(object@individual) == 0) {
 16 |     return(TRUE)
 17 |   }
 18 | 
 19 |   # Name mismatch: unless the full model uses one of the reserved terms,
 20 |   # every model variable has to be a phenoData covariate. A mismatch is
 21 |   # reported immediately, before any design matrix is built.
 22 |   full.vars <- all.vars(object@full.model)
 23 |   model.vars <- unique(c(full.vars, all.vars(object@null.model)))
 24 |   reserved <- c("grp", "bio.var", "time.basis")
 25 |   if (!any(full.vars %in% reserved) &&
 26 |         !all(model.vars %in% varLabels(object))) {
 27 |     return("naming mismatch between phenoData covariates and models.")
 28 |   }
 29 | 
 30 |   problems <- character()
 31 |   null.design <- model.matrix(object@null.model, data=object)
 32 |   full.design <- model.matrix(object@full.model, data=object)
 33 |   # Singular matrix: only the full model is tested; the null model is
 34 |   # not checked here.
 35 |   if (min(svd(full.design)$d) < tol) {
 36 |     problems <- c(problems, "full model matrix is near singular.")
 37 |   }
 38 |   # Dimensionality test- this may be impossible to make in deSet
 39 |   n.arrays <- dim(exprs(object))[2]
 40 |   if (n.arrays != nrow(full.design)) {
 41 |     problems <- c(problems,
 42 |                   "dimension mismatch between full model and assayData.")
 43 |   }
 44 |   if (n.arrays != nrow(null.design)) {
 45 |     problems <- c(problems,
 46 |                   "dimension mismatch between null model and assayData.")
 47 |   }
 48 |   # individual input size: when supplied, one entry per array is required
 49 |   n.ind <- length(object@individual)
 50 |   if (n.ind != 0 && n.ind != ncol(exprs(object))) {
 51 |     problems <- c(problems,
 52 |                   "individual must be the same length as the number of arrays")
 53 |   }
 54 |   if (length(problems) > 0) {
 55 |     return(problems)
 56 |   }
 57 |   TRUE
 58 | }
 59 | 
 60 | deFitCheck <- function(object) {
 61 |   # Validity method for the deFit class: returns TRUE when every check
 62 |   # passes, otherwise a character vector with one message per problem.
 63 |   errors <- character()
 64 |   # Dimensionality test: fitted values, residuals and dH.full must all
 65 |   # have the same number of columns
 66 |   n.cols <- c(ncol(object@fit.full), ncol(object@fit.null),
 67 |               ncol(object@res.full), ncol(object@res.null),
 68 |               ncol(object@dH.full))
 69 |   if (length(unique(n.cols)) != 1) {
 70 |     errors <- c(errors, paste("column length of fitted matrices, dH.full",
 71 |                               "and residuals must be the same."))
 72 |   }
 73 |   # Fitted values and residuals must also have the same number of rows
 74 |   n.rows <- c(nrow(object@fit.full), nrow(object@fit.null),
 75 |               nrow(object@res.full), nrow(object@res.null))
 76 |   if (length(unique(n.rows)) != 1) {
 77 |     errors <- c(errors, paste("row length of fitted matrices and residuals",
 78 |                               "must be the same."))
 79 |   }
 80 |   # Correct statistic input check. The length is tested first: an empty or
 81 |   # multi-element stat.type would otherwise make if() itself signal an error
 82 |   if (length(object@stat.type) != 1 ||
 83 |         !(object@stat.type %in% c("lrt", "odp"))) {
 84 |     msg <- paste("stat.type must be lrt or odp. Inputted stat.type: ",
 85 |                  paste(object@stat.type, collapse = ", "))
 86 |     errors <- c(errors, msg)
 87 |   }
 88 |   if (length(errors) == 0) TRUE else errors
 89 | }
 90 | 
 91 | #' The differential expression class (deSet)
 92 | #'
 93 | #' The \code{deSet} class extends the \code{\link{ExpressionSet}} class.
 94 | #' While the \code{ExpressionSet} class contains information about the
 95 | #' experiment, the \code{deSet} class contains both experimental information and
 96 | #' additional information relevant for differential expression analysis, 
 97 | #' explained below in Slots.
 98 | #'
 99 | #' @slot null.model \code{formula}: contains the adjustment variables in the
100 | #' experiment. The null model is used for comparison when fitting the
101 | #' full model.
102 | #' @slot full.model \code{formula}: contains the adjustment variables and the
103 | #' biological variables of interest.
104 | #' @slot null.matrix \code{matrix}: the null model as a matrix.
105 | #' @slot full.matrix \code{matrix}: the full model as a matrix.
106 | #' @slot individual \code{factor}: contains information on which sample
107 | #' is from which individual in the experiment.
108 | #' @slot qvalueObj \code{S3 object}: containing \code{qvalue} object.
109 | #' See \code{\link{qvalue}} for additional details.
110 | #'
111 | #' @section Methods:
112 | #'  \describe{
113 | #'  \item{\code{as(ExpressionSet, "deSet")}}{Coerce objects of
114 | #'  \code{ExpressionSet} to \code{deSet}.}
115 | #'  \item{\code{lrt(deSet, ...)}}{Performs a generalized likelihood ratio test
116 | #'  using the full and null models.}
117 | #'  \item{\code{odp(deSet, ...)}}{Performs the optimal discovery procedure,
118 | #'  which is a new approach for optimally performing many hypothesis tests in
119 | #'  a high-dimensional study.}
120 | #'  \item{\code{kl_clust(deSet, ...)}}{An implementation of mODP that assigns
121 | #'  genes to modules based off of the Kullback-Leibler distance.}
122 | #'  \item{\code{fit_models(deSet, ...)}}{Fits a linear model to each gene by
123 | #'  method of least squares.}
124 | #'  \item{\code{apply_qvalue(deSet, ...)}}{Applies \code{\link{qvalue}}
125 | #'  function.}
126 | #'  \item{\code{apply_snm(deSet, ...)}}{Applies supervised normalization of
127 | #'   microarrays (\code{\link{snm}}) on gene expression data.}
128 | #'  \item{\code{apply_sva(deSet, ...)}}{Applies surrogate variable analysis
129 | #'  (\code{\link{sva}}).}
130 | #'  \item{\code{fullMatrix(deSet)}}{Access and set full matrix.}
131 | #'  \item{\code{nullMatrix(deSet)}}{Access and set null matrix.}
132 | #'  \item{\code{fullModel(deSet)}}{Access and set full model.}
133 | #'  \item{\code{nullModel(deSet)}}{Access and set null model.}
134 | #'  \item{\code{individual(deSet)}}{Access and set individual slot.}
135 | #'  \item{\code{qvalueObj(deSet)}}{Access \code{qvalue} object.
136 | #'  See \code{\link{qvalue}}.}
137 | #'  \item{\code{validObject(deSet)}}{Check validity of \code{deSet} object.}
138 | #'  }
139 | #'
140 | #' @note
141 | #' See \code{\link{ExpressionSet}} for additional slot information.
142 | #'
143 | #' @author
144 | #' John Storey, Jeffrey Leek, Andrew Bass
145 | #' @inheritParams ExpressionSet
146 | #' @exportClass deSet
147 | setClass(
148 |   "deSet",
149 |   contains = "ExpressionSet",  # inherits all ExpressionSet slots
150 |   validity = deSetCheck,       # consistency checks for models and data
151 |   slots = c(null.model = "formula", full.model = "formula",
152 |             null.matrix = "matrix", full.matrix = "matrix",
153 |             individual = "factor", qvalueObj = "qvalue"),
154 |   # defaults: formula(NULL) models, matrix() placeholders, empty factor/list
155 |   prototype = prototype(null.model = formula(NULL),
156 |                         full.model = formula(NULL),
157 |                         null.matrix = matrix(),
158 |                         full.matrix = matrix(),
159 |                         individual = as.factor(NULL),
160 |                         qvalueObj = structure(list(), class = "qvalue"))
161 | )
162 | 
163 | #' The differential expression class for the model fits
164 | #'
165 | #' Object returned from \code{\link{fit_models}} containing information
166 | #' regarding the model fits for the experiment.
167 | #'
168 | #' @slot fit.full \code{matrix}: containing fitted values for the full model.
169 | #' @slot fit.null \code{matrix}: containing fitted values for the null model.
170 | #' @slot res.full \code{matrix}: the residuals of the full model.
171 | #' @slot res.null \code{matrix}: the residuals of the null model.
172 | #' @slot dH.full \code{matrix}: contains diagonal elements in the projection
173 | #' matrix for the full model.
174 | #' @slot beta.coef \code{matrix}: fitted coefficients for the full model.
175 | #' @slot stat.type \code{string}: information on the statistic of interest.
176 | #' Currently, the only options are ``lrt'' and ``odp''.
177 | #'
178 | #' @section Methods:
179 | #'  \describe{
180 | #'  \item{\code{fitNull(deFit)}}{Access fitted data from null model.}
181 | #'  \item{\code{fitFull(deFit)}}{Access fitted data from full model.}
182 | #'  \item{\code{resNull(deFit)}}{Access residuals from null model fit.}
183 | #'  \item{\code{resFull(deFit)}}{Access residuals from full model fit.}
184 | #'  \item{\code{betaCoef(deFit)}}{Access beta coefficients in linear model.}
185 | #'  \item{\code{sType(deFit)}}{Access statistic type of model fitting utilized
186 | #'  in function.}
187 | #'  }
188 | #'
189 | #' @author
190 | #' John Storey, Jeffrey Leek, Andrew Bass
191 | #'
192 | #' @exportClass deFit
193 | setClass(
194 |   "deFit",
195 |   validity = deFitCheck,  # checks matrix dimensions and stat.type
196 |   slots = c(fit.full = "matrix", fit.null = "matrix",
197 |             res.full = "matrix", res.null = "matrix",
198 |             dH.full = "matrix", beta.coef = "matrix",
199 |             stat.type = "character")
200 | )
201 | 


--------------------------------------------------------------------------------
/R/AllGenerics.R:
--------------------------------------------------------------------------------
   1 | #' Performs F-test (likelihood ratio test using Normal likelihood)
   2 | #'
   3 | #' \code{lrt} performs a generalized likelihood ratio test using the full and
   4 | #' null models.
   5 | #'
   6 | #' @param object \code{S4 object}: \code{\linkS4class{deSet}}.
   7 | #' @param de.fit \code{S4 object}: \code{\linkS4class{deFit}}. Optional.
   8 | #' @param nullDistn \code{character}: either "normal" or "bootstrap". If
   9 | #' "normal" then the p-values are calculated using the F distribution. If
  10 | #' "bootstrap" then a bootstrap algorithm is implemented to simulate
  11 | #' statistics from the null distribution. In the "bootstrap" case, empirical
  12 | #' p-values are calculated using the observed and null statistics (see
  13 | #' \code{\link{empPvals}}). Default is "normal".
  14 | #' @param weights \code{matrix}: weights for each observation. Default is NULL.
  15 | #' @param bs.its \code{integer}: number of null statistics generated (only
  16 | #' applicable for "bootstrap" method). Default is 100.
  17 | #' @param seed \code{integer}: set the seed value. Default is NULL.
  18 | #' @param verbose \code{boolean}: print iterations for bootstrap method.
  19 | #' Default is TRUE.
  20 | #' @param mod.F \code{boolean}: Moderated F-test, recommended for experiments
  21 | #' with a small sample size. Default is FALSE.
  22 | #' @param ... Additional arguments for \code{\link{apply_qvalue}} and
  23 | #' \code{\link{empPvals}} function.
  24 | #'
  25 | #' @details \code{lrt} fits the full and null models to each gene using the
  26 | #' function \code{\link{fit_models}} and then performs a likelihood ratio test.
  27 | #' The user has the option to calculate p-values using a Normal distribution
  28 | #' assumption or through a bootstrap algorithm. If \code{nullDistn} is
  29 | #' "bootstrap" then empirical p-values will be determined from the
  30 | #' \code{\link{qvalue}} package (see \code{\link{empPvals}}).
  31 | #'
  32 | #' @author John Storey, Andrew Bass
  33 | #'
  34 | #' @return \code{\linkS4class{deSet}} object
  35 | #'
  36 | #' @examples
  37 | #' # import data
  38 | #' library(splines)
  39 | #' data(kidney)
  40 | #' age <- kidney$age
  41 | #' sex <- kidney$sex
  42 | #' kidexpr <- kidney$kidexpr
  43 | #' cov <- data.frame(sex = sex, age = age)
  44 | #'
  45 | #' # create models
  46 | #' null_model <- ~sex
  47 | #' full_model <- ~sex + ns(age, df = 4)
  48 | #'
  49 | #' # create deSet object from data
  50 | #' de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model,
  51 | #' full.model = full_model)
  52 | #'
  53 | #' # lrt method
  54 | #' de_lrt <- lrt(de_obj, nullDistn = "normal")
  55 | #'
  56 | #' # to generate p-values from bootstrap
  57 | #' de_lrt <- lrt(de_obj, nullDistn = "bootstrap", bs.its = 30)
  58 | #'
  59 | #' # input a deFit object directly
  60 | #' de_fit <- fit_models(de_obj, stat.type = "lrt")
  61 | #' de_lrt <- lrt(de_obj, de.fit = de_fit)
  62 | #'
  63 | #' # summarize object
  64 | #' summary(de_lrt)
  65 | #'
  66 | #' @references
  67 | #' Storey JD, Xiao W, Leek JT, Tompkins RG, and Davis RW. (2005) Significance
  68 | #' analysis of time course microarray experiments. Proceedings of the National
  69 | #' Academy of Sciences, 102: 12837-12842.
  70 | #' 
  71 | #' \url{http://en.wikipedia.org/wiki/Likelihood-ratio_test}
  72 | #'
  73 | #' @seealso \code{\linkS4class{deSet}}, \code{\link{build_models}},
  74 | #' \code{\link{odp}}
  75 | #'
  76 | #' @export
  77 | setGeneric(
  78 |   name = "lrt",  # generic only: the body just dispatches to a method
  79 |   def = function(object, de.fit, nullDistn = c("normal", "bootstrap"),
  80 |                  weights = NULL, bs.its = 100, seed = NULL, verbose = TRUE,
  81 |                  mod.F = FALSE, ...) standardGeneric("lrt"))
  82 | 
  83 | 
  84 | #' The optimal discovery procedure
  85 | #'
  86 | #' \code{odp} performs the optimal discovery procedure, which is a framework for
  87 | #' optimally performing many hypothesis tests in a high-dimensional study. When
  88 | #' testing whether a feature is significant, the optimal discovery procedure
  89 | #' uses information across all features when testing for significance.
  90 | #'
  91 | #' @param object \code{S4 object}: \code{\linkS4class{deSet}}
  92 | #' @param de.fit \code{S4 object}: \code{\linkS4class{deFit}}. Optional.
  93 | #' @param odp.parms \code{list}: parameters for each cluster. See
  94 | #' \code{\link{kl_clust}}.
  95 | #' @param weights \code{matrix}: weights for each observation. Default is NULL.
  96 | #' @param bs.its \code{numeric}: number of null bootstrap iterations. Default
  97 | #' is 100.
  98 | #' @param n.mods \code{integer}: number of clusters used in
  99 | #' \code{\link{kl_clust}}. Default is 50.
 100 | #' @param seed \code{integer}: set the seed value. Default is NULL.
 101 | #' @param verbose \code{boolean}: print iterations for bootstrap method.
 102 | #' Default is TRUE.
 103 | #' @param ... Additional arguments for \code{\link{qvalue}} and
 104 | #' \code{\link{empPvals}}.
 105 | #'
 106 | #'
 107 | #' @details
 108 | #' The full ODP estimator computationally grows quadratically with respect to
 109 | #' the number of genes. This becomes computationally taxing at a certain point.
 110 | #' Therefore, an alternative method called mODP is used which has been shown to
 111 | #' provide results that are very similar. mODP utilizes a clustering algorithm
 112 | #' where genes are assigned to a cluster based on the Kullback-Leibler distance.
 113 | #' Each gene is assigned a module-average parameter to calculate the ODP score
 114 | #' and it reduces the computation time to approximately linear (see Woo, Leek
 115 | #' and Storey 2010). If the number of clusters is equal to the number of genes
 116 | #' then the original ODP is implemented. Depending on the number of hypothesis
 117 | #' tests, this can take some time.
 118 | #'
 119 | #' @return \code{\linkS4class{deSet}} object
 120 | #'
 121 | #' @examples
 122 | #' # import data
 123 | #' library(splines)
 124 | #' data(kidney)
 125 | #' age <- kidney$age
 126 | #' sex <- kidney$sex
 127 | #' kidexpr <- kidney$kidexpr
 128 | #' cov <- data.frame(sex = sex, age = age)
 129 | #'
 130 | #' # create models
 131 | #' null_model <- ~sex
 132 | #' full_model <- ~sex + ns(age, df = 4)
 133 | #'
 134 | #' # create deSet object from data
 135 | #' de_obj <- build_models(data = kidexpr, cov = cov,
 136 | #' null.model = null_model, full.model = full_model)
 137 | #'
 138 | #' # odp method
 139 | #' de_odp <- odp(de_obj, bs.its = 30)
 140 | #'
 141 | #' # input a deFit object or ODP parameters ... not necessary
 142 | #' de_fit <- fit_models(de_obj, stat.type = "odp")
 143 | #' de_clust <- kl_clust(de_obj, n.mods = 10)
 144 | #' de_odp <- odp(de_obj, de.fit = de_fit, odp.parms = de_clust,
 145 | #' bs.its = 30)
 146 | #'
 147 | #' # summarize object
 148 | #' summary(de_odp)
 149 | #'
 150 | #' @references
 151 | #' Storey JD. (2007) The optimal discovery procedure: A new approach to
 152 | #' simultaneous significance testing. Journal of the Royal Statistical
 153 | #' Society, Series B, 69: 347-368.
 154 | #'
 155 | #' Storey JD, Dai JY, and Leek JT. (2007) The optimal discovery procedure for
 156 | #' large-scale significance testing, with applications to comparative
 157 | #' microarray experiments. Biostatistics, 8: 414-432.
 158 | #'
 159 | #' Woo S, Leek JT, Storey JD (2010) A computationally efficient modular
 160 | #' optimal discovery procedure. Bioinformatics, 27(4): 509-515.
 161 | #'
 162 | #' @author John Storey, Jeffrey Leek, Andrew Bass
 163 | #'
 164 | #' @seealso \code{\link{kl_clust}}, \code{\link{build_models}} and
 165 | #' \code{\linkS4class{deSet}}
 166 | #'
 167 | #' @export
 168 | setGeneric(name = "odp", def = function(
 169 |   object, de.fit, odp.parms = NULL, weights = NULL, bs.its = 100,
 170 |   n.mods = 50, seed = NULL, verbose = TRUE, ...) standardGeneric("odp"))
 171 | 
 172 | 
 173 | #' Modular optimal discovery procedure (mODP)
 174 | #'
 175 | #' \code{kl_clust} is an implementation of mODP that assigns genes to modules
 176 | #' based on the Kullback-Leibler distance.
 177 | #'
 178 | #' @param object \code{S4 object}: \code{\linkS4class{deSet}}.
 179 | #' @param de.fit \code{S4 object}: \code{\linkS4class{deFit}}.
 180 | #' @param n.mods \code{integer}: number of modules (i.e., clusters).
 181 | #'
 182 | #' @details mODP utilizes a k-means clustering algorithm where genes are
 183 | #' assigned to a cluster based on the Kullback-Leibler distance. Each gene is
 184 | #' assigned a module-average parameter to calculate the ODP score (See Woo,
 185 | #' Leek and Storey 2010 for more details). The mODP and full ODP produce nearly
 186 | #' exact results but mODP has the advantage of being computationally
 187 | #' faster.
 188 | #'
 189 | #' @note The results are generally insensitive to the number of modules after a 
 190 | #'   certain threshold of about n.mods>=50 in our experience. It is recommended
 191 | #'   that users experiment with the number of modules. If the number of modules
 192 | #'   is equal to the number of genes then the original ODP is implemented.
 193 | #'
 194 | #' @return
 195 | #' A list with the following slots:
 196 | #' \itemize{
 197 | #'   \item {mu.full: cluster averaged fitted values from full model.}
 198 | #'   \item {mu.null: cluster averaged fitted values from null model.}
 199 | #'   \item {sig.full: cluster standard deviations from full model.}
 200 | #'   \item {sig.null: cluster standard deviations from null model.}
 201 | #'   \item {n.per.mod: total members in each cluster.}
 202 | #'   \item {clustMembers: cluster membership for each gene.}
 203 | #' }
 204 | #'
 205 | #' @examples
 206 | #' # import data
 207 | #' library(splines)
 208 | #' data(kidney)
 209 | #' age <- kidney$age
 210 | #' sex <- kidney$sex
 211 | #' kidexpr <- kidney$kidexpr
 212 | #' cov <- data.frame(sex = sex, age = age)
 213 | #'
 214 | #' # create models
 215 | #' null_model <- ~sex
 216 | #' full_model <- ~sex + ns(age, df = 4)
 217 | #'
 218 | #' # create deSet object from data
 219 | #' de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model,
 220 | #' full.model = full_model)
 221 | #'
 222 | #' # mODP method
 223 | #' de_clust <- kl_clust(de_obj)
 224 | #'
 225 | #' # change the number of clusters
 226 | #' de_clust <- kl_clust(de_obj, n.mods = 10)
 227 | #'
 228 | #' # input a deFit object
 229 | #' de_fit <- fit_models(de_obj, stat.type = "odp")
 230 | #' de_clust <- kl_clust(de_obj, de.fit = de_fit)
 231 | #'
 232 | #' @references
 233 | #' Storey JD. (2007) The optimal discovery procedure: A new approach to
 234 | #' simultaneous significance testing. Journal of the Royal Statistical
 235 | #' Society, Series B, 69: 347-368.
 236 | #'
 237 | #' Storey JD, Dai JY, and Leek JT. (2007) The optimal discovery procedure for
 238 | #' large-scale significance testing, with applications to comparative
 239 | #' microarray experiments. Biostatistics, 8: 414-432.
 240 | #'
 241 | #' Woo S, Leek JT, Storey JD (2010) A computationally efficient modular optimal
 242 | #'  discovery procedure. Bioinformatics, 27(4): 509-515.
 243 | #'
 244 | #' @author John Storey, Jeffrey Leek
 245 | #'
 246 | #' @seealso \code{\link{odp}}, \code{\link{fit_models}}
 247 | #'
 248 | #' @exportMethod kl_clust
 249 | setGeneric(name = "kl_clust", def = function(object, de.fit = NULL,
 250 |                                              n.mods = 50) standardGeneric("kl_clust"))
 251 | 
 252 | #' Linear regression of the null and full models
 253 | #'
 254 | #' \code{fit_models} fits a model matrix to each gene by using the least
 255 | #' squares method. Model fits can be either statistic type "odp" (optimal
 256 | #' discovery procedure) or "lrt" (likelihood ratio test).
 257 | #'
 258 | #' @param object \code{S4 object}: \code{\linkS4class{deSet}}.
 259 | #' @param stat.type \code{character}: type of statistic to be used. Either
 260 | #' "lrt" or "odp". Default is "lrt".
 261 | #' @param weights \code{matrix}: weights for each observation. Default is NULL.
 262 | #'
 263 | #' @details
 264 | #' If "odp" method is implemented then the null model is removed from the full 
 265 | #' model (see Storey 2007).  Otherwise, the statistic type has no effect on the
 266 | #' model fit.
 267 | #'
 268 | #' @note \code{fit_models} does not have to be called by the user to use
 269 | #' \code{\link{odp}}, \code{\link{lrt}} or \code{\link{kl_clust}} as it is an
 270 | #' optional input and is implemented in the methods. The
 271 | #' \code{\linkS4class{deFit}} object can be created by the user if a different
 272 | #' statistical implementation is required.
 273 | #'
 274 | #' @return \code{\linkS4class{deFit}} object
 275 | #'
 276 | #' @examples
 277 | #' # import data
 278 | #' library(splines)
 279 | #' data(kidney)
 280 | #' age <- kidney$age
 281 | #' sex <- kidney$sex
 282 | #' kidexpr <- kidney$kidexpr
 283 | #' cov <- data.frame(sex = sex, age = age)
 284 | #'
 285 | #' # create models
 286 | #' null_model <- ~sex
 287 | #' full_model <- ~sex + ns(age, df = 4)
 288 | #'
 289 | #' # create deSet object from data
 290 | #' de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model,
 291 | #' full.model = full_model)
 292 | #'
 293 | #' # retrieve statistics from linear regression for each gene
 294 | #' fit_lrt <- fit_models(de_obj, stat.type = "lrt") # lrt method
 295 | #' fit_odp <- fit_models(de_obj, stat.type = "odp") # odp method
 296 | #'
 297 | #' # summarize object
 298 | #' summary(fit_odp)
 299 | #'
 300 | #' @references
 301 | #' Storey JD. (2007) The optimal discovery procedure: A new approach to
 302 | #' simultaneous significance testing. Journal of the Royal Statistical
 303 | #' Society, Series B, 69: 347-368.
 304 | #'
 305 | #' Storey JD, Dai JY, and Leek JT. (2007) The optimal discovery procedure for
 306 | #' large-scale significance testing, with applications to comparative
 307 | #' microarray experiments. Biostatistics, 8: 414-432.
 308 | #'
 309 | #' Storey JD, Xiao W, Leek JT, Tompkins RG, and Davis RW. (2005) Significance
 310 | #' analysis of time course microarray experiments. Proceedings of the National
 311 | #' Academy of Sciences, 102: 12837-12842.
 312 | #'
 313 | #' @seealso \code{\linkS4class{deFit}}, \code{\link{odp}} and
 314 | #' \code{\link{lrt}}
 315 | #'
 316 | #' @author John Storey
 317 | #' @exportMethod fit_models
 318 | setGeneric(name = "fit_models",
 319 |            def = function(object, stat.type = c("lrt", "odp"), weights = NULL) {
 320 |              standardGeneric("fit_models")
 321 |            })
 322 | 
 323 | #' Create a deSet object from an ExpressionSet
 324 | #'
 325 | #' Creates a \code{\linkS4class{deSet}} object that extends the
 326 | #' \code{\link{ExpressionSet}} object.
 327 | #'
 328 | #' @param object \code{S4 object}: \code{\link{ExpressionSet}}
 329 | #' @param full.model \code{formula}: full model containing both the
 330 | #' adjustment and the biological variables for the experiment.
 331 | #' @param null.model \code{formula}: null model containing the adjustment
 332 | #' variables for the experiment.
 333 | #' @param individual \code{factor}: information on repeated samples in
 334 | #' experiment.
 335 | #'
 336 | #' @note It is essential that the null and full models have the same variables
 337 | #' as the ExpressionSet phenoData column names.
 338 | #'
 339 | #' @return \code{\linkS4class{deSet}} object
 340 | #'
 341 | #' @examples
 342 | #' # import data
 343 | #' library(splines)
 344 | #' data(kidney)
 345 | #' age <- kidney$age
 346 | #' sex <- kidney$sex
 347 | #' kidexpr <- kidney$kidexpr
 348 | #' cov <- data.frame(sex = sex, age = age)
 349 | #' pDat <- as(cov, "AnnotatedDataFrame")
 350 | #' exp_set <- ExpressionSet(assayData = kidexpr, phenoData = pDat)
 351 | #'
 352 | #' # create models
 353 | #' null_model <- ~sex
 354 | #' full_model <- ~sex + ns(age, df = 4)
 355 | #'
 356 | #' # create deSet object from data
 357 | #' de_obj <- deSet(exp_set, null.model = null_model,
 358 | #' full.model = full_model)
 359 | #'
 360 | #' # optionally add individuals to experiment, in this case there are 36
 361 | #' # individuals that were sampled twice
 362 | #' indSamples <- as.factor(rep(1:36, each = 2))
 363 | #' de_obj <- deSet(exp_set, null.model = null_model,
 364 | #' full.model = full_model, ind = indSamples)
 365 | #' summary(de_obj)
 366 | #' @seealso \code{\linkS4class{deSet}}, \code{\link{odp}} and
 367 | #' \code{\link{lrt}}
 368 | #'
 369 | #' @author John Storey, Andrew Bass
 370 | #'
 371 | #' @export
 372 | setGeneric(name = "deSet", def = function(object, full.model, null.model,
 373 |                                           individual = NULL) standardGeneric("deSet"))
 374 | 
 375 | #' Estimate the q-values for a given set of p-values
 376 | #'
 377 | #' Runs \code{\link{qvalue}} on a \code{\linkS4class{deSet}} object.
 378 | #'
 379 | #' @param object \code{S4 object}: \code{\linkS4class{deSet}}
 380 | #' @param ... Additional arguments for \code{\link{qvalue}}
 381 | #'
 382 | #' @return \code{\linkS4class{deSet}} object with slots updated by \code{\link{qvalue}}
 383 | #'  calculations.
 384 | #'
 385 | #' @examples
 386 | #' # import data
 387 | #' library(splines)
 388 | #' data(kidney)
 389 | #' age <- kidney$age
 390 | #' sex <- kidney$sex
 391 | #' kidexpr <- kidney$kidexpr
 392 | #' cov <- data.frame(sex = sex, age = age)
 393 | #'
 394 | #' # create models
 395 | #' null_model <- ~sex
 396 | #' full_model <- ~sex + ns(age, df = 4)
 397 | #'
 398 | #' # create deSet object from data
 399 | #' de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model,
 400 | #' full.model = full_model)
 401 | #'
 402 | #' # Run lrt (or odp) and apply_qvalue
 403 | #' de_lrt <- lrt(de_obj)
 404 | #' de_lrt <- apply_qvalue(de_lrt, fdr.level = 0.05,
 405 | #' pi0.method = "bootstrap", adj=1.2)
 406 | #' summary(de_lrt)
 407 | #'
 408 | #' @references
 409 | #' Storey JD and Tibshirani R. (2003) Statistical significance for
 410 | #' genome-wide studies. Proceedings of the National Academy of Sciences,
 411 | #' 100: 9440-9445
 412 | #'
 413 | #' @seealso \code{\linkS4class{deSet}}, \code{\link{odp}} and
 414 | #' \code{\link{lrt}}
 415 | #'
 416 | #' @author John Storey, Andrew Bass
 417 | #'
 418 | #' @export
 419 | setGeneric(name = "apply_qvalue",
 420 |            def = function(object, ...) standardGeneric("apply_qvalue"))
 421 | 
 422 | #' Estimate surrogate variables
 423 | #'
 424 | #' Runs \code{\link{sva}} on the null and full models in
 425 | #' \code{\linkS4class{deSet}}. See \code{\link{sva}} for additional details.
 426 | #'
 427 | #' @param object \code{S4 object}: \code{\linkS4class{deSet}}
 428 | #' @param ... Additional arguments for \code{\link{sva}}
 429 | #'
 430 | #' @return \code{\linkS4class{deSet}} object where the surrogate variables 
 431 | #' estimated by \code{\link{sva}} are added to the full model and null model
 432 | #' matrices.
 433 | #'
 434 | #' @examples
 435 | #' # import data
 436 | #' library(splines)
 437 | #' data(kidney)
 438 | #' age <- kidney$age
 439 | #' sex <- kidney$sex
 440 | #' kidexpr <- kidney$kidexpr
 441 | #' cov <- data.frame(sex = sex, age = age)
 442 | #'
 443 | #' # create models
 444 | #' null_model <- ~sex
 445 | #' full_model <- ~sex + ns(age, df = 4)
 446 | #'
 447 | #' # create deSet object from data
 448 | #' de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model,
 449 | #' full.model = full_model)
 450 | #'
 451 | #' # run surrogate variable analysis
 452 | #' de_sva <- apply_sva(de_obj)
 453 | #'
 454 | #' # run odp/lrt with surrogate variables added
 455 | #' de_odp <- odp(de_sva, bs.its = 30)
 456 | #' summary(de_odp)
 457 | #' @seealso \code{\linkS4class{deSet}}, \code{\link{odp}} and
 458 | #' \code{\link{lrt}}
 459 | #'
 460 | #' @references
 461 | #' Leek JT, Storey JD (2007) Capturing Heterogeneity in Gene Expression
 462 | #' Studies by Surrogate Variable Analysis. PLoS Genet 3(9): e161.
 463 | #' doi:10.1371/journal.pgen.0030161
 464 | #' 
 465 | #' Leek JT and Storey JD. (2008) A general framework for multiple testing
 466 | #' dependence. Proceedings of the National Academy of Sciences, 105: 18718-
 467 | #' 18723.
 468 | #'
 469 | #' @author John Storey, Jeffrey Leek, Andrew Bass
 470 | #' @export
 471 | setGeneric(name = "apply_sva",
 472 |            def = function(object, ...) standardGeneric("apply_sva"))
 473 | 
 474 | #' Supervised normalization of data in edge
 475 | #'
 476 | #' Runs \code{snm} on a deSet object based on the null and full models in
 477 | #' \code{\linkS4class{deSet}}. See \code{\link{snm}} for additional details
 478 | #' on the algorithm.
 479 | #'
 480 | #' @param object \code{S4 object}: \code{\linkS4class{deSet}}
 481 | #' @param int.var \code{data frame}: intensity-dependent effects (see 
 482 | #'   \code{\link{snm}} for details)
 483 | #' @param ... Additional arguments for \code{\link{snm}}
 484 | #'
 485 | #' @return \code{apply_snm} returns a \code{\linkS4class{deSet}} object where 
 486 | #' assayData (the expression data) that has been passed to apply_snm is replaced
 487 | #' with the normalized data that \code{\link{snm}} returns.  Specifically, 
 488 | #' \code{exprs(object)} is replaced by \code{$norm.dat} from \code{\link{snm}},
 489 | #' where \code{object} is the \code{\link{deSet}} object.
 490 | #'
 491 | #' @references
 492 | #' Mecham BH, Nelson PS, Storey JD. Supervised normalization of microarrays.
 493 | #' Bioinformatics 2010;26:1308-1315.
 494 | #'
 495 | #' @examples
 496 | #' # simulate data
 497 | #' library(snm)
 498 | #' singleChannel <- sim.singleChannel(12345)
 499 | #' data <- singleChannel$raw.data
 500 | #'
 501 | #' # create deSet object using build_models (can use ExpressionSet see manual)
 502 | #' cov <- data.frame(grp = singleChannel$bio.var[,2])
 503 | #' full_model <- ~grp
 504 | #' null_model <- ~1
 505 | #'
 506 | #' # create deSet object using build_models
 507 | #' de_obj <- build_models(data = data, cov = cov, full.model = full_model,
 508 | #' null.model = null_model)
 509 | #'
 510 | #' # run snm using intensity-dependent adjustment variable
 511 | #' de_snm <- apply_snm(de_obj, int.var = singleChannel$int.var,
 512 | #' verbose = FALSE, num.iter = 1)
 513 | #'
 514 | #' @seealso \code{\linkS4class{deSet}}, \code{\link{odp}} and
 515 | #' \code{\link{lrt}}
 516 | #'
 517 | #' @author John Storey, Andrew Bass
 518 | #' @export
 519 | setGeneric(name = "apply_snm",
 520 |            def = function(object, int.var = NULL, ...) standardGeneric("apply_snm"))
 521 | 
 522 | 
 523 | #' Non-Parametric Jackstraw for Principal Component Analysis (PCA)
 524 | #'
 525 | #' Estimates statistical significance of association between variables and
 526 | #' their principal components (PCs).
 527 | #'
 528 | #' @param object \code{S4 object}: \code{\linkS4class{deSet}}
 529 | #' @param r1 a numeric vector of principal components of interest. Choose a subset of r significant PCs to be used.
 530 | #' @param r a number (a positive integer) of significant principal components.
 531 | #' @param s a number (a positive integer) of synthetic null variables. Out of m variables, s variables are independently permuted.
 532 | #' @param B a number (a positive integer) of resampling iterations. There will be a total of s*B null statistics.
 533 | #' @param covariate a data matrix of covariates with corresponding n observations.
 534 | #' @param verbose a logical indicator as to whether to print the progress.
 535 | #' @param seed a seed for the random number generator.
 536 | #' 
 537 | #' @details 
 538 | #' This function computes m p-values of linear association between m variables
 539 | #' and their PCs. Its resampling strategy accounts for the over-fitting
 540 | #' characteristics due to direct computation of PCs from the observed data
 541 | #' and protects against an anti-conservative bias.
 542 | #' 
 543 | #' Provide the \code{\linkS4class{deSet}},
 544 | #' with m variables as rows and n observations as columns. Given that there are
 545 | #' r significant PCs, this function tests for linear association between m
 546 | #' variables and their r PCs.
 547 | #'
 548 | #' You could specify a subset of significant PCs
 549 | #' that you are interested in via r1. If r1 is given, then this function computes
 550 | #' statistical significance of association between m variables and those PCs, while
 551 | #' adjusting for other PCs (i.e., significant PCs that are not of interest).
 552 | #' For example, if you want to identify variables associated with 1st and 2nd
 553 | #' PCs, when your data contains three significant PCs, set r=3 and r1=c(1,2). 
 554 | #' 
 555 | #' Please take a careful look at your data and use appropriate graphical and
 556 | #' statistical criteria to determine a number of significant PCs, r. The number
 557 | #' of significant PCs depends on the data structure and the context. In a case
 558 | #' when you fail to specify r, it will be estimated from a permutation test
 559 | #' (Buja and Eyuboglu, 1992) using a function \code{\link{permutationPA}}.
 560 | #' 
 561 | #' If s is not supplied, s is set to about 10% of m variables. If B is not
 562 | #' supplied, B is set to m*10/s.
 563 | #' 
 564 | #' @return \code{apply_jackstraw} returns a \code{list} containing the following
 565 | #' slots:
 566 | #' \itemize{
 567 | #' \item{\code{p.value} the m p-values of association tests between variables
 568 | #' and their principal components}
 569 | #' \item{\code{obs.stat} the observed F-test statistics}
 570 | #' \item{\code{null.stat} the s*B null F-test statistics}
 571 | #' }
 572 | #' 
 573 | #'
 574 | #' @references
 575 | #' Chung and Storey (2013) Statistical Significance of
 576 | #' Variables Driving Systematic Variation in
 577 | #' High-Dimensional Data. arXiv:1308.6013 [stat.ME]
 578 | #' \url{http://arxiv.org/abs/1308.6013}
 579 | #'
 580 | #'More information available at \url{http://ncc.name/}
 581 | #'
 582 | #' @examples
 583 | #' # import data
 584 | #' library(splines)
 585 | #' data(kidney)
 586 | #' age <- kidney$age
 587 | #' sex <- kidney$sex
 588 | #' kidexpr <- kidney$kidexpr
 589 | #' cov <- data.frame(sex = sex, age = age)
 590 | #'
 591 | #' # create models
 592 | #' null_model <- ~sex
 593 | #' full_model <- ~sex + ns(age, df = 4)
 594 | #'
 595 | #' # create deSet object from data
 596 | #' de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model,
 597 | #'                       full.model = full_model)
 598 | #' ## apply the jackstraw
 599 | #' out = apply_jackstraw(de_obj, r1=1, r=1)
 600 | #' ## Use optional arguments
 601 | #' ## For example, set s and B for a balance between speed of the algorithm and accuracy of p-values
 602 | #' ## out = apply_jackstraw(de_obj, r1=1, r=1, s=10, B=1000, seed=5678)
 603 | #'
 604 | #' @seealso \code{\link{permutationPA}}
 605 | #'
 606 | #' @author Neo Christopher Chung \email{nc@@princeton.edu}
 607 | #' @import jackstraw
 608 | #' @export
 609 | setGeneric(name = "apply_jackstraw", def = function(
 610 |   object, r1 = NULL, r = NULL, s = NULL, B = NULL, covariate = NULL,
 611 |   verbose = TRUE, seed = NULL) standardGeneric("apply_jackstraw"))
 612 | 
 613 | #' Full model equation
 614 | #'
 615 | #' These generic functions access and set the full model for
 616 | #' \code{\linkS4class{deSet}} object.
 617 | #'
 618 | #' @param object \code{S4 object}: \code{\linkS4class{deSet}}
 619 | #' @param value \code{formula}: The experiment design for the full model.
 620 | #'
 621 | #' @examples
 622 | #' # import data
 623 | #' library(splines)
 624 | #' data(kidney)
 625 | #' age <- kidney$age
 626 | #' sex <- kidney$sex
 627 | #' kidexpr <- kidney$kidexpr
 628 | #' cov <- data.frame(sex = sex, age = age)
 629 | #'
 630 | #' # create models
 631 | #' null_model <- ~sex
 632 | #' full_model <- ~sex + ns(age, df = 4)
 633 | #'
 634 | #' # create deSet object from data
 635 | #' de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model,
 636 | #' full.model = full_model)
 637 | #'
 638 | #' # extract out the full model equation
 639 | #' mod_full <- fullModel(de_obj)
 640 | #'
 641 | #' # change the full model in the experiment
 642 | #' fullModel(de_obj) <- ~sex + ns(age, df = 2)
 643 | #'
 644 | #'
 645 | #' @return the formula for the full model.
 646 | #'
 647 | #' @author John Storey, Andrew Bass
 648 | #'
 649 | #' @seealso \code{\linkS4class{deSet}}
 650 | #'
 651 | #' @export
 652 | setGeneric(name = "fullModel", def = function(object) standardGeneric("fullModel"))
 653 | 
 654 | #' @rdname fullModel
 655 | #' @export
 656 | setGeneric(name = "fullModel<-",
 657 |            def = function(object, value) standardGeneric("fullModel<-")
 658 | )
 659 | 
 660 | #' Null model equation from deSet object
 661 | #'
 662 | #' These generic functions access and set the null model for
 663 | #' \code{\linkS4class{deSet}} object.
 664 | #'
 665 | #' @param object \code{S4 object}: \code{\linkS4class{deSet}}
 666 | #' @param value \code{formula}: The experiment design for the null model.
 667 | #'
 668 | #' @return \code{nullModel} returns the formula for the null model.
 669 | #'
 670 | #' @examples
 671 | #' # import data
 672 | #' library(splines)
 673 | #' data(kidney)
 674 | #' age <- kidney$age
 675 | #' sex <- kidney$sex
 676 | #' kidexpr <- kidney$kidexpr
 677 | #' cov <- data.frame(sex = sex, age = age)
 678 | #'
 679 | #' # create models
 680 | #' null_model <- ~sex
 681 | #' full_model <- ~sex + ns(age, df = 4)
 682 | #'
 683 | #' # create deSet object from data
 684 | #' de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model,
 685 | #' full.model = full_model)
 686 | #'
 687 | #' # extract the null model equation
 688 | #' mod_null <- nullModel(de_obj)
 689 | #'
 690 | #' # change null model in experiment but must update full model
 691 | #' nullModel(de_obj) <- ~1
 692 | #' fullModel(de_obj) <- ~1 + ns(age, df=4)
 693 | #' @author John Storey, Andrew Bass
 694 | #'
 695 | #' @seealso \code{\linkS4class{deSet}}
 696 | #'
 697 | #' @keywords nullModel, nullModel<-
 698 | #'
 699 | #' @exportMethod nullModel
 700 | setGeneric(name = "nullModel", def = function(object) standardGeneric("nullModel"))
 701 | 
 702 | #' @rdname nullModel
 703 | #' @export
 704 | setGeneric(name = "nullModel<-",
 705 |            def = function(object, value) standardGeneric("nullModel<-")
 706 | )
 707 | 
 708 | #' Matrix representation of null model
 709 | #'
 710 | #' These generic functions access and set the null matrix for
 711 | #' \code{\linkS4class{deSet}} object.
 712 | #'
 713 | #' @param object \code{S4 object}: \code{\linkS4class{deSet}}
 714 | #' @param value \code{matrix}: null model matrix where columns are covariates
 715 | #' and rows are observations
 716 | #'
 717 | #' @return \code{nullMatrix} returns the value of the null model matrix.
 718 | #'
 719 | #' @examples
 720 | #' # import data
 721 | #' library(splines)
 722 | #' data(kidney)
 723 | #' age <- kidney$age
 724 | #' sex <- kidney$sex
 725 | #' kidexpr <- kidney$kidexpr
 726 | #' cov <- data.frame(sex = sex, age = age)
 727 | #'
 728 | #' # create models
 729 | #' null_model <- ~sex
 730 | #' full_model <- ~sex + ns(age, df = 4)
 731 | #'
 732 | #' # create deSet object from data
 733 | #' de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model,
 734 | #' full.model = full_model)
 735 | #'
 736 | #' # extract the null model as a matrix
 737 | #' mat_null <- nullMatrix(de_obj)
 738 | #'
 739 | #' @author John Storey, Andrew Bass
 740 | #'
 741 | #' @seealso \code{\linkS4class{deSet}}, \code{\link{nullModel}} and
 742 | #' \code{\link{fullModel}}
 743 | #'
 744 | #' @export
 745 | setGeneric(name = "nullMatrix", def = function(object) standardGeneric("nullMatrix"))
 746 | 
 747 | #' @rdname nullMatrix
 748 | #' @export
 749 | setGeneric(name = "nullMatrix<-",
 750 |            def = function(object, value) standardGeneric("nullMatrix<-")
 751 | )
 752 | 
 753 | #' Matrix representation of full model
 754 | #'
 755 | #' These generic functions access and set the full matrix for
 756 | #' \code{\linkS4class{deSet}} object.
 757 | #'
 758 | #' @param object \code{S4 object}: \code{\linkS4class{deSet}}
 759 | #' @param value \code{matrix}: full model matrix where the columns are the
 760 | #' covariates and rows are observations
 761 | #'
 762 | #' @return \code{fullMatrix} returns the value of the full model matrix.
 763 | #'
 764 | #' @examples
 765 | #' # import data
 766 | #' library(splines)
 767 | #' data(kidney)
 768 | #' age <- kidney$age
 769 | #' sex <- kidney$sex
 770 | #' kidexpr <- kidney$kidexpr
 771 | #' cov <- data.frame(sex = sex, age = age)
 772 | #'
 773 | #' # create models
 774 | #' null_model <- ~sex
 775 | #' full_model <- ~sex + ns(age, df = 4)
 776 | #'
 777 | #' # create deSet object from data
 778 | #' de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model,
 779 | #' full.model = full_model)
 780 | #'
 781 | #' # extract the full model equation as a matrix
 782 | #' mat_full <- fullMatrix(de_obj)
 783 | #' @author Andrew Bass, John Storey
 784 | #'
 785 | #' @seealso \code{\linkS4class{deSet}}, \code{\link{fullModel}}
 786 | #'
 787 | #' @export
 788 | setGeneric(name = "fullMatrix", def = function(object) standardGeneric("fullMatrix"))
 789 | 
 790 | #' @rdname fullMatrix
 791 | #' @export
 792 | setGeneric(name = "fullMatrix<-",
 793 |            def = function(object, value) standardGeneric("fullMatrix<-")
 794 | )
 795 | 
 796 | 
 797 | #' Access/set qvalue slot
 798 | #'
 799 | #' These generic functions access and set the \code{qvalue} object in the
 800 | #' \code{\linkS4class{deSet}} object.
 801 | #'
 802 | #' @param object \code{S4 object}: \code{\linkS4class{deSet}}
 803 | #' @param value S3 \code{object}: \code{\link{qvalue}}
 804 | #'
 805 | #' @return  \code{qvalueObj} returns a \code{\link{qvalue}} object.
 806 | #'
 807 | #' @examples
 808 | #' # import data
 809 | #' library(splines)
 810 | #' library(qvalue)
 811 | #' data(kidney)
 812 | #' age <- kidney$age
 813 | #' sex <- kidney$sex
 814 | #' kidexpr <- kidney$kidexpr
 815 | #' cov <- data.frame(sex = sex, age = age)
 816 | #'
 817 | #' # create models
 818 | #' null_model <- ~sex
 819 | #' full_model <- ~sex + ns(age, df = 4)
 820 | #'
 821 | #' # create deSet object from data
 822 | #' de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model,
 823 | #' full.model = full_model)
 824 | #'
 825 | #' # run the odp method
 826 | #' de_odp <- odp(de_obj, bs.its = 20)
 827 | #'
 828 | #' # extract out significance results
 829 | #' qval_obj <- qvalueObj(de_odp)
 830 | #'
 831 | #' # run qvalue and assign it to deSet slot
 832 | #' pvals <- qval_obj$pvalues
 833 | #' qval_new <- qvalue(pvals, pfdr = TRUE, fdr.level = 0.1)
 834 | #' qvalueObj(de_odp) <- qval_new
 835 | #'
 836 | #' @author John Storey, Andrew Bass
 837 | #'
 838 | #' @seealso \code{\link{lrt}}, \code{\link{odp}} and
 839 | #' \code{\linkS4class{deSet}}
 840 | #'
 841 | #' @export
 842 | setGeneric(name = "qvalueObj", def = function(object) standardGeneric("qvalueObj"))
 843 | 
 844 | #' @rdname qvalueObj
 845 | #' @export
 846 | setGeneric(name = "qvalueObj<-",
 847 |            def = function(object, value) standardGeneric("qvalueObj<-")
 848 | )
 849 | 
 850 | #' Individuals sampled in experiment
 851 | #'
 852 | #' These generic functions access and set the individual slot in
 853 | #' \code{\linkS4class{deSet}}.
 854 | #'
 855 | #' @param object \code{\linkS4class{deSet}}
 856 | #' @param value \code{factor}: Identifies which samples correspond to which
 857 | #'   individuals. Important if the same individuals are sampled multiple times
 858 | #'   in a longitudinal fashion.
 859 | #'
 860 | #' @return \code{individual} returns information regarding distinct individuals
 861 | #'   sampled in the experiment.
 862 | #'
 863 | #' @examples
 864 | #' library(splines)
 865 | #' # import data
 866 | #' data(endotoxin)
 867 | #' ind <- endotoxin$ind
 868 | #' time <- endotoxin$time
 869 | #' class <- endotoxin$class
 870 | #' endoexpr <- endotoxin$endoexpr
 871 | #' cov <- data.frame(individual = ind, time = time, class = class)
 872 | #'
 873 | #' # create ExpressionSet object
 874 | #' pDat <- as(cov, "AnnotatedDataFrame")
 875 | #' exp_set <- ExpressionSet(assayData = endoexpr, phenoData = pDat)
 876 | #'
 877 | #' # formulate null and full models in experiment
 878 | #' # note: interaction term is a way of taking into account group effects
 879 | #' mNull <- ~ns(time, df=4, intercept = FALSE)
 880 | #' mFull <- ~ns(time, df=4, intercept = FALSE) +
 881 | #' ns(time, df=4, intercept = FALSE):class + class
 882 | #'
 883 | #' # create deSet object
 884 | #' de_obj <- deSet(exp_set, full.model = mFull, null.model = mNull,
 885 | #' individual = ind)
 886 | #'
 887 | #' # extract out the individuals factor
 888 | #' ind_exp <- individual(de_obj)
 889 | #'
 890 | #' @author John Storey, Andrew Bass
 891 | #'
 892 | #' @seealso \code{\linkS4class{deSet}}
 893 | #'
 894 | #' @export
 895 | setGeneric(name = "individual", def = function(object) standardGeneric("individual"))
 896 | 
 897 | #' @rdname individual
 898 | #' @export
 899 | setGeneric(name = "individual<-",
 900 |            def = function(object, value) standardGeneric("individual<-")
 901 | )
 902 | 
 903 | #' Regression coefficients from full model fit
 904 | #'
 905 | #' Access the full model fitted coefficients of a
 906 | #' \code{\linkS4class{deFit}} object.
 907 | #'
 908 | #' @param object \code{S4 object}: \code{\linkS4class{deFit}}
 909 | #'
 910 | #' @return \code{betaCoef} returns the regression coefficients for the full
 911 | #'  model fit.
 912 | #'
 913 | #' @author John Storey, Andrew Bass
 914 | #'
 915 | #' @seealso \code{\link{fit_models}}
 916 | #'
 917 | #' @examples
 918 | #' # import data
 919 | #' library(splines)
 920 | #' data(kidney)
 921 | #' age <- kidney$age
 922 | #' sex <- kidney$sex
 923 | #' kidexpr <- kidney$kidexpr
 924 | #' cov <- data.frame(sex = sex, age = age)
 925 | #'
 926 | #' # create models
 927 | #' null_model <- ~sex
 928 | #' full_model <- ~sex + ns(age, df = 4)
 929 | #'
 930 | #' # create deSet object from data
 931 | #' de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model,
 932 | #' full.model = full_model)
 933 | #'
 934 | #' # run fit_models to get model fits
 935 | #' de_fit <- fit_models(de_obj)
 936 | #'
 937 | #' # extract beta coefficients
 938 | #' beta <- betaCoef(de_fit)
 939 | #'
 940 | #' @export
 941 | setGeneric(name = "betaCoef", def = function(object) standardGeneric("betaCoef"))
 942 | 
 943 | #' Statistic type used in analysis
 944 | #'
 945 | #' Access the statistic type in a \code{\linkS4class{deFit}} object. Can
 946 | #' either be the optimal discovery procedure (odp) or the likelihood ratio
 947 | #' test (lrt).
 948 | #'
 949 | #' @param object \code{S4 object}: \code{\linkS4class{deFit}}
 950 | #'
 951 | #' @examples
 952 | #' # import data
 953 | #' library(splines)
 954 | #' data(kidney)
 955 | #' age <- kidney$age
 956 | #' sex <- kidney$sex
 957 | #' kidexpr <- kidney$kidexpr
 958 | #' cov <- data.frame(sex = sex, age = age)
 959 | #'
 960 | #' # create models
 961 | #' null_model <- ~sex
 962 | #' full_model <- ~sex + ns(age, df = 4)
 963 | #'
 964 | #' # create deSet object from data
 965 | #' de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model,
 966 | #' full.model = full_model)
 967 | #'
 968 | #' # run fit_models to get model fits
 969 | #' de_fit <- fit_models(de_obj)
 970 | #'
 971 | #' # extract the statistic type of model fits
 972 | #' stat_type <- sType(de_fit)
 973 | #'
 974 | #' @return \code{sType} returns the statistic type- either "odp" or "lrt".
 975 | #'
 976 | #' @author John Storey, Andrew Bass
 977 | #'
 978 | #' @seealso \code{\link{fit_models}}, \code{\linkS4class{deFit}} and
 979 | #' \code{\linkS4class{deSet}}
 980 | #'
 981 | #' @keywords sType
 982 | #'
 983 | #' @exportMethod sType
 984 | setGeneric(name = "sType", def = function(object) standardGeneric("sType"))
 985 | 
 986 | #' Fitted data from the full model
 987 | #'
 988 | #' Access the fitted data from the full model in a
 989 | #' \code{\linkS4class{deFit}} object.
 990 | #'
 991 | #' @param object \code{S4 object}: \code{\linkS4class{deFit}}
 992 | #'
 993 | #' @usage fitFull(object)
 994 | #'
 995 | #' @return \code{fitFull} returns a matrix of fitted values from full model.
 996 | #'
 997 | #' @examples
 998 | #' # import data
 999 | #' library(splines)
1000 | #' data(kidney)
1001 | #' age <- kidney$age
1002 | #' sex <- kidney$sex
1003 | #' kidexpr <- kidney$kidexpr
1004 | #' cov <- data.frame(sex = sex, age = age)
1005 | #'
1006 | #' # create models
1007 | #' null_model <- ~sex
1008 | #' full_model <- ~sex + ns(age, df = 4)
1009 | #'
1010 | #' # create deSet object from data
1011 | #' de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model,
1012 | #' full.model = full_model)
1013 | #'
1014 | #' # run fit_models to get model fits
1015 | #' de_fit <- fit_models(de_obj)
1016 | #'
1017 | #' # extract fitted values for full model
1018 | #' fitted_full <- fitFull(de_fit)
1019 | #'
1020 | #' @author John Storey, Andrew Bass
1021 | #'
1022 | #' @seealso \code{\link{fit_models}}
1023 | #'
1024 | #' @export
1025 | setGeneric(name = "fitFull", def = function(object) standardGeneric("fitFull"))
1026 | 
1027 | #' Fitted data from the null model
1028 | #'
1029 | #' Access the fitted data from the null model in a
1030 | #' \code{\linkS4class{deFit}} object.
1031 | #'
1032 | #' @param object \code{S4 object}: \code{\linkS4class{deFit}}
1033 | #'
1034 | #' @usage fitNull(object)
1035 | #'
1036 | #' @return \code{fitNull} returns a matrix of fitted values from null model.
1037 | #'
1038 | #' @examples
1039 | #' # import data
1040 | #' library(splines)
1041 | #' data(kidney)
1042 | #' age <- kidney$age
1043 | #' sex <- kidney$sex
1044 | #' kidexpr <- kidney$kidexpr
1045 | #' cov <- data.frame(sex = sex, age = age)
1046 | #'
1047 | #' # create models
1048 | #' null_model <- ~sex
1049 | #' full_model <- ~sex + ns(age, df = 4)
1050 | #'
1051 | #' # create deSet object from data
1052 | #' de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model,
1053 | #' full.model = full_model)
1054 | #'
1055 | #' # run fit_models to get model fits
1056 | #' de_fit <- fit_models(de_obj)
1057 | #'
1058 | #' # extract fitted values from null model
1059 | #' fitted_null <- fitNull(de_fit)
1060 | #'
1061 | #' @author  John Storey, Andrew Bass
1062 | #'
1063 | #' @seealso \code{\link{fit_models}}
1064 | #'
1065 | #' @export
1066 | setGeneric(name = "fitNull", def = function(object) standardGeneric("fitNull"))
1067 | 
1068 | #' Residuals of full model fit
1069 | #'
1070 | #' Access the fitted full model residuals in a \code{\linkS4class{deFit}}
1071 | #' object.
1072 | #'
1073 | #' @param object \code{S4 object}: \code{\linkS4class{deFit}}
1074 | #'
1075 | #' @usage resFull(object)
1076 | #'
1077 | #' @return \code{resFull} returns a matrix of residuals from full model.
1078 | #'
1079 | #' @examples
1080 | #' # import data
1081 | #' library(splines)
1082 | #' data(kidney)
1083 | #' age <- kidney$age
1084 | #' sex <- kidney$sex
1085 | #' kidexpr <- kidney$kidexpr
1086 | #' cov <- data.frame(sex = sex, age = age)
1087 | #'
1088 | #' # create models
1089 | #' null_model <- ~sex
1090 | #' full_model <- ~sex + ns(age, df = 4)
1091 | #'
1092 | #' # create deSet object from data
1093 | #' de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model,
1094 | #' full.model = full_model)
1095 | #'
1096 | #' # run fit_models to get model fits
1097 | #' de_fit <- fit_models(de_obj)
1098 | #'
1099 | #' # extract out the full residuals from the model fit
1100 | #' res_full <- resFull(de_fit)
1101 | #'
1102 | #' @author John Storey, Andrew Bass
1103 | #'
1104 | #' @seealso \code{\link{fit_models}}
1105 | #'
1106 | #' @export
1107 | setGeneric(name = "resFull", def = function(object) standardGeneric("resFull"))
1108 | 
1109 | #' Residuals of null model fit
1110 | #'
1111 | #' Access the fitted null model residuals in a \code{\linkS4class{deFit}}
1112 | #' object.
1113 | #'
1114 | #' @param object \code{S4 object}: \code{\linkS4class{deFit}}
1115 | #'
1116 | #' @usage resNull(object)
1117 | #'
1118 | #' @return \code{resNull} returns a matrix of residuals from null model.
1119 | #'
1120 | #' @examples
1121 | #' # import data
1122 | #' library(splines)
1123 | #' data(kidney)
1124 | #' age <- kidney$age
1125 | #' sex <- kidney$sex
1126 | #' kidexpr <- kidney$kidexpr
1127 | #' cov <- data.frame(sex = sex, age = age)
1128 | #'
1129 | #' # create models
1130 | #' null_model <- ~sex
1131 | #' full_model <- ~sex + ns(age, df = 4)
1132 | #'
1133 | #' # create deSet object from data
1134 | #' de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model,
1135 | #' full.model = full_model)
1136 | #'
1137 | #' # run fit_models to get model fits
1138 | #' de_fit <- fit_models(de_obj)
1139 | #'
1140 | #' # extract out the null residuals from the model fits
1141 | #' res_null <- resNull(de_fit)
1142 | #' @author John Storey, Andrew Bass
1143 | #'
1144 | #' @seealso  \code{\link{fit_models}}
1145 | #'
1146 | #' @export
1147 | setGeneric(name = "resNull", def = function(object) standardGeneric("resNull"))
1148 | 
1149 | #' Summary of deFit and deSet
1150 | #'
1151 | #' Summary of \code{\linkS4class{deFit}} and \code{\linkS4class{deSet}} objects.
1152 | #'
1153 | #' @param object \code{S4 object}: \code{\linkS4class{deSet}}
1154 | #' @param \dots additional parameters
1155 | #'
1156 | #' @examples
1157 | #' # import data
1158 | #' library(splines)
1159 | #' data(kidney)
1160 | #' age <- kidney$age
1161 | #' sex <- kidney$sex
1162 | #' kidexpr <- kidney$kidexpr
1163 | #' cov <- data.frame(sex = sex, age = age)
1164 | #'
1165 | #' # create models
1166 | #' null_model <- ~sex
1167 | #' full_model <- ~sex + ns(age, df = 4)
1168 | #'
1169 | #' # create deSet object from data
1170 | #' de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model,
1171 | #' full.model = full_model)
1172 | #'
1173 | #' # get summary
1174 | #' summary(de_obj)
1175 | #'
1176 | #' # run odp and summarize
1177 | #' de_odp <- odp(de_obj, bs.its= 20)
1178 | #' summary(de_odp)
1179 | #' @author John Storey, Andrew Bass
1180 | #'
1181 | #' @return
1182 | #' Summary of \code{\linkS4class{deSet}} object
1183 | #'
1184 | #' @keywords summary
1185 | #'
1186 | #' @export summary
1187 | setGeneric(name = "summary")
1188 | 
1189 | #' Show function for deFit and deSet
1190 | #'
1191 | #' Show function for \code{\linkS4class{deFit}} and \code{\linkS4class{deSet}}
1192 | #' objects.
1193 | #'
1194 | #' @param object \code{S4 object}: \code{\linkS4class{deSet}}
1195 | #' @param \dots additional parameters
1196 | #'
1197 | #' @examples
1198 | #' # import data
1199 | #' library(splines)
1200 | #' data(kidney)
1201 | #' age <- kidney$age
1202 | #' sex <- kidney$sex
1203 | #' kidexpr <- kidney$kidexpr
1204 | #' cov <- data.frame(sex = sex, age = age)
1205 | #'
1206 | #' # create models
1207 | #' null_model <- ~sex
1208 | #' full_model <- ~sex + ns(age, df = 4)
1209 | #'
1210 | #' # create deSet object from data
1211 | #' de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model,
1212 | #' full.model = full_model)
1213 | #'
1214 | #' # get summary
1215 | #' summary(de_obj)
1216 | #'
1217 | #' # run odp and summarize
1218 | #' de_odp <- odp(de_obj, bs.its= 20)
1219 | #' de_odp
1220 | #' @author John Storey, Andrew Bass
1221 | #'
1222 | #' @return
1223 | #' Nothing of interest
1224 | #'
1225 | #' @export
1226 | setGeneric(name = "show")
1227 | 


--------------------------------------------------------------------------------
/R/ExpressionSet-methods.R:
--------------------------------------------------------------------------------
 1 | setAs(from = "ExpressionSet", to = "deSet",
 2 |       def = function(from, to) updateOldExpSet(from, toClass = "deSet"))
 3 | 
 4 | # Builds a `toClass` object from the slots of `from`; featureData is
 5 | # regenerated from the assay data (byrow = TRUE) rather than copied.
 6 | updateOldExpSet <- function(from, toClass, ...) {
 7 |   assay <- from@assayData
 8 |   converted <- new(toClass, assayData = assay, phenoData = from@phenoData,
 9 |                    featureData = annotatedDataFrameFrom(assay, byrow = TRUE),
10 |                    experimentData = from@experimentData,
11 |                    annotation = from@annotation)
12 |   validObject(converted)
13 |   converted
14 | }
15 | #' @rdname deSet
16 | setMethod("deSet",
17 |           signature = signature(object = "ExpressionSet"),
18 |           function(object, full.model, null.model, individual = NULL) {
19 |             # Validate the arguments before any conversion work so that
20 |             # bad input fails fast. Both model formulas are required.
21 |             if (missing(full.model) || missing(null.model)) {
22 |               stop("provide both full and null models")
23 |             }
24 |             # When supplied, `individual` needs one entry per array, i.e.
25 |             # per column of the expression matrix.
26 |             if (!is.null(individual) &&
27 |                   length(individual) != ncol(exprs(object))) {
28 |               stop("individual must be the same length as the number of ",
29 |                    "arrays")
30 |             }
31 |             # Coerce the ExpressionSet to a deSet and hand it, together
32 |             # with the models and individuals, to createSet().
33 |             createSet(as(object, "deSet"),
34 |                       nMod = null.model,
35 |                       fMod = full.model, ind = individual)
36 |           })
37 | 


--------------------------------------------------------------------------------
/R/deFit-methods.R:
--------------------------------------------------------------------------------
 1 | #' @rdname summary
 2 | setMethod("summary",
 3 |           signature=signature(object="deFit"),
 4 |           function(object) {
 5 |             # Print the first (up to) three rows of a slot matrix under a
 6 |             # header. seq_len() keeps this safe for zero-row matrices,
 7 |             # where 1:min(3, 0) would index rows c(1, 0) and fail.
 8 |             show_head <- function(header, mat) {
 9 |               cat(header, '\n')
10 |               rows <- seq_len(min(3, nrow(mat)))
11 |               print(signif(mat[rows, ]), digits = 3)
12 |             }
13 |             cat('\n'); cat('deFit Summary', '\n', '\n')
14 |             show_head('fit.full:', object@fit.full)
15 |             show_head('\nfit.null:', object@fit.null)
16 |             show_head('\nres.full:', object@res.full)
17 |             show_head('\nres.null:', object@res.null)
18 |             show_head('\nbeta.coef:', object@beta.coef)
19 |             cat('\nstat.type:', '\n'); print(object@stat.type)
20 |           })
# show() for deFit objects: prints up to the first two rows of the fitted and
# residual matrices, up to five rows of beta.coef, and the statistic type.
#' @rdname show
setMethod("show",
          signature = signature(object = "deFit"),
          function(object) {
            # Print `label` and the leading rows of `mat`. seq_len() and
            # drop = FALSE keep this safe for matrices with zero or one row,
            # so displaying an empty deFit no longer fails on 1:0 indexing.
            print_head <- function(label, mat, n_rows) {
              cat(label, '\n')
              rows <- seq_len(min(n_rows, nrow(mat)))
              print(signif(mat[rows, , drop = FALSE]), digits = 3)
            }
            cat('\n'); cat('deFit Summary', '\n', '\n')
            print_head('fit.full:', object@fit.full, 2)
            print_head('\nfit.null:', object@fit.null, 2)
            print_head('\nres.full:', object@res.full, 2)
            print_head('\nres.null:', object@res.null, 2)
            print_head('\nbeta.coef:', object@beta.coef, 5)
            cat('\nstat.type:', '\n')
            print(object@stat.type)
          })
41 | 


--------------------------------------------------------------------------------
/R/deSet-methods.R:
--------------------------------------------------------------------------------
  1 | #' @rdname fit_models
  2 | setMethod("fit_models",
  3 |           "deSet",
  4 |           function(object, stat.type = c("lrt", "odp"), weights = NULL) {
  5 |             # Initializations
  6 |             if (!is.null(weights)) return(fit_wmodels(object, stat.type = stat.type, w = weights))
  7 |             stat.var <- match.arg(stat.type, c("lrt", "odp"))
  8 |             exprsData <- exprs(object)
  9 |             n <- ncol(exprsData)
 10 |             null.matrix <- object@null.matrix
 11 |             full.matrix <- object@full.matrix
 12 |             # Rescale if there are group individual factors
 13 |             if (length(object@individual) != 0) {
 14 |               ind.matrix <- model.matrix(~-1 + as.factor(object@individual))
 15 |               Hi <- projMatrix(ind.matrix)
 16 |               fitInd <- t(Hi %*% t(exprsData))
 17 |               exprsData <- exprsData - fitInd
 18 |               full.matrix <- full.matrix - Hi %*% full.matrix
 19 |               null.matrix <- null.matrix - Hi %*% null.matrix
 20 |               full.matrix <- rm.zero.cols(full.matrix)
 21 |               null.matrix <- rm.zero.cols(null.matrix)
 22 |             }
 23 |             # Fitted exprsData and statistics under null model and full model
 24 |             H.null <- projMatrix(null.matrix)
 25 |             fitNull <- t(H.null %*% t(exprsData))
 26 |             resNull <- exprsData - fitNull
 27 |             if (stat.var == "odp") {
 28 |               full.matrix <- full.matrix - H.null %*% full.matrix
 29 |               full.matrix <- rm.zero.cols(full.matrix)
 30 |               H.full <- projMatrix(full.matrix)
 31 |               B.coef <- resNull %*% full.matrix %*% ginv(t(full.matrix) %*% full.matrix)
 32 |               dHFull <- diag(H.full)
 33 |               fitFull <- t(H.full %*% t(resNull))
 34 |               resFull <- resNull - fitFull
 35 |             } else {
 36 |               H.full <- projMatrix(full.matrix)
 37 |               dHFull <- diag(H.full)
 38 |               B.coef <- exprsData %*% full.matrix %*% ginv(t(full.matrix) %*% full.matrix)
 39 |               fitFull <- t(H.full %*% t(exprsData))
 40 |               resFull <- exprsData - fitFull
 41 |             }
 42 |             efObj <- new("deFit", fit.full = fitFull, fit.null = fitNull,
 43 |                          dH.full =  matrix(dHFull, nrow = nrow(resFull), ncol = length(dHFull), byrow = T), res.full = resFull,
 44 |                          res.null = resNull, beta.coef = B.coef,
 45 |                          stat.type = stat.var)
 46 |             return(efObj)
 47 |           })
 48 | 
 49 | #' @rdname odp
 50 | setMethod("odp",
 51 |           signature = signature(object = "deSet", de.fit = "missing"),
 52 |           function(object, de.fit, odp.parms = NULL, weights = NULL, bs.its = 100,
 53 |                    n.mods = 50, seed = NULL, verbose = TRUE, ...)  {
 54 |             de.fit <- fit_models(object,
 55 |                                  stat.type = "odp", weights = weights)
 56 |             results <- odp(object, de.fit,
 57 |                            odp.parms = odp.parms,
 58 |                            n.mods = n.mods,
 59 |                            bs.its = bs.its,
 60 |                            seed = seed,
 61 |                            verbose = verbose, ...)
 62 |             return(results)
 63 |           })
 64 | 
 65 | #' @rdname odp
 66 | setMethod("odp",
 67 |           signature = signature(object = "deSet", de.fit = "deFit"),
 68 |           function(object, de.fit, odp.parms = NULL, weights = NULL, bs.its = 100,
 69 |                    n.mods = 50, seed = NULL, verbose = TRUE, ...) {
 70 |             if (!is.null(seed)) {
 71 |               set.seed(seed)
 72 |             }
 73 |             if (is.null(odp.parms)) {
 74 |               odp.parms <- kl_clust(object, de.fit = de.fit, 
 75 |                                     n.mods = n.mods)
 76 |             } else if (sum(!(names(odp.parms) %in% c("mu.full", "sig.full",
 77 |                                                      "mu.null", "sig.null",
 78 |                                                      "n.per.mod",
 79 |                                                      "clustMembers"))) != 0) {
 80 |               stop("Not a correct ODP parameter list. See kl_clust documentation")
 81 |             }
 82 |             odp.stat <- odpStat(n.res = de.fit@res.null,
 83 |                                 clustParms = odp.parms)
 84 |             null.stat <- bootstrap(object = object,
 85 |                                    obs.fit = de.fit,
 86 |                                    clustParms = odp.parms,
 87 |                                    bs.its = bs.its,
 88 |                                    verbose = verbose)
 89 |             pval <- empPvals(stat = odp.stat,
 90 |                              stat0 = null.stat)
 91 |             qval <- qvalue(pval, ...)
 92 |             qval$stat0 <- null.stat
 93 |             qval$stat <- odp.stat
 94 |             qvalueObj(object) <- qval
 95 |             return(object)
 96 |           })
 97 | 
 98 | #' @rdname lrt
 99 | setMethod("lrt",
100 |           signature = signature(object = "deSet", de.fit = "missing"),
101 |           function(object, de.fit, nullDistn = c("normal", "bootstrap"), weights = NULL,
102 |                    bs.its = 100, seed = NULL, verbose = TRUE, mod.F = FALSE, ...) {
103 |             de.fit <- fit_models(object,
104 |                                  stat.type = "lrt", weights = weights)
105 |             results <- lrt(object,
106 |                            de.fit = de.fit,
107 |                            nullDistn = nullDistn,
108 |                            bs.its = bs.its,
109 |                            seed = seed,
110 |                            verbose = verbose,
111 |                            mod.F = mod.F, ...)
112 |             return(results)
113 |           })
114 | 
# lrt() with a precomputed fit: compute the observed statistic with
# lrtStat(), obtain p-values either from an F distribution
# (nullDistn = "normal") or from a bootstrap null, and store the qvalue()
# result in the object's qvalueObj.
#
# With mod.F = TRUE the row variances of the full-model residuals are passed
# through squeezeVar() and its output is used for the statistic and df2.
# `...` is forwarded to qvalue(). `weights` is accepted but not used here.
# Returns the updated deSet.
#' @rdname lrt
setMethod("lrt",
          signature = signature(object = "deSet", de.fit = "deFit"),
          function(object, de.fit, nullDistn = c("normal", "bootstrap"), weights = NULL,
                   bs.its = 100, seed = NULL, verbose = TRUE, mod.F = FALSE, ...) {
            # Design sizes and number of arrays
            n_full <- ncol(object@full.matrix)
            n_null <- ncol(object@null.matrix)
            n_arrays <- ncol(object)
            squeezed <- NULL
            posterior_var <- NULL
            if (!is.null(seed)) {
              set.seed(seed)
            }
            nullDistn <- match.arg(nullDistn, c("normal", "bootstrap"))
            if (mod.F) {
              resid_df <- n_arrays - n_full
              row_var <- rowSums(de.fit@res.full ^ 2) / resid_df
              squeezed <- squeezeVar(row_var, resid_df,
                                     covariate = rowMeans(exprs(object)))
              posterior_var <- squeezed$var.post
              # Denominator df: residual df plus the prior df from squeezeVar
              df2 <- resid_df + squeezed$df.prior
            }
            observed <- lrtStat(resNull = de.fit@res.null,
                                resFull = de.fit@res.full,
                                post.var = posterior_var)
            if (nullDistn == "bootstrap") {
              # Empirical p-values against a bootstrap null distribution
              null_draws <- bootstrap(object = object,
                                      obs.fit = de.fit,
                                      bs.its = bs.its,
                                      verbose = verbose,
                                      mod.F = mod.F,
                                      post.var = squeezed)
              p_values <- empPvals(stat = observed,
                                   stat0 = null_draws)
              q_result <- qvalue(p_values, ...)
              q_result$stat0 <- null_draws
              q_result$stat <- observed
              qvalueObj(object) <- q_result
              return(object)
            }
            # nullDistn == "normal": rescale the statistic and use pf()
            df1 <- n_full - n_null
            if (mod.F) {
              observed <- observed / df1
            } else {
              df2 <- n_arrays - n_full
              observed <- observed * df2 / df1
            }
            p_values <- pf(q = observed, df1 = df1, df2 = df2,
                           lower.tail = FALSE)
            q_result <- qvalue(p_values, ...)
            q_result$stat <- observed
            q_result$df2 <- df2
            q_result$df1 <- df1
            qvalueObj(object) <- q_result
            object
          })
176 | 
# kl_clust() without a precomputed fit: fit the models with
# stat.type = "odp" (unweighted), then dispatch to the deFit method.
#' @rdname kl_clust
setMethod("kl_clust",
          signature = signature(object = "deSet", de.fit = "missing"),
          function(object, de.fit,  n.mods = 50)  {
            odp_fit <- fit_models(object, stat.type = "odp")
            clustered <- kl_clust(object, odp_fit,
                                  n.mods = n.mods)
            clustered
          })
186 | 
# kl_clust() with a precomputed fit: obtain the cluster membership from
# klmod() and return the per-cluster parameters computed by mod.parms().
# mod.df() supplies the nf / nn values from the full and null designs.
#' @rdname kl_clust
setMethod("kl_clust",
          signature = signature(object = "deSet", de.fit = "deFit"),
          function(object, de.fit, n.mods = 50) {
            df_full <- mod.df(object@full.matrix)
            df_null <- mod.df(object@null.matrix)
            members <- klmod(de.fit, nf = df_full,
                             n.mods = n.mods)
            parms <- mod.parms(de.fit, nf = df_full, nn = df_null,
                               clMembers = members)
            parms
          })
198 | 
# summary() for deSet objects: prints the underlying ExpressionSet, the array
# and probe counts, both model formulas, the individual factor (when set)
# and, once p-values are stored in qvalueObj, pi0 plus a table of cumulative
# significant calls at fixed cut-offs.
#' @rdname summary
setMethod("summary",
          signature = signature(object = "deSet"),
          function(object, ...) {
            cat('\n')
            cat('ExpressionSet Summary', '\n', '\n')
            print(as(object, "ExpressionSet"))
            cat('\n')
            cat('de Analysis Summary', '\n', '\n')
            cat('Total number of arrays:', ncol(exprs(object)), '\n')
            cat('Total number of probes:', nrow(exprs(object)), '\n', '\n')
            cat('Biological variables:', '\n')
            cat('\tNull Model:')
            print(nullModel(object))
            cat('\n\tFull Model:')
            print(fullModel(object))
            cat('\n')
            # The individual slot is printed as stored; no numeric
            # conversion is needed (the previous unused as.numeric() call
            # could only produce coercion warnings).
            if (length(object@individual) != 0) {
              cat('Individuals:', '\n')
              print(individual(object))
              cat('\n')
            }
            cat('.......', '\n', '\n')
            if (!is.null(object@qvalueObj$pvalues)) {
              cuts <- c(0.0001, 0.001, 0.01, 0.025, 0.05, 0.10, 1)
              digits <- getOption("digits")
              cat("\nStatistical significance summary:\n")
              cat("pi0:", format(object@qvalueObj$pi0, digits = digits),
                  "\n", sep = "\t")
              cat("\n")
              cat("Cumulative number of significant calls:\n")
              cat("\n")
              # One column per cut-off, one row per significance measure
              counts <- sapply(cuts, function(x) c("p-value" = sum(object@qvalueObj$pvalues < x),
                                                   "q-value" = sum(object@qvalueObj$qvalues < x),
                                                   "local fdr" = sum(object@qvalueObj$lfdr < x)))
              colnames(counts) <- paste("<", cuts, sep = "")
              print(counts)
              cat("\n")
            }
          })
# show() for deSet objects: prints the underlying ExpressionSet, the array
# and probe counts, both model formulas, the individual codes (when set), the
# first rows of the expression matrix and, once p-values are stored in
# qvalueObj, pi0 plus a table of cumulative significant calls.
#' @rdname show
setMethod("show",
          signature = signature(object = "deSet"),
          function(object) {
            cat('\n')
            cat('ExpressionSet Summary', '\n', '\n')
            print(as(object, "ExpressionSet"))
            cat('\n')
            cat('de Analysis Summary', '\n', '\n')
            cat('Total number of arrays:', ncol(exprs(object)), '\n')
            cat('Total number of probes:', nrow(exprs(object)), '\n', '\n')
            cat('Biological variables:', '\n')
            cat('\tNull Model: ')
            print(nullModel(object))
            cat('\tFull Model: ')
            print(fullModel(object))
            cat('\n')
            if (length(object@individual) != 0) {
              cat('Individuals:', '\n')
              # Show the numeric individual codes as a single row. The
              # earlier expression multiplied every code by its position
              # (i * code[i]), which does not identify individuals.
              # NOTE(review): assumes the slot holds a factor or numeric
              # vector -- confirm against the deSet class definition.
              ind <- as.numeric(object@individual)
              print(matrix(ind, nrow = 1))
              cat('\n')
            }
            cat('Expression data:', '\n')
            # seq_len() and drop = FALSE keep the preview working for
            # expression matrices with zero or one row
            exprs_mat <- exprs(object)
            preview_rows <- seq_len(min(5, nrow(exprs_mat)))
            print(signif(exprs_mat[preview_rows, , drop = FALSE]),
                  digits = 3)
            cat('.......','\n','\n')
            if (!is.null(object@qvalueObj$pvalues)) {
              cuts <- c(0.0001, 0.001, 0.01, 0.025, 0.05, 0.10, 1)
              digits <- getOption("digits")
              cat("\nStatistical significance summary:\n")
              cat("pi0:", format(object@qvalueObj$pi0, digits = digits), "\n",
                  sep = "\t")
              cat("\n")
              cat("Cumulative number of significant calls:\n")
              cat("\n")
              # One column per cut-off, one row per significance measure
              counts <- sapply(cuts, function(x) c("p-value" = sum(object@qvalueObj$pvalues < x),
                                                   "q-value" = sum(object@qvalueObj$qvalues < x),
                                                   "local fdr" = sum(object@qvalueObj$lfdr < x)))
              colnames(counts) <- paste("<", cuts, sep="")
              print(counts)
              cat("\n")
            }
          })
287 | 
# apply_qvalue(): re-run qvalue() on the p-values already stored in the
# object's qvalueObj, forwarding `...`, and store the new result.
# Stops when qvalueObj is empty. Returns the validated object.
#' @rdname apply_qvalue
setMethod("apply_qvalue",
          signature = signature(object = "deSet"),
          function(object, ...) {
            stored <- object@qvalueObj
            if (length(stored) == 0) {
              stop("qvalueObj is empty- need to run either odp or lrt")
            }
            refreshed <- qvalue(stored$pvalues, ...)
            qvalueObj(object) <- refreshed
            validObject(object)
            object
          })
299 | 
# apply_sva(): estimate surrogate variables with sva() from the object's
# expression data and its null / full design matrices (`...` is forwarded to
# sva()), append them to pData as columns SV1, SV2, ... and prepend them to
# the terms of both model formulas.
#
# Returns the validated, updated deSet. If no surrogate variables are
# returned, a warning is issued and the object is returned unchanged.
#' @rdname apply_sva
setMethod("apply_sva",
          signature = signature(object="deSet"),
          function(object, ...) {
            full.matrix <- object@full.matrix
            null.matrix <- object@null.matrix
            sv.sva <- sva(exprs(object),
                          mod0 = null.matrix,
                          mod = full.matrix, ...)$sv
            # NOTE(review): sva() appears to return the scalar 0 in `sv` when
            # it finds no surrogate variables, and some versions may return a
            # bare vector for a single one -- confirm against the installed
            # sva. Both cases have no columns, which previously caused an
            # obscure failure in 1:ncol(sv.sva).
            if (is.null(dim(sv.sva))) {
              n.arrays <- ncol(exprs(object))
              if (n.arrays > 1 && length(sv.sva) == n.arrays) {
                sv.sva <- matrix(sv.sva, ncol = 1)
              } else {
                sv.sva <- NULL
              }
            }
            if (is.null(sv.sva) || ncol(sv.sva) == 0) {
              warning("sva returned no surrogate variables; object returned unchanged",
                      call. = FALSE)
              return(object)
            }
            colnames(sv.sva) <- paste0("SV", seq_len(ncol(sv.sva)))
            pData(object) <- cbind(pData(object), sv.sva)
            # Formula text with the surrogate variables placed ahead of the
            # model's existing term labels
            sv.formula.text <- function(model) {
              paste0("~", paste(c(colnames(sv.sva),
                                  attr(terms(model), "term.labels")),
                                collapse = " + "))
            }
            fullModel(object) <- as.formula(sv.formula.text(fullModel(object)))
            nullModel(object) <- as.formula(sv.formula.text(nullModel(object)))
            validObject(object)
            object
          })
325 | 
# apply_snm(): normalize the expression data with snm() and store the
# normalized matrix back into the object.
#
# The null design is passed as the adjustment variables. The full design,
# after subtracting projMatrix(null) %*% full and passing through
# rm.zero.cols(), is passed as the biological variables. When `int.var` is
# NULL it defaults to 1:n (one level per sample) with a warning. `...` is
# forwarded to snm(). Returns the validated object.
#' @rdname apply_snm
setMethod("apply_snm",
          signature = signature(object="deSet"),
          function(object, int.var=NULL, ...) {
            adj_design <- object@null.matrix
            bio_design <- object@full.matrix
            bio_design <- bio_design - projMatrix(adj_design) %*% bio_design
            bio_design <- as.matrix(rm.zero.cols(bio_design))
            if (is.null(int.var)) {
              int.var <- 1:ncol(exprs(object))
              warning("Setting int.var=1:n where n is number of samples.")
            }
            normalized <- snm(exprs(object),
                              bio.var = bio_design,
                              adj.var = adj_design,
                              int.var = int.var, ...)
            exprs(object) <- normalized$norm.dat
            validObject(object)
            object
          })
345 | 
346 | 
# apply_jackstraw(): pass the object's expression matrix and all remaining
# arguments straight to jackstraw::jackstraw_pca() and return its result.
#' @rdname apply_jackstraw
setMethod("apply_jackstraw",
          signature = signature(object="deSet"),
          function(object, r1 = NULL, r = NULL, s = NULL, B = NULL,
                   covariate = NULL, verbose = TRUE, seed = NULL) {
            js_result <- jackstraw::jackstraw_pca(
              exprs(object),
              r1 = r1, r = r, s = s, B = B,
              covariate = covariate, verbose = verbose, seed = seed
            )
            js_result
          })
357 | 


--------------------------------------------------------------------------------
/R/edge.R:
--------------------------------------------------------------------------------
  1 | #' @title
  2 | #' Extraction of Differential Gene Expression
  3 | #'
  4 | #' @description
  5 | #' The edge package implements methods for carrying out differential
  6 | #' expression analyses of genome-wide gene expression studies. Significance
  7 | #' testing using the optimal discovery procedure and generalized likelihood
  8 | #' ratio tests (equivalent to F-tests and t-tests) are implemented for general study
  9 | #' designs. Special functions are available to facilitate the analysis of
 10 | #' common study designs, including time course experiments. Other packages
 11 | #' such as snm, sva, and qvalue are integrated in edge to provide a wide range
 12 | #' of tools for gene expression analysis.
 13 | #'
 14 | #' @examples
 15 | #' \dontrun{
 16 | #' browseVignettes("edge")
 17 | #' }
 18 | #' @name edge
 19 | #' @author John Storey, Jeffrey Leek, Andrew Bass
 20 | #' @docType package
 21 | #' @import Biobase methods splines sva snm qvalue MASS
 22 | #' @useDynLib edge odpScoreCluster kldistance
 23 | NULL
 24 | 
 25 | #' @title Gene expression dataset from Calvano et al. (2005) Nature
 26 | #'
 27 | #' @description
 28 | #' The data provide gene expression measurements in an endotoxin study where
 29 | #' four subjects were given endotoxin and four subjects were given a placebo.
 30 | #' Blood samples were collected and leukocytes were isolated from the samples
 31 | #' before infusion and at times 2, 4, 6, 9, 24 hours.
 32 | #'
 33 | #' @usage data(endotoxin)
 34 | #' @format
 35 | #' \itemize{
 36 | #'   \item endoexpr: A 500 rows by 46 columns data frame containing expression
 37 | #'   values.
 38 | #'   \item class: A vector of length 46 containing information about which
 39 | #'   individuals were given endotoxin.
 40 | #'   \item ind: A vector of length 46 providing indexing measurements for each
 41 | #'   individual in the experiment.
 42 | #'   \item time: A vector of length 46 indicating time measurements.
 43 | #' }
 44 | #'
 45 | #' @note
 46 | #' The data is a random subset of 500 genes from the full dataset. To
 47 | #' download the full data set, go to \url{http://genomine.org/edge/}.
 48 | #'
 49 | #' @references
 50 | #' Storey JD, Xiao W, Leek JT, Tompkins RG, and Davis RW. (2005) Significance
 51 | #' analysis of time course microarray experiments. PNAS, 102: 12837-12842. \cr
 52 | #' \url{http://www.pnas.org/content/102/36/12837.full}
 53 | #'
 54 | #' @examples
 55 | #' library(splines)
 56 | #' # import data
 57 | #' data(endotoxin)
 58 | #' ind <- endotoxin$ind
 59 | #' class <- endotoxin$class
 60 | #' time <- endotoxin$time
 61 | #' endoexpr <- endotoxin$endoexpr
 62 | #' cov <- data.frame(individual = ind, time = time, class = class)
 63 | #'
 64 | #' # formulate null and full models in experiment
 65 | #' # note: interaction term is a way of taking into account group effects
 66 | #' mNull <- ~ns(time, df=4, intercept = FALSE) + class
 67 | #' mFull <- ~ns(time, df=4, intercept = FALSE) +
 68 | #'           ns(time, df=4, intercept = FALSE):class + class
 69 | #'
 70 | #' # create deSet object
 71 | #' de_obj <- build_models(endoexpr, cov = cov, full.model = mFull,
 72 | #'                        null.model = mNull, ind = ind)
 73 | #'
 74 | #' # Perform ODP/lrt statistic to determine significant genes in study
 75 | #' de_odp <- odp(de_obj, bs.its = 10)
 76 | #' de_lrt <- lrt(de_obj, nullDistn = "bootstrap", bs.its = 10)
 77 | #'
 78 | #' # summarize significance results
 79 | #' summary(de_odp)
 80 | #' @name endotoxin
 81 | #' @return endotoxin dataset
 82 | #' @docType data
 83 | #' @keywords datasets
 84 | NULL
 85 | 
 86 | #' @title Gene expression dataset from Rodwell et al. (2004)
 87 | #'
 88 | #' @usage
 89 | #' data(kidney)
 90 | #'
 91 | #' @description
 92 | #' Gene expression measurements from kidney samples were obtained from 72
 93 | #' human subjects ranging in age from 27 to 92 years. Only one array was
 94 | #' obtained per individual, and the age and sex of each individual were
 95 | #' recorded.
 96 | #'
 97 | #' @format
 98 | #' \itemize{
 99 | #'   \item kidcov: A 133 rows by 6 columns data frame detailing the study
100 | #'   design.
101 | #'   \item kidexpr: A 500 rows by 133 columns matrix of gene expression values,
102 | #'   where each row corresponds to a different probe-set and each column to a
103 | #'   different tissue sample.
104 | #'   \item age: A vector of length 133 giving the age of each sample.
105 | #'   \item sex: A vector of length 133 giving the sex of each sample.
106 | #' }
107 | #' @note
108 | #' These data are a random subset of 500 probe-sets from the total number of 
109 | #' probe-sets in the original data set. To download the full data set, go to 
110 | #' \url{http://genomine.org/edge/}. The \code{age} and \code{sex} are contained
111 | #' in \code{kidcov} data frame.
112 | #'
113 | #' @references
114 | #' Storey JD, Xiao W, Leek JT, Tompkins RG, and Davis RW. (2005) Significance
115 | #' analysis of time course microarray experiments. PNAS, 102: 12837-12842. \cr
116 | #' \url{http://www.pnas.org/content/102/36/12837.full}
117 | #'
118 | #' @examples
119 | #' # import data
120 | #' data(kidney)
121 | #' sex <- kidney$sex
122 | #' age <- kidney$age
123 | #' kidexpr <- kidney$kidexpr
124 | #'
125 | #' # create model
126 | #' de_obj <- build_study(data = kidexpr, adj.var = sex, tme = age,
127 | #' sampling = "timecourse", basis.df = 4)
128 | #'
129 | #' # use the ODP/lrt method to determine significant genes
130 | #' de_odp <- odp(de_obj, bs.its=10)
131 | #' de_lrt <- lrt(de_obj, nullDistn = "bootstrap", bs.its = 10)
132 | #'
133 | #' # summarize significance results
134 | #' summary(de_odp)
135 | #' @name kidney
136 | #' @return kidney dataset
137 | #' @docType data
138 | #' @keywords datasets
139 | NULL
140 | 
141 | #' @title Gene expression dataset from Idaghdour et al. (2008)
142 | #'
143 | #' @usage
144 | #' data(gibson)
145 | #'
146 | #' @description
147 | #' The data provide gene expression measurements in peripheral blood leukocyte
148 | #' samples from three Moroccan groups leading distinct ways of life:
149 | #' desert nomadic (DESERT), mountain agrarian (VILLAGE), and coastal urban
150 | #' (AGADIR).
151 | #'
152 | #' @format
153 | #' \itemize{
154 | #'   \item batch: Batches in experiment.
155 | #'   \item location: Environment/lifestyle of Moroccan Amazigh groups.
156 | #'   \item gender: Sex of individuals.
157 | #'   \item gibexpr: A 500 rows by 46 columns matrix of gene expression values.
158 | #' }
159 | #'
160 | #' @note
161 | #' These data are a random subset of 500 genes from the total number of genes
162 | #' in the original data set. To download the full data set, go to
163 | #' \url{http://genomine.org/edge/}.
164 | #'
165 | #' @references
166 | #' Idaghdour Y, Storey JD, Jadallah S, and Gibson G. (2008) A genome-wide gene
167 | #' expression signature of lifestyle in peripheral blood of Moroccan Amazighs.
168 | #' PLoS Genetics, 4: e1000052.
169 | #'
170 | #' @examples
171 | #' # import
172 | #' data(gibson)
173 | #' batch <- gibson$batch
174 | #' gender <- gibson$gender
175 | #' location <- gibson$location
176 | #' gibexpr <- gibson$gibexpr
177 | #' cov <- data.frame(Batch = batch, Gender = gender,
178 | #' Location = location)
179 | #'
180 | #' # create deSet for experiment- static experiment
181 | #' mNull <- ~Gender + Batch
182 | #' mFull <- ~Gender + Batch + Location
183 | #'
184 | #' # create deSet object
185 | #' de_obj <- build_models(gibexpr, cov = cov, full.model = mFull,
186 | #' null.model = mNull)
187 | #'
188 | #' # Perform ODP/lrt statistic to determine significant genes in study
189 | #' de_odp <- odp(de_obj, bs.its = 10)
190 | #' de_lrt <- lrt(de_obj, nullDistn = "bootstrap", bs.its = 10)
191 | #'
192 | #' # summarize significance results
193 | #' summary(de_odp)
194 | #' @name gibson
195 | #' @return gibson dataset
196 | #' @docType data
197 | #' @keywords datasets
198 | NULL
199 | 


--------------------------------------------------------------------------------
/R/form_models.R:
--------------------------------------------------------------------------------
  1 | #' Formulates the experimental models
  2 | #'
  3 | #' \code{build_study} generates the full and null models for users unfamiliar
  4 | #' with building models in R. There are two types of experimental designs:
  5 | #' static and time-course. For more details, refer to the vignette.
  6 | #'
  7 | #' @param data \code{matrix}: gene expression data (rows are genes, columns are
  8 | #'   samples).
  9 | #' @param sampling \code{string}: type of study. Either "static" or 
 10 | #'   "timecourse". Default is "static".
 11 | #' @param grp \code{factor}: group assignment in the study (for K-class
 12 | #'   studies). Required for static sampling when \code{bio.var} is not given.
 13 | #' @param tme \code{vector}: time variable in a time course study. Optional.
 14 | #' @param ind \code{factor}: individual factor for repeated observations of the
 15 | #'   same individuals. Optional.
 16 | #' @param bio.var \code{matrix}: biological variables. Optional.
 17 | #' @param basis.df \code{numeric}: degrees of freedom of the basis for time 
 18 | #'   course study. Default is 2.
 19 | #' @param basis.type \code{string}: either "ncs" (natural cubic spline) or
 20 | #'   "poly" (polynomial spline) basis for time course study. Default is "ncs".
 21 | #' @param adj.var \code{matrix}: adjustment variables. Optional.
 22 | #'
 23 | #' @return \code{\linkS4class{deSet}} object
 24 | #'
 25 | #' @examples
 26 | #' # create ExpressionSet object from kidney dataset
 27 | #' library(splines)
 28 | #' data(kidney)
 29 | #' age <- kidney$age
 30 | #' sex <- kidney$sex
 31 | #' kidexpr <- kidney$kidexpr
 32 | #'
 33 | #' # create deSet object from data
 34 | #' de_obj <- build_study(data = kidexpr, adj.var = sex, tme = age,
 35 | #' sampling = "timecourse", basis.df = 4)
 36 | #' @seealso \code{\linkS4class{deSet}}, \code{\link{build_models}}
 37 | #' @author John Storey, Andy Bass
 38 | #' @export
 39 | build_study = function(data, grp = NULL, adj.var = NULL, bio.var = NULL,
 40 |                      tme = NULL, ind = NULL,
 41 |                      sampling = c("static", "timecourse"), basis.df = 2,
 42 |                      basis.type = c("ncs", "poly")) {
 43 |   n <- ncol(data)
 44 |   m <- nrow(data)
 45 |   if (!is.matrix(data)) {
 46 |     stop("data must be a matrix")
 47 |   }
 48 |   if (!is.null(tme)) {
 49 |     if (is.matrix(tme) | is.vector(tme)) {
 50 |       tme <- data.frame(tme)
 51 |     } else {
 52 |       stop("tme must be a matrix")
 53 |     }
 54 |    # intercept <- !apply(tme, 2, var)
 55 |    # tme <- subset(tme, select=!intercept)
 56 |   }
 57 |   if (!is.null(adj.var)) {
 58 |     if (is.matrix(adj.var) | is.vector(adj.var) | is.factor(adj.var)) {
 59 |       adj.var <- data.frame(adj.var)
 60 |     } else {
 61 |       stop("adj.var must be a matrix")
 62 |     }
 63 |     #intercept <- !apply(adj.var, 2, var)
 64 |    # adj.var <- subset(adj.var, select=!intercept)
 65 |   }
 66 |   if (!is.null(bio.var)) {
 67 | #    sampling <- "notApplicable"
 68 |     if (is.matrix(bio.var)| is.vector(bio.var) | is.factor(bio.var)) {
 69 |       bio.var <- data.frame(bio.var)
 70 |     } else {
 71 |       stop("bio.var must be a matrix")
 72 |     }
 73 |     #intercept <- !apply(bio.var, 2, var)
 74 |    # bio.var <- subset(bio.var, select=!intercept)
 75 |     # Create models
 76 |     if (is.null(adj.var)) {
 77 |       pdat <- data.frame(bio.var)
 78 |       fmod <- paste("~", paste(names(pdat), collapse=" + "))
 79 |       nmod <- "~1"
 80 |     } else {
 81 |       pdat <- data.frame(adj.var, bio.var)
 82 |       fmod <- paste("~", paste(names(pdat), collapse=" + "))
 83 |       nmod <- paste("~", paste(names(adj.var), collapse=" + "))
 84 |     }
 85 |   } else {
 86 |     sampling <- match.arg(sampling, choices=c("static", "timecourse"))
 87 |     if (!is.null(grp)) {
 88 |       if (is.factor(grp)) {
 89 |         grp <- data.frame(grp = as.factor(grp))
 90 |       } else {
 91 |         stop("grp must be a factor")
 92 |       }
 93 |      # intercept <- !apply(grp, 2, var)
 94 |       #grp <- subset(grp, select=!intercept)
 95 |     } else {
 96 |       if(sampling == "static") {
 97 |         stop("grp variable cannot be missing for static sampling.")
 98 |       }
 99 |       grp <- data.frame(grp=rep(1,n))
100 |     }
101 |     g <- nrow(unique(grp))
102 |     if (sampling == "static") {
103 |       if (g==1) {
104 |         stop("grp must have more than one unique value for static sampling.")
105 |       }
106 |       if (is.null(adj.var)) {
107 |         pdat <- data.frame(grp)
108 |         nmod <- "~1"
109 |         fmod <- paste("~", paste(names(pdat), collapse=" + "))
110 |       } else {
111 |         pdat <- data.frame(adj.var, grp)
112 |         fmod <- paste("~", paste(names(pdat), collapse=" + "))
113 |         nmod <- paste("~", paste(names(adj.var), collapse=" + "))
114 |       }
115 |     }
116 | 
117 |     if (sampling == "timecourse") {
118 |       basis.type <- match.arg(basis.type)
119 |       varName <- colnames(data.frame(tme))
120 |       if (length(varName) != 1) stop("Only one time variable is allowed. See ?deSet for information on how to create complicated models")
121 |       if (basis.type == "ncs") {
122 |         time.basis <- paste("ns(", varName,", df=", basis.df,", intercept=FALSE)", sep="")
123 |       } else if (basis.type == "poly") {
124 |         time.basis <- paste("bs(", varName,", df=", basis.df,", intercept=FALSE)", sep="")
125 |       }
126 |       if (g == 1) {
127 |         # time course with no groups
128 |         if (is.null(adj.var)) {
129 |           pdat <- data.frame(tme)
130 |           nmod <- "~1"
131 |           fmod <- paste("~", time.basis)
132 |         } else {
133 |           pdat <- data.frame(adj.var, tme)
134 |           fmod <- paste("~", paste(names(adj.var), collapse=" + "), "+", time.basis)
135 |           nmod <- paste("~", paste(names(adj.var), collapse=" + "))
136 |         }
137 |       } else {
138 |         if (is.null(adj.var)) {
139 |           pdat <- data.frame(tme, grp)
140 |         } else {
141 |           pdat <- data.frame(tme, adj.var, grp)
142 |         }
143 |         # time course with groups
144 |         nmod <- paste(paste("~", paste(names(pdat)[-1], collapse=" + ")), "+", time.basis)
145 |         fmod <- paste(paste("~", paste(names(pdat)[-1], collapse=" + ")),"+",time.basis,"+", paste( "(", paste(names(pdat)[ncol(pdat)], collapse=" + ", sep=""), ")", ":", time.basis))  }
146 |     }
147 |   }
148 |   rownames(pdat) <- colnames(data)
149 |   exp_set <- ExpressionSet(as.matrix(data), AnnotatedDataFrame(pdat))
150 |   edgeObj <- deSet(exp_set, full.model=as.formula(fmod),
151 |                      null.model=as.formula(nmod), individual=ind)
152 |   return(edgeObj)
153 | }
154 | 
#' Generate a deSet object with full and null models
#'
#' \code{build_models} creates a \code{\link{deSet}} object. The user inputs
#' the full and null models.
#'
#' @param data \code{matrix}: gene expression data.
#' @param cov \code{data.frame}: the covariates in the study.
#' @param full.model \code{formula}: the adjustment and the biological
#' variables of interest. Required: an error is raised when it is left as
#' NULL.
#' @param null.model \code{formula}: the adjustment variables. Default is
#' NULL, in which case the intercept-only model \code{~1} is used.
#' @param ind \code{factor}: individuals sampled in the study. Default is
#' NULL. Optional.
#'
#' @return \code{\linkS4class{deSet}} object
#'
#' @examples
#' # create ExpressionSet object from kidney dataset
#' library(splines)
#' data(kidney)
#' age <- kidney$age
#' sex <- kidney$sex
#' kidexpr <- kidney$kidexpr
#' cov <- data.frame(sex = sex, age = age)
#'
#' # create models
#' null.model <- ~sex
#' full.model <- ~sex + ns(age, df=4)
#'
#' # create deSet object from data
#' de_obj <- build_models(data = kidexpr, cov = cov, null.model = null.model,
#' full.model = full.model)
#' @seealso \code{\linkS4class{deSet}}, \code{\link{build_study}}
#' @author John Storey, Andy Bass
#' @export
build_models <- function(data, cov, full.model = NULL, null.model = NULL,
                         ind = NULL) {
  # Input validation, in the same order as the error messages below.
  if (!is.matrix(data)) {
    stop("data must be a matrix")
  }
  if (!is.data.frame(cov)) {
    stop("cov must be a data frame")
  }
  if (is.null(full.model)) {
    stop("need an alternative model")
  }
  # A missing null model means "intercept only".
  if (is.null(null.model)) {
    null.model <- ~1
  }
  if (!is(full.model, "formula") || !is(null.model, "formula")) {
    stop("alternative and null models must be formatted as a formula")
  }

  exp_set <- ExpressionSet(data, AnnotatedDataFrame(cov))
  deSet(exp_set, full.model = full.model, null.model = null.model,
        individual = ind)
}
212 | 


--------------------------------------------------------------------------------
/R/getMethods.R:
--------------------------------------------------------------------------------
 1 | #' @rdname sType
 2 | setMethod("sType",
 3 |           signature = signature(object = "deFit"),
 4 |           function(object) {
 5 |             slot(object, "stat.type")
 6 |           })
 7 | 
 8 | #' @rdname betaCoef
 9 | setMethod("betaCoef",
10 |           signature = signature(object = "deFit"),
11 |           function(object) {
12 |             slot(object, "beta.coef")
13 |           })
14 | #' @rdname resFull
15 | setMethod("resFull",
16 |           signature = signature(object = "deFit"),
17 |           function(object) {
18 |             slot(object, "res.full")
19 |           })
20 | #' @rdname resNull
21 | setMethod("resNull",
22 |           signature = signature(object = "deFit"),
23 |           function(object) {
24 |             slot(object, "res.null")
25 |           })
26 | #' @rdname fitFull
27 | setMethod("fitFull",
28 |           signature = signature(object = "deFit"),
29 |           function(object) {
30 |             slot(object, "fit.full")
31 |           })
32 | #' @rdname fitNull
33 | setMethod("fitNull",
34 |           signature = signature(object = "deFit"),
35 |           function(object) {
36 |             slot(object, "fit.null")
37 |           })
# Read-only accessors for deSet objects: each method returns the content of
# the slot that backs the generic of the same purpose.

#' @rdname fullModel
setMethod("fullModel", signature(object = "deSet"),
          definition = function(object) object@full.model)

#' @rdname nullModel
setMethod("nullModel", signature(object = "deSet"),
          definition = function(object) object@null.model)

#' @rdname fullMatrix
setMethod("fullMatrix", signature(object = "deSet"),
          definition = function(object) object@full.matrix)

#' @rdname nullMatrix
setMethod("nullMatrix", signature(object = "deSet"),
          definition = function(object) object@null.matrix)

#' @rdname individual
setMethod("individual", signature(object = "deSet"),
          definition = function(object) object@individual)

#' @rdname qvalueObj
setMethod("qvalueObj", signature(object = "deSet"),
          definition = function(object) object@qvalueObj)
74 | 


--------------------------------------------------------------------------------
/R/kl_clust-functions.R:
--------------------------------------------------------------------------------
  1 | klmod <- function(de.fit, nf, n.mods = 50) {
  2 |   m <- nrow(de.fit@fit.full)
  3 |   n <- ncol(de.fit@fit.full)
  4 |   if (m <= n.mods) {
  5 |     mod.member <- as.factor(1:m)
  6 |     return(mod.member)
  7 |   }
  8 |   sigma2 <- rowSums(de.fit@res.full ^ 2) / (n - nf)
  9 |   int.n.mods <- n.mods
 10 |   orig.n.mods <- n.mods
 11 |   int.center <- sample(x = m,
 12 |                        size = n.mods, replace = FALSE)
 13 |   center.fitFull <- de.fit@fit.full[int.center, ]
 14 |   center.var <- sigma2[int.center]
 15 | 
 16 |   eps <- 0.1
 17 |   mod.member <- NULL
 18 |   KL <- matrix(nrow = m,
 19 |                ncol = n.mods)
 20 |   itr <- 0
 21 |   KL.cutoff <- 1
 22 | 
 23 |   pos.center.fitFull <- center.fitFull
 24 |   pos.center.var <- center.var
 25 |   while (KL.cutoff > eps) {
 26 |     itr <- itr + 1
 27 |     pre.center.fitFull <- pos.center.fitFull
 28 |     pre.center.var <- pos.center.var
 29 | 
 30 |     temp.center.fitFull <- as.vector(t(center.fitFull))
 31 |     temp.fitFull <- as.vector(t(de.fit@fit.full))
 32 | 
 33 |     kldd <- t(matrix(kl(temp.center.fitFull, temp.fitFull, center.var,
 34 |                         sigma2, n=n), ncol=m))
 35 |     mod.member = apply(kldd, 1, function(x) which.min(x))
 36 | 
 37 |     # First of all, we check whether there is any cluster that does not
 38 |     # include any gene. For this case, we exclude this cluster from the
 39 |     # original clusters. Therefore, it reduces the number of clusters
 40 |     notempty <- 1:n.mods %in% unique(mod.member)
 41 |     #    notempty <- sort(unique(mod.member))
 42 |     # all.equal(notempty, notempty2)
 43 |     center.fitFull <- center.fitFull[notempty, ]
 44 |     center.var <- center.var[notempty]
 45 |     KL <- KL[notempty, ]
 46 | 
 47 |     # Once the number of clusters were decided, we need to find new centers
 48 |     # for each cluster
 49 |     if (any(!notempty)) {
 50 |       n.mods <- sum(!notempty)
 51 |     }
 52 | 
 53 |     # Average the mean and variance over genes included in each cluster
 54 |     l <- 1
 55 |     for (i in 1:orig.n.mods) {
 56 |       ntmp <- sum(mod.member == i)
 57 |       if (ntmp == 0) {
 58 |         next
 59 |       } else {
 60 |         if (ntmp == 1) {
 61 |           center.fitFull[l, ] <- de.fit@fit.full[mod.member == i, ]
 62 |         } else {
 63 |           center.fitFull[l, ] <- colMeans(de.fit@fit.full[mod.member == i, ])
 64 |         }
 65 |         center.var[l] <- drop(sum(sigma2[mod.member == i]) / ntmp)
 66 |         l <- l + 1
 67 |       }
 68 |     }
 69 | 
 70 |     pos.center.fitFull <- center.fitFull
 71 |     pos.center.var <- center.var
 72 |     if (length(pos.center.var) != length(pre.center.var)) {
 73 |       # if the n.mods is reduced
 74 |       KL.cutoff <- 1
 75 |     } else {
 76 |       KL.cutoff <- NULL
 77 |       res2 <- rowSums((pos.center.fitFull - pre.center.fitFull) ^ 2)
 78 |       normconst <- 1 / pos.center.var + 1 / pre.center.var
 79 |       centerconst <- n * ((pos.center.var / pre.center.var + pre.center.var / pos.center.var) / 2 - 1)
 80 |       KL.cutoff <- res2 / normconst + centerconst
 81 |     }
 82 |     KL.cutoff <- max(KL.cutoff)
 83 |   }
 84 |   return(as.factor(mod.member))
 85 | }
 86 | 
 87 | mod.parms <- function(de.fit, nf, nn, clMembers) {
 88 |   # Initlizations
 89 |   n <- ncol(de.fit@res.full)
 90 |   varFull <- rowSums(de.fit@res.full ^ 2) / (n - nf)
 91 |   varNull <- rowSums(de.fit@res.null ^ 2) / (n - nn)
 92 |   mod.membership <- clMembers
 93 |   n.mods <- length(unique(mod.membership))
 94 | 
 95 |   mod.fitFull <- matrix(nrow = n.mods,
 96 |                         ncol = n)
 97 |   n.per.mod <- vector(length = n.mods)
 98 |   mod.varNull <- vector(length = n.mods)
 99 |   mod.varFull <- vector(length = n.mods)
100 |   # Calculate statistics (variance and mean) for each cluster
101 |   for (i in 1:n.mods) {
102 |     if(length(mod.membership[mod.membership == i]) == 1) {
103 |       n.per.mod[i] <- 1
104 |       mod.fitFull[i, ] <- de.fit@fit.full[mod.membership==i, ]
105 |     } else {
106 |       n.per.mod[i] <- sum(mod.membership == i)
107 |       mod.fitFull[i, ] <- colMeans(de.fit@fit.full[mod.membership == i, ])
108 |     }
109 |     mod.varNull[i] <- mean(varNull[mod.membership == i])
110 |     mod.varFull[i] <- mean(varFull[mod.membership == i])
111 |   }
112 |   mod.fitNull <- 0*mod.fitFull
113 |   # Assign slots
114 |   return(list(mu.full = mod.fitFull, sig.full = sqrt(mod.varFull),
115 |               mu.null = mod.fitNull, sig.null = sqrt(mod.varNull),
116 |               n.per.mod = n.per.mod, clustMembers = clMembers))
117 | }
118 | 
kl <- function(temp.center.fitFull, temp.fitFull, center.var, sigma2, n) {
  # Thin wrapper around the compiled "kldistance" routine.
  #
  # The number of genes is taken from length(sigma2) and the number of
  # clusters from length(center.var); the routine fills a double vector of
  # length (genes * clusters), which is returned as-is.
  n.genes <- length(sigma2)
  n.centers <- length(center.var)
  native <- .C("kldistance",
               centerFit = as.double(temp.center.fitFull),
               centerVar = as.double(center.var),
               fit = as.double(temp.fitFull),
               var = as.double(sigma2),
               m = as.integer(n.genes),
               nc = as.integer(n.centers),
               n = as.integer(n),
               kldd = double(n.genes * n.centers))
  native$kldd
}
135 | 
mod.df <- function(x) {
  # Trace of the projection matrix x (x'x)^-1 x'. If the computation fails
  # (for example because x'x cannot be inverted) the "try-error" object is
  # returned instead of raising the error.
  try({
    hat <- x %*% solve(t(x) %*% x) %*% t(x)
    sum(diag(hat))
  }, silent=TRUE)
}
140 | 


--------------------------------------------------------------------------------
/R/lrt-functions.R:
--------------------------------------------------------------------------------
  1 | lrtStat <- function(resNull, resFull, post.var = NULL) {
  2 |   rss.full <-  rowSums(resFull ^ 2)
  3 |   rss.null <- rowSums(resNull ^ 2)
  4 | 
  5 |   # F-statistic
  6 |   if (is.null(post.var)) {
  7 |     stat <- (rss.null - rss.full) / rss.full
  8 |   } else {
  9 |     stat <- (rss.null - rss.full) / post.var
 10 |   }
 11 |   return(stat)
 12 | }
 13 | 
 14 | #	EMPIRICAL BAYES SQUEEZING OF VARIANCES
 15 | 
 16 | squeezeVar <- function(var, df, covariate=NULL, winsor.tail.p=c(0.05,0.1))
 17 |   #	Empirical Bayes posterior variances
 18 |   #	Gordon Smyth
 19 |   #	2 March 2004.  Last modified 2 Dec 2013.
 20 | {
 21 |   n <- length(var)
 22 |   if(n == 0) stop("var is empty")
 23 |   if(n == 1) return(list(var.post=var,var.prior=var,df.prior=0))
 24 |   if(length(df)==1) { 
 25 |     df <- rep.int(df,n)
 26 |   } else {
 27 |     if(length(df) != n) stop("lengths differ")
 28 |   }
 29 |   
 30 |   #	Estimate prior var and df
 31 |   fit <- fitFDist(var, df1=df, covariate=covariate)
 32 |   
 33 |   #	Prior var will be vector if robust=TRUE, otherwise scalar
 34 |   var.prior <- fit$scale
 35 |   
 36 |   #	Prior df will be vector if covariate is non-NULL, otherwise scalar
 37 |   df.prior <- fit$df2.shrunk
 38 |   if(is.null(df.prior)) df.prior <- fit$df2
 39 |   
 40 |   #	Check estimated prior df
 41 |   if(is.null(df.prior) || any(is.na(df.prior))) stop("Could not estimate prior df")
 42 |   
 43 |   #	Squeeze the posterior variances
 44 |   df.total <- df + df.prior
 45 |   var[df==0] <- 0 # guard against missing or infinite values
 46 |   Infdf <- df.prior==Inf
 47 |   if(any(Infdf)) {
 48 |     var.post <- rep(var.prior,length.out=n)
 49 |     i <- which(!Infdf)
 50 |     if(length(i)) {
 51 |       if(is.null(covariate))
 52 |         s02 <- var.prior
 53 |       else
 54 |         s02 <- var.prior[i]
 55 |       var.post[i] <- (df[i]*var[i] + df.prior[i]*s02) / df.total[i]
 56 |     }
 57 |   } else {
 58 |     var.post <- (df*var + df.prior*var.prior) / df.total
 59 |   }
 60 |   
 61 |   list(df.prior=df.prior,var.prior=var.prior,var.post=var.post)
 62 | }
 63 | 
 64 | fitFDist <- function(x,df1,covariate=NULL)
 65 |   #	Moment estimation of the parameters of a scaled F-distribution
 66 |   #	The first degrees of freedom are given
 67 |   #	Gordon Smyth and Belinda Phipson
 68 |   #	8 Sept 2002.  Last revised 27 Oct 2012.
 69 | {
 70 |   #	Check covariate
 71 |   if(!is.null(covariate)) {
 72 |     if(length(covariate) != length(x)) stop("covariate and x must be of same length")
 73 |     if(any(is.na(covariate))) stop("NA covariate values not allowed")
 74 |     isfin <- is.finite(covariate)
 75 |     if(!all(isfin)) {
 76 |       if(!any(isfin))
 77 |         covariate <- sign(covariate)
 78 |       else {
 79 |         r <- range(covariate[isfin])
 80 |         covariate[covariate == -Inf] <- r[1]-1
 81 |         covariate[covariate == Inf] <- r[2]+1
 82 |       }
 83 |     }
 84 |     splinedf <- min(4,length(unique(covariate)))
 85 |     if(splinedf < 2) covariate <- NULL
 86 |   }
 87 |   #	Remove missing or infinite values and zero degrees of freedom
 88 |   ok <- is.finite(x) & is.finite(df1) & (x > -1e-15) & (df1 > 1e-15)
 89 |   notallok <- !all(ok)
 90 |   if(notallok) {
 91 |     x <- x[ok]
 92 |     df1 <- df1[ok]
 93 |     if(!is.null(covariate)) {
 94 |       covariate2 <- covariate[!ok]
 95 |       covariate <- covariate[ok]
 96 |     }
 97 |   }
 98 |   n <- length(x)
 99 |   if(n==0) return(list(scale=NA,df2=NA))
100 |   
101 |   #	Avoid exactly zero values
102 |   x <- pmax(x,0)
103 |   m <- median(x)
104 |   if(m==0) {
105 |     warning("More than half of residual variances are exactly zero: eBayes unreliable")
106 |     m <- 1
107 |   } else {
108 |     if(any(x==0)) warning("Zero sample variances detected, have been offset",call.=FALSE)
109 |   }
110 |   x <- pmax(x, 1e-5 * m)
111 |   
112 |   #	Better to work on with log(F)
113 |   z <- log(x)
114 |   e <- z-digamma(df1/2)+log(df1/2)
115 |   
116 |   if(is.null(covariate)) {
117 |     emean <- mean(e)
118 |     evar <- sum((e-emean)^2)/(n-1)
119 |   } else {
120 |     if(!requireNamespace("splines",quietly=TRUE)) stop("splines package required but is not available")
121 |     design <- try(splines::ns(covariate,df=splinedf,intercept=TRUE),silent=TRUE)
122 |     if(is(design,"try-error")) stop("Problem with covariate")
123 |     fit <- lm.fit(design,e)
124 |     if(notallok) {
125 |       design2 <- predict(design,newx=covariate2)
126 |       emean <- rep.int(0,n+length(covariate2))
127 |       emean[ok] <- fit$fitted
128 |       emean[!ok] <- design2 %*% fit$coefficients
129 |     } else {
130 |       emean <- fit$fitted
131 |     }
132 |     evar <- mean(fit$residuals[-(1:fit$rank)]^2)
133 |   }
134 |   evar <- evar - mean(trigamma(df1/2))
135 |   if(evar > 0) {
136 |     df2 <- 2*trigammaInverse(evar)
137 |     s20 <- exp(emean+digamma(df2/2)-log(df2/2))
138 |   } else {
139 |     df2 <- Inf
140 |     s20 <- exp(emean)
141 |   }
142 |   list(scale=s20,df2=df2)
143 | }
144 | 
trigammaInverse <- function(x) {
  #	Solve trigamma(y) = x for y
  #	Gordon Smyth
  #	8 Sept 2002.  Last revised 12 March 2004.

  #	Non-numeric or zero length input
  if (!is.numeric(x)) stop("Non-numeric argument to mathematical function")
  if (length(x) == 0) return(numeric(0))

  #	Classify every element once; the classes are mutually exclusive and
  #	tested in the order: missing, negative, very large, very small.
  is.miss <- is.na(x)
  is.neg <- !is.miss & (x < 0)
  is.huge <- !is.miss & !is.neg & (x > 1e7)
  is.tiny <- !is.miss & !is.neg & !is.huge & (x < 1e-6)
  is.regular <- !(is.miss | is.neg | is.huge | is.tiny)

  #	Missing values are passed through unchanged.
  y <- x
  if (any(is.neg)) {
    y[is.neg] <- NaN
    warning("NaNs produced")
  }
  #	Closed-form approximations at the extremes of the range
  if (any(is.huge)) y[is.huge] <- 1/sqrt(x[is.huge])
  if (any(is.tiny)) y[is.tiny] <- 1/x[is.tiny]

  if (any(is.regular)) {
    #	Newton's method
    #	1/trigamma(y) is convex, nearly linear and strictly > y-0.5,
    #	so iteration to solve 1/x = 1/trigamma is monotonically convergent
    target <- x[is.regular]
    root <- 0.5+1/target
    n.iter <- 0
    repeat {
      n.iter <- n.iter+1
      tri <- trigamma(root)
      step <- tri*(1-tri/target)/psigamma(root,deriv=2)
      root <- root+step
      if (max(-step/root) < 1e-8) break
      if (n.iter > 50) {
        warning("Iteration limit exceeded")
        break
      }
    }
    y[is.regular] <- root
  }
  y
}
202 | 
203 | 


--------------------------------------------------------------------------------
/R/misc.R:
--------------------------------------------------------------------------------
  1 | bootstrap <- function(object, obs.fit, clustParms = NULL, bs.its = 100,
  2 |                       verbose = TRUE, mod.F = FALSE, post.var = NULL) {
  3 |   n.probes <- nrow(obs.fit@res.full)
  4 |   nf <- mod.df(object@full.matrix)
  5 |   null.stat <- matrix(nrow = n.probes,
  6 |                       ncol = bs.its)
  7 |   sType <- obs.fit@stat.type
  8 |   for (i in 1:bs.its) {
  9 |     if (verbose) {
 10 |       cat("\r", "Null iteration: ", i)
 11 |       if (i == bs.its) cat("\n")
 12 |     }
 13 |     exprs(object) <- null(obs.fit = obs.fit, nf = nf,
 14 |                           ind = object@individual)
 15 |     null.fit <- fit_models(object,
 16 |                            stat.type = sType)
 17 |     if (sType == "lrt") {
 18 |       if (!is.null(post.var)) {
 19 |         nFull <- ncol(object@full.matrix)
 20 |         n <- ncol(object)
 21 |         df_full <- n - nFull
 22 |         var_full <- rowSums(null.fit@res.full ^ 2) / df_full
 23 |         pv <- (df_full*var_full + post.var$df.prior*post.var$var.prior) / (df_full + post.var$df.prior)
 24 |       } else {
 25 |         pv <- NULL
 26 |       }
 27 |       null.stat[, i] <- lrtStat(resNull = null.fit@res.null,
 28 |                                 resFull = null.fit@res.full,
 29 |                                 post.var = pv)
 30 |       
 31 |     }
 32 |     else {
 33 |       null.stat[, i]  <- odpStat(n.res = null.fit@res.null,
 34 |                                  clustParms = clustParms)
 35 |     }
 36 |   }
 37 |   return(null.stat)
 38 | }
 39 | rescale <- function(x, sig) {
 40 |   means <- rowMeans(x)
 41 |   n <- ncol(x)
 42 |   rowsds <- sqrt((rowMeans(x ^ 2) - means ^ 2) * n / (n - 1))
 43 |   ret <- (x - means) * sig / rowsds + means
 44 |   return(ret)
 45 | }
 46 | null <- function(obs.fit, nf, ind) {
 47 |   stat.var <- obs.fit@stat.type
 48 |   n <- ncol(obs.fit@res.full)
 49 |   if (sum(!is.na(ind[1])) > 0) {
 50 |     ind <- model.matrix(~-1 + as.factor(ind))
 51 |     wts <- sqrt(1 - diag(ind %*% solve(t(ind) %*% ind) %*% t(ind)))
 52 |   } else {
 53 |     ind <- NULL
 54 |     wts <- rep(1, n)
 55 |   }
 56 |   wts <- t(t(sqrt(1 - obs.fit@dH.full)) * wts)
 57 |   res.full <- obs.fit@res.full * wts ^ (-1)
 58 |   # Random mix columns of residuals from full model
 59 |   vv <- sample(1:n, replace = TRUE)
 60 |   bs.res <- res.full[, vv]
 61 |   # Add random residuals to null data
 62 |   if (stat.var == "lrt") {
 63 |     null.dat <- obs.fit@fit.null + bs.res
 64 |   } else {
 65 |     sig1 <- sqrt(rowSums(obs.fit@res.full ^ 2) / (n - nf))
 66 |     bs.res <- rescale(x = bs.res,
 67 |                       sig = sig1)
 68 |     null.dat <- obs.fit@fit.null + bs.res
 69 |   }
 70 |   return(null.dat)
 71 | }
 72 | mod.df <- function(x) {
 73 |   df <- try(sum(diag(x %*% solve(t(x) %*% x) %*% t(x))), silent=TRUE)
 74 |   return(df)
 75 | }
 76 | 
 77 | createSet <- function(object, nMod=NULL, fMod=NULL, ind=NULL, grp=factor(NA)) {
 78 |   # Create deSet
 79 |   #  require(splines)
 80 |   object@null.model <- nMod
 81 |   object@full.model <- fMod
 82 |   mmf <- model.matrix(object = fMod, data = object)
 83 |   mmn <- model.matrix(object = nMod, data = object)
 84 |   colnames(mmf) <- NULL
 85 |   colnames(mmn) <- NULL
 86 |   object@null.matrix <- mmn
 87 |   object@full.matrix <- mmf
 88 |   object@individual <- as.factor(ind)
 89 |   validObject(object)
 90 |   object
 91 | }
 92 | 
 93 | rm.zero.cols <- function(x, eps = 10e-12) {
 94 |   return(x[, colSums(abs(x)) > eps])
 95 | }
 96 | 
 97 | 
 98 | projMatrix <- function(x) {
 99 |   H <- x %*% ginv(t(x) %*% x) %*% t(x)
100 |   H
101 | }
102 | 


--------------------------------------------------------------------------------
/R/odp-functions.R:
--------------------------------------------------------------------------------
 1 | #' @useDynLib edge odpScoreCluster
 2 | odp.score <- function(s.dat.cl, mu, sigma, null, m, n, cluster) {
 3 |   # Determines ODP score
 4 |   #
 5 |   # Args:
 6 |   #   s.dat.cl: Matrix of fitted data by full model
 7 |   #   mu: Vector means of clusters
 8 |   #   sigma: Vector of sd of clusters
 9 |   #   null: Boolean whether NULL model or not
10 |   #   m: Number of genes
11 |   #   n: Number of probes/arrays
12 |   #   cluster: Vector of the number of members in each cluster
13 |   #
14 |   # Returns:
15 |   #   scr: Vector of ODP score of each gene
16 |   # Initilizations
17 |   p <- length(sigma)
18 | 
19 |   # Call to C file to compute ODP score
20 |   scr <- .C("odpScoreCluster",
21 |             sumDat = as.double(s.dat.cl),
22 |             mu = as.double(mu),
23 |             sigma = as.double(sigma),
24 |             m = as.integer(m),
25 |             n = as.integer(n),
26 |             p = as.integer(p),
27 |             null = as.integer(null),
28 |             cluster = as.integer(cluster),
29 |             scr = double(m))$scr
30 | 
31 |   return(scr)
32 | }
33 | 
odpStat <- function(n.res, clustParms) {
  # Determines ODP statistic
  #
  # Args:
  #   n.res: null residuals (genes in rows)
  #   clustParms: clustering parameters; the elements mu.full, mu.null,
  #               sig.full, sig.null and n.per.mod are read
  #
  # Returns:
  #   vector 2 * num / (den + num), where den is the score under the null
  #   cluster parameters and num the score under the full ones
  n.genes <- nrow(n.res)
  n.arrays <- ncol(n.res)
  # Row-major flattening of the residuals, shared by both score calls.
  flat.res <- t(n.res)

  # Score under the null cluster parameters
  den <- odp.score(c(flat.res, t(clustParms$mu.null)),
                   mu = rep(0, length(clustParms$sig.null)),
                   sigma = clustParms$sig.null,
                   null = TRUE,
                   m = n.genes,
                   n = n.arrays,
                   cluster = clustParms$n.per.mod)
  # Score under the full cluster parameters
  num <- odp.score(c(flat.res, t(clustParms$mu.full)),
                   mu = rowSums(clustParms$mu.full ^ 2),
                   sigma = clustParms$sig.full,
                   null = FALSE,
                   m = n.genes,
                   n = n.arrays,
                   cluster = clustParms$n.per.mod)

  # ODP statistic
  2 * num / (den + num)
}
65 | 


--------------------------------------------------------------------------------
/R/setMethods.R:
--------------------------------------------------------------------------------
 1 | #' @rdname individual
 2 | setReplaceMethod("individual",
 3 |                  signature = signature(object = "deSet"),
 4 |                  function(object, value) {
 5 |                    object@individual <- value
 6 |                    validObject(object)
 7 |                    object
 8 |                  })
 9 | #' @rdname qvalueObj
10 | setReplaceMethod("qvalueObj",
11 |                  signature = signature(object = "deSet"),
12 |                  function(object, value) {
13 |                    object@qvalueObj <- value
14 |                    validObject(object)
15 |                    object
16 |                  })
17 | #' @rdname fullModel
18 | setReplaceMethod("fullModel",
19 |                  signature = signature(object = "deSet"),
20 |                  function(object, value) {
21 |                    object@full.model <- value
22 |                    fullMatrix(object) <- model.matrix(object = value, data = object)
23 |                    validObject(object)
24 |                    object
25 |                  })
26 | #' @rdname nullModel
27 | setReplaceMethod("nullModel",
28 |                  signature = signature(object = "deSet"),
29 |                  function(object, value) {
30 |                    object@null.model <- value
31 |                    nullMatrix(object) <- model.matrix(object = value, data = object)
32 |                    validObject(object)
33 |                    object
34 |                  })
35 | #' @rdname fullMatrix
36 | setReplaceMethod("fullMatrix",
37 |                  signature = signature(object = "deSet"),
38 |                  function(object, value) {
39 |                    object@full.matrix <- value
40 |                    validObject(object)
41 |                    object
42 |                  })
43 | #' @rdname nullMatrix
44 | setReplaceMethod("nullMatrix",
45 |                  signature = signature(object = "deSet"),
46 |                  function(object, value) {
47 |                    object@null.matrix <- value
48 |                    validObject(object)
49 |                    object
50 |                  })
51 | 


--------------------------------------------------------------------------------
/R/wls.R:
--------------------------------------------------------------------------------
 1 | fit_wmodels <- function(object, w = NULL,  stat.type = c("lrt", "odp")) {
 2 |   exprsData <- exprs(object)
 3 |   n <- ncol(exprsData)
 4 |   nr <- nrow(exprsData)
 5 |   stat.var <- match.arg(stat.type, c("lrt", "odp"))
 6 |   null.matrix <- object@null.matrix
 7 |   full.matrix <- object@full.matrix
 8 |   if (length(object@individual) != 0) {
 9 |     ind.matrix <- model.matrix(~-1 + as.factor(object@individual))
10 |     Hi <- projMatrix(ind.matrix)
11 |     fitInd <- t(Hi %*% t(exprsData))
12 |     exprsData <- exprsData - fitInd
13 |     full.matrix <- full.matrix - Hi %*% full.matrix
14 |     null.matrix <- null.matrix - Hi %*% null.matrix
15 |     full.matrix <- rm.zero.cols(full.matrix)
16 |     null.matrix <- rm.zero.cols(null.matrix)
17 |   }
18 |   fitFull <- fitNull <- resNull <- resFull <- dHFull <- matrix(nrow=nr, ncol=n)
19 |   for (i in 1:nr) {
20 |     wlm_null <- lm.wfit(x = null.matrix, y = exprsData[i,], w = w[i,])
21 |     fitNull[i,] <- wlm_null$fitted.values
22 |     resNull[i,] <- wlm_null$residuals * sqrt(wlm_null$weights)
23 |     if (stat.var != "odp") {
24 |       wlm_full <- lm.wfit(x = full.matrix, y = exprsData[i,], w = w[i,])
25 |       dHFull[i,] <- diag(projMatrix(sqrt(w[i,]) * full.matrix))# double check
26 |       fitFull[i,] <- wlm_full$fitted.values
27 |       B.coef <- matrix(NA, ncol = length(w[i,]))#wlm_full$coefficients
28 |       resFull[i,] <- wlm_full$residuals * sqrt(wlm_full$weights)
29 |     } else {
30 |       # W <- diag(sqrt(w[i,]))
31 |       w_sqrt <- sqrt(w[i,])
32 |       f.matrix.scaled <- full.matrix * w_sqrt
33 |       H.null <- projMatrix(null.matrix * w_sqrt)
34 |       f.matrix.scaled <- f.matrix.scaled - H.null %*% f.matrix.scaled
35 |       f.matrix.scaled <- rm.zero.cols(f.matrix.scaled)
36 |       H.full <- projMatrix(f.matrix.scaled)
37 |       res.n <- wlm_null$residuals * w_sqrt
38 |       B.coef <- matrix(NA, ncol = length(w_sqrt))#res.n %*% full.matrix.scaled %*% ginv(t(full.matrix.scaled) %*% full.matrix.scaled)
39 |       dHFull[i,] <- diag(H.full)
40 |       fitFull[i,] <- H.full %*% res.n
41 |       resFull[i,] <- res.n - fitFull[i,]
42 |     }
43 |   }
44 |   efObj <- new("deFit", fit.full = fitFull, fit.null = fitNull,
45 |                dH.full = dHFull, res.full = resFull,
46 |                res.null = resNull, beta.coef = B.coef,
47 |                stat.type = stat.var)
48 |   return(efObj)
49 | }
50 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | <a href="http://www.bioconductor.org/packages/release/bioc/html/edge.html#since"><img border="0" src="http://www.bioconductor.org/shields/years-in-bioc/edge.svg" title="How long since the package was first in a released Bioconductor version (or is it in devel only)."></a> <a href="http://bioconductor.org/packages/stats/bioc/edge.html"><img border="0" src="http://www.bioconductor.org/shields/downloads/edge.svg" title="Percentile (top 5/20/50% or 'available') of downloads over last 6 full months. Comparison is done across all package categories (software, annotation, experiment)."></a> <a href="https://support.bioconductor.org/t/edge/"><img border="0" src="http://www.bioconductor.org/shields/posts/edge.svg" title="Support site activity, last 6 months: tagged questions/avg. answers per question/avg. comments per question/accepted answers, or 0 if no tagged posts."></a> <a href="http://www.bioconductor.org/packages/release/bioc/html/edge.html#svn_source"><img border="0" src="http://www.bioconductor.org/shields/commits/bioc/edge.svg" title="average Subversion commits (to the devel branch) per month for the last 6 months"></a>
  2 | edge: Extraction of Differential Gene Expression
  3 | ====
  4 | 
  5 | Introduction
  6 | ------
  7 | The edge package implements methods for carrying out differential
  8 | expression analyses of genome-wide gene expression studies. Significance
  9 | testing using the optimal discovery procedure and generalized likelihood
 10 | ratio tests (equivalent to F-tests and t-tests) are implemented for general study
 11 | designs. Special functions are available to facilitate the analysis of
 12 | common study designs, including time course experiments. Other packages
 13 | such as [snm](http://www.bioconductor.org/packages/release/bioc/html/snm.html), [sva](http://www.bioconductor.org/packages/release/bioc/html/sva.html), and [qvalue](https://github.com/jdstorey/qvalue) are integrated in edge to provide a wide range
 14 | of tools for gene expression analysis.
 15 | 
 16 | 
 17 | ### Installation and Documentation
 18 | 
 19 | To install the Bioconductor release version, open R and type:
 20 | ```R
 21 | source("http://bioconductor.org/biocLite.R")
 22 | biocLite("edge")
 23 | ```
 24 | 
 25 | To install the development version, open R and type:
 26 | ```R
 27 | install.packages("devtools")
 28 | library("devtools")
 29 | install_github(c("jdstorey/qvalue","jdstorey/edge"), build_vignettes = TRUE)
 30 | ```
 31 | 
 32 | Instructions on using edge can be viewed by typing:
 33 | ```R
 34 | library("edge")
 35 | browseVignettes("edge")
 36 | ```
 37 | 
 38 | ### Main functions
 39 | * `build_models`
 40 | * `build_study`
 41 | * `odp`
 42 | * `lrt`
 43 | * `fit_models`
 44 | * `kl_clust`
 45 | * `apply_sva`
 46 | * `apply_snm`
 47 | * `apply_qvalue`
 48 | 
 49 | ### Quick start guide
 50 | 
 51 | To get started, first load the kidney dataset included in the package:
 52 | ```R
 53 | library(edge)
 54 | data(kidney)
 55 | names(kidney)
 56 | ```
 57 | The kidney study is interested in determining differentially expressed genes with respect to age in kidney tissue. The `age` variable is the age of the subjects and the `sex` variable is whether the subjects were male or female. The expression values for the genes are contained in the `kidexpr` variable.
 58 | ```R
 59 | kidexpr <- kidney$kidexpr
 60 | age <- kidney$age
 61 | sex <- kidney$sex
 62 | ```
 63 | 
 64 | Once the data has been loaded, the user has two options to create the experimental models: `build_models` or `build_study`. If the experiment models are unknown to the user, `build_study` can be used to create the models:
 65 | ```R
 66 | edge_obj <- build_study(data = kidexpr, adj.var = sex, tme = age, sampling = "timecourse")
 67 | full_model <- fullModel(edge_obj)
 68 | null_model <- nullModel(edge_obj)
 69 | ```
 70 | 
 71 | The variable `sampling` describes the type of experiment performed, `adj.var` is the adjustment variable and `tme` is the time variable in the study. If the experiment is more complex then type `?build_study` for additional arguments.
 72 | 
 73 | If the alternative and null models are known to the user then `build_models` can be used to make a deSet object:
 74 | ```R
 75 | library(splines)
 76 | cov <- data.frame(sex = sex, age = age)
 77 | null_model <- ~sex
 78 | full_model <- ~sex + ns(age, df=4)
 79 | edge_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model, full.model = full_model)
 80 | ```
 81 | 
 82 | Here `cov` is a data frame of covariates, `null.model` is the null model and `full.model` is the alternative model. The column names of `cov` should be the same as the variable names used in the alternative and null models. Once the models have been generated, it is often useful to normalize the gene expression matrix using `apply_snm` and/or adjust for unmodelled variables using `apply_sva`.
 83 | ```R
 84 | edge_norm <- apply_snm(edge_obj, int.var=1:ncol(exprs(edge_obj)), diagnose=FALSE)
 85 | edge_sva <- apply_sva(edge_norm)
 86 | 
 87 | ```
 88 | 
 89 | The `odp` or `lrt` function can be used on `edge_sva` to implement either the optimal discovery procedure or the likelihood ratio test, respectively:
 90 | ```R
 91 | # optimal discovery procedure
 92 | edge_odp <- odp(edge_sva, bs.its = 30, verbose=FALSE)
 93 | # likelihood ratio test
 94 | edge_lrt <- lrt(edge_sva)
 95 | ```
 96 | 
 97 | To access the estimated proportion of null p-values, along with the p-values, q-values and local false discovery rates for each gene, use the function `qvalueObj`:
 98 | ```R
 99 | qval_obj <- qvalueObj(edge_odp)
100 | qvals <- qval_obj$qvalues
101 | pvals <- qval_obj$pvalues
102 | lfdr <- qval_obj$lfdr
103 | pi0 <- qval_obj$pi0
104 | ```
105 | 
106 | See the vignette for more detailed explanations of the edge package.
107 | 
108 | 


--------------------------------------------------------------------------------
/data/endotoxin.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StoreyLab/edge/5f973def65bc536b90d46b78e0a0ef849a81caa3/data/endotoxin.rda


--------------------------------------------------------------------------------
/data/gibson.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StoreyLab/edge/5f973def65bc536b90d46b78e0a0ef849a81caa3/data/gibson.rda


--------------------------------------------------------------------------------
/data/kidney.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StoreyLab/edge/5f973def65bc536b90d46b78e0a0ef849a81caa3/data/kidney.rda


--------------------------------------------------------------------------------
/inst/NEWS:
--------------------------------------------------------------------------------
 1 | edge 2.1.1:
 2 | - Moderated F-test has been added for likelihood ratio test
 3 | - Weights can be supplied to odp/lrt, which allows them to be used for RNA-Seq experiments with few samples
 4 | - added function apply_jackstraw
 5 | - fixed bug in build_study
 6 | 
 7 | edge 2.0.0:
 8 | 
 9 | The edge package was first released in 2005 and described in the publication:
10 | 
11 | Jeffrey T. Leek, Eva Monsen, Alan R. Dabney, and John D. Storey. Edge:
12 | extraction and analysis of differential gene expression. Bioinformatics,
13 | 22(4):507–508, 2006.
14 | http://bioinformatics.oxfordjournals.org/content/22/4/507.abstract
15 | 
16 | It was an independently released R package by the John Storey Lab, which
17 | included multi-threading and a graphical user interface.  However, edge has been
18 | updated and will now be made available through Bioconductor; edge >=2.0.0 is the 
19 | new version released through Bioconductor.
20 | 


--------------------------------------------------------------------------------
/man/apply_jackstraw.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2 (4.1.1): do not edit by hand
  2 | % Please edit documentation in R/AllGenerics.R, R/deSet-methods.R
  3 | \docType{methods}
  4 | \name{apply_jackstraw}
  5 | \alias{apply_jackstraw}
  6 | \alias{apply_jackstraw,deSet-method}
  7 | \title{Non-Parametric Jackstraw for Principal Component Analysis (PCA)}
  8 | \usage{
  9 | apply_jackstraw(object, PC = NULL, r = NULL, s = NULL, B = NULL,
 10 |   covariate = NULL, verbose = TRUE, seed = NULL)
 11 | 
 12 | \S4method{apply_jackstraw}{deSet}(object, PC = NULL, r = NULL, s = NULL,
 13 |   B = NULL, covariate = NULL, verbose = TRUE, seed = NULL)
 14 | }
 15 | \arguments{
 16 | \item{object}{\code{S4 object}: \code{\linkS4class{deSet}}}
 17 | 
 18 | \item{PC}{a numeric vector of principal components of interest. Choose a subset of r significant PCs to be used.}
 19 | 
 20 | \item{r}{a number (a positive integer) of significant principal components.}
 21 | 
 22 | \item{s}{a number (a positive integer) of synthetic null variables. Out of m variables, s variables are independently permuted.}
 23 | 
 24 | \item{B}{a number (a positive integer) of resampling iterations. There will be a total of s*B null statistics.}
 25 | 
 26 | \item{covariate}{a data matrix of covariates with corresponding n observations.}
 27 | 
 28 | \item{verbose}{a logical indicator as to whether to print the progress.}
 29 | 
 30 | \item{seed}{a seed for the random number generator.}
 31 | }
 32 | \value{
 33 | \code{apply_jackstraw} returns a \code{list} containing the following
 34 | slots:
 35 | \itemize{
 36 | \item{\code{p.value} the m p-values of association tests between variables
 37 | and their principal components}
 38 | \item{\code{obs.stat} the observed F-test statistics}
 39 | \item{\code{null.stat} the s*B null F-test statistics}
 40 | }
 41 | }
 42 | \description{
 43 | Estimates statistical significance of association between variables and
 44 | their principal components (PCs).
 45 | }
 46 | \details{
 47 | This function computes m p-values of linear association between m variables
 48 | and their PCs. Its resampling strategy accounts for the over-fitting
 49 | characteristics due to direct computation of PCs from the observed data
 50 | and protects against an anti-conservative bias.
 51 | 
 52 | Provide the \code{\linkS4class{deSet}},
 53 | with m variables as rows and n observations as columns. Given that there are
 54 | r significant PCs, this function tests for linear association between m
 55 | variables and their r PCs.
 56 | 
 57 | You could specify a subset of significant PCs
 58 | that you are interested in (PC). If PC is given, then this function computes
 59 | statistical significance of association between m variables and PC, while
 60 | adjusting for other PCs (i.e., significant PCs that are not your interest).
 61 | For example, if you want to identify variables associated with 1st and 2nd
 62 | PCs, when your data contains three significant PCs, set r=3 and PC=c(1,2).
 63 | 
 64 | Please take a careful look at your data and use appropriate graphical and
 65 | statistical criteria to determine a number of significant PCs, r. The number
 66 | of significant PCs depends on the data structure and the context. In a case
 67 | when you fail to specify r, it will be estimated from a permutation test
 68 | (Buja and Eyuboglu, 1992) using a function \code{\link{permutationPA}}.
 69 | 
 70 | If s is not supplied, s is set to about 10\% of m variables. If B is not
 71 | supplied, B is set to m*10/s.
 72 | }
 73 | \examples{
 74 | library(splines)
 75 | data(kidney)
 76 | age <- kidney$age
 77 | sex <- kidney$sex
 78 | kidexpr <- kidney$kidexpr
 79 | cov <- data.frame(sex = sex, age = age)
 80 | # create models
 81 | null_model <- ~sex
 82 | full_model <- ~sex + ns(age, df = 4)
 83 | # create deSet object from data
 84 | de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model,
 85 |                       full.model = full_model)
 86 | ## apply the jackstraw
 87 | out = apply_jackstraw(de_obj, PC=1, r=1)
 88 | ## Use optional arguments
 89 | ## For example, set s and B for a balance between speed of the algorithm and accuracy of p-values
 90 | ## out = apply_jackstraw(de_obj, PC=1, r=1, s=10, B=1000, seed=5678)
 91 | }
 92 | \author{
 93 | Neo Christopher Chung \email{nc@princeton.edu}
 94 | }
 95 | \references{
 96 | Chung and Storey (2013) Statistical Significance of
 97 | Variables Driving Systematic Variation in
 98 | High-Dimensional Data. arXiv:1308.6013 [stat.ME]
 99 | \url{http://arxiv.org/abs/1308.6013}
100 | 
101 | More information available at \url{http://ncc.name/}
102 | }
103 | \seealso{
104 | \code{\link{permutationPA}}
105 | }
106 | 
107 | 


--------------------------------------------------------------------------------
/man/apply_qvalue.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.1.1): do not edit by hand
 2 | % Please edit documentation in R/AllGenerics.R, R/deSet-methods.R
 3 | \docType{methods}
 4 | \name{apply_qvalue}
 5 | \alias{apply_qvalue}
 6 | \alias{apply_qvalue,deSet-method}
 7 | \title{Estimate the q-values for a given set of p-values}
 8 | \usage{
 9 | apply_qvalue(object, ...)
10 | 
11 | \S4method{apply_qvalue}{deSet}(object, ...)
12 | }
13 | \arguments{
14 | \item{object}{\code{S4 object}: \code{\linkS4class{deSet}}}
15 | 
16 | \item{...}{Additional arguments for \code{\link{qvalue}}}
17 | }
18 | \value{
19 | \code{\linkS4class{deSet}} object with slots updated by \code{\link{qvalue}}
20 |  calculations.
21 | }
22 | \description{
23 | Runs \code{\link{qvalue}} on a \code{\linkS4class{deSet}} object.
24 | }
25 | \examples{
26 | # import data
27 | library(splines)
28 | data(kidney)
29 | age <- kidney$age
30 | sex <- kidney$sex
31 | kidexpr <- kidney$kidexpr
32 | cov <- data.frame(sex = sex, age = age)
33 | 
34 | # create models
35 | null_model <- ~sex
36 | full_model <- ~sex + ns(age, df = 4)
37 | 
38 | # create deSet object from data
39 | de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model,
40 | full.model = full_model)
41 | 
42 | # Run lrt (or odp) and apply_qvalue
43 | de_lrt <- lrt(de_obj)
44 | de_lrt <- apply_qvalue(de_lrt, fdr.level = 0.05,
45 | pi0.method = "bootstrap", adj=1.2)
46 | summary(de_lrt)
47 | }
48 | \author{
49 | John Storey, Andrew Bass
50 | }
51 | \references{
52 | Storey JD and Tibshirani R. (2003) Statistical significance for
53 | genome-wide studies. Proceedings of the National Academy of Sciences,
54 | 100: 9440-9445
55 | }
56 | \seealso{
57 | \code{\linkS4class{deSet}}, \code{\link{odp}} and
58 | \code{\link{lrt}}
59 | }
60 | 
61 | 


--------------------------------------------------------------------------------
/man/apply_snm.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.1.1): do not edit by hand
 2 | % Please edit documentation in R/AllGenerics.R, R/deSet-methods.R
 3 | \docType{methods}
 4 | \name{apply_snm}
 5 | \alias{apply_snm}
 6 | \alias{apply_snm,deSet-method}
 7 | \title{Supervised normalization of data in edge}
 8 | \usage{
 9 | apply_snm(object, int.var = NULL, ...)
10 | 
11 | \S4method{apply_snm}{deSet}(object, int.var = NULL, ...)
12 | }
13 | \arguments{
14 | \item{object}{\code{S4 object}: \code{\linkS4class{deSet}}}
15 | 
16 | \item{int.var}{\code{data frame}: intensity-dependent effects (see
17 | \code{\link{snm}} for details)}
18 | 
19 | \item{...}{Additional arguments for \code{\link{snm}}}
20 | }
21 | \value{
22 | \code{apply_snm} returns a \code{\linkS4class{deSet}} object where
23 | assayData (the expression data) that has been passed to apply_snm is replaced
24 | with the normalized data that \code{\link{snm}} returns.  Specifically,
25 | \code{exprs(object)} is replaced by \code{$norm.dat} from \code{\link{snm}},
26 | where \code{object} is the \code{\link{deSet}} object.
27 | }
28 | \description{
29 | Runs \code{snm} on a deSet object based on the null and full models in
30 | \code{\linkS4class{deSet}}. See \code{\link{snm}} for additional details
31 | on the algorithm.
32 | }
33 | \examples{
34 | # simulate data
35 | library(snm)
36 | singleChannel <- sim.singleChannel(12345)
37 | data <- singleChannel$raw.data
38 | 
39 | # create deSet object using build_models (can use ExpressionSet see manual)
40 | cov <- data.frame(grp = singleChannel$bio.var[,2])
41 | full_model <- ~grp
42 | null_model <- ~1
43 | 
44 | # create deSet object using build_models
45 | de_obj <- build_models(data = data, cov = cov, full.model = full_model,
46 | null.model = null_model)
47 | 
48 | # run snm using intensity-dependent adjustment variable
49 | de_snm <- apply_snm(de_obj, int.var = singleChannel$int.var,
50 | verbose = FALSE, num.iter = 1)
51 | }
52 | \author{
53 | John Storey, Andrew Bass
54 | }
55 | \references{
56 | Mecham BH, Nelson PS, Storey JD. Supervised normalization of microarrays.
57 | Bioinformatics 2010;26:1308-1315.
58 | }
59 | \seealso{
60 | \code{\linkS4class{deSet}}, \code{\link{odp}} and
61 | \code{\link{lrt}}
62 | }
63 | 
64 | 


--------------------------------------------------------------------------------
/man/apply_sva.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.1.1): do not edit by hand
 2 | % Please edit documentation in R/AllGenerics.R, R/deSet-methods.R
 3 | \docType{methods}
 4 | \name{apply_sva}
 5 | \alias{apply_sva}
 6 | \alias{apply_sva,deSet-method}
 7 | \title{Estimate surrogate variables}
 8 | \usage{
 9 | apply_sva(object, ...)
10 | 
11 | \S4method{apply_sva}{deSet}(object, ...)
12 | }
13 | \arguments{
14 | \item{object}{\code{S4 object}: \code{\linkS4class{deSet}}}
15 | 
16 | \item{...}{Additional arguments for \code{\link{sva}}}
17 | }
18 | \value{
19 | \code{\linkS4class{deSet}} object where the surrogate variables
20 | estimated by \code{\link{sva}} are added to the full model and null model
21 | matrices.
22 | }
23 | \description{
24 | Runs \code{\link{sva}} on the null and full models in
25 | \code{\linkS4class{deSet}}. See \code{\link{sva}} for additional details.
26 | }
27 | \examples{
28 | # import data
29 | library(splines)
30 | data(kidney)
31 | age <- kidney$age
32 | sex <- kidney$sex
33 | kidexpr <- kidney$kidexpr
34 | cov <- data.frame(sex = sex, age = age)
35 | 
36 | # create models
37 | null_model <- ~sex
38 | full_model <- ~sex + ns(age, df = 4)
39 | 
40 | # create deSet object from data
41 | de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model,
42 | full.model = full_model)
43 | 
44 | # run surrogate variable analysis
45 | de_sva <- apply_sva(de_obj)
46 | 
47 | # run odp/lrt with surrogate variables added
48 | de_odp <- odp(de_sva, bs.its = 30)
49 | summary(de_odp)
50 | }
51 | \author{
52 | John Storey, Jeffrey Leek, Andrew Bass
53 | }
54 | \references{
55 | Leek JT, Storey JD (2007) Capturing Heterogeneity in Gene Expression
56 | Studies by Surrogate Variable Analysis. PLoS Genet 3(9): e161.
57 | doi:10.1371/journal.pgen.0030161
58 | 
59 | Leek JT and Storey JD. (2008) A general framework for multiple testing
60 | dependence. Proceedings of the National Academy of Sciences, 105: 18718-
61 | 18723.
62 | }
63 | \seealso{
64 | \code{\linkS4class{deSet}}, \code{\link{odp}} and
65 | \code{\link{lrt}}
66 | }
67 | 
68 | 


--------------------------------------------------------------------------------
/man/betaCoef.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.1.1): do not edit by hand
 2 | % Please edit documentation in R/AllGenerics.R, R/getMethods.R
 3 | \docType{methods}
 4 | \name{betaCoef}
 5 | \alias{betaCoef}
 6 | \alias{betaCoef,deFit-method}
 7 | \title{Regression coefficients from full model fit}
 8 | \usage{
 9 | betaCoef(object)
10 | 
11 | \S4method{betaCoef}{deFit}(object)
12 | }
13 | \arguments{
14 | \item{object}{\code{S4 object}: \code{\linkS4class{deFit}}}
15 | }
16 | \value{
17 | \code{betaCoef} returns the regression coefficients for the full
18 |  model fit.
19 | }
20 | \description{
21 | Access the full model fitted coefficients of a
22 | \code{\linkS4class{deFit}} object.
23 | }
24 | \examples{
25 | # import data
26 | library(splines)
27 | data(kidney)
28 | age <- kidney$age
29 | sex <- kidney$sex
30 | kidexpr <- kidney$kidexpr
31 | cov <- data.frame(sex = sex, age = age)
32 | 
33 | # create models
34 | null_model <- ~sex
35 | full_model <- ~sex + ns(age, df = 4)
36 | 
37 | # create deSet object from data
38 | de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model,
39 | full.model = full_model)
40 | 
41 | # run fit_models to get model fits
42 | de_fit <- fit_models(de_obj)
43 | 
44 | # extract beta coefficients
45 | beta <- betaCoef(de_fit)
46 | }
47 | \author{
48 | John Storey, Andrew Bass
49 | }
50 | \seealso{
51 | \code{\link{fit_models}}
52 | }
53 | 
54 | 


--------------------------------------------------------------------------------
/man/build_models.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.1.1): do not edit by hand
 2 | % Please edit documentation in R/form_models.R
 3 | \name{build_models}
 4 | \alias{build_models}
 5 | \title{Generate a deSet object with full and null models}
 6 | \usage{
 7 | build_models(data, cov, full.model = NULL, null.model = NULL, ind = NULL)
 8 | }
 9 | \arguments{
10 | \item{data}{\code{matrix}: gene expression data.}
11 | 
12 | \item{cov}{\code{data.frame}: the covariates in the study.}
13 | 
14 | \item{full.model}{\code{formula}: the adjustment and the biological
15 | variables of interest.}
16 | 
17 | \item{null.model}{\code{formula}: the adjustment variables.}
18 | 
19 | \item{ind}{\code{factor}: individuals sampled in the study. Default is
20 | NULL. Optional.}
21 | }
22 | \value{
23 | \code{\linkS4class{deSet}} object
24 | }
25 | \description{
26 | \code{build_models} creates a \code{\link{deSet}} object. The user inputs
27 | the full and null models.
28 | }
29 | \examples{
30 | # create ExpressionSet object from kidney dataset
31 | library(splines)
32 | data(kidney)
33 | age <- kidney$age
34 | sex <- kidney$sex
35 | kidexpr <- kidney$kidexpr
36 | cov <- data.frame(sex = sex, age = age)
37 | 
38 | # create models
39 | null.model <- ~sex
40 | full.model <- ~sex + ns(age, df=4)
41 | 
42 | # create deSet object from data
43 | de_obj <- build_models(data = kidexpr, cov = cov, null.model = null.model,
44 | full.model = full.model)
45 | }
46 | \author{
47 | John Storey, Andy Bass
48 | }
49 | \seealso{
50 | \code{\linkS4class{deSet}}, \code{\link{build_study}}
51 | }
52 | 
53 | 


--------------------------------------------------------------------------------
/man/build_study.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.1.1): do not edit by hand
 2 | % Please edit documentation in R/form_models.R
 3 | \name{build_study}
 4 | \alias{build_study}
 5 | \title{Formulates the experimental models}
 6 | \usage{
 7 | build_study(data, grp = NULL, adj.var = NULL, bio.var = NULL,
 8 |   tme = NULL, ind = NULL, sampling = c("static", "timecourse"),
 9 |   basis.df = 2, basis.type = c("ncs", "poly"))
10 | }
11 | \arguments{
12 | \item{data}{\code{matrix}: gene expression data (rows are genes, columns are
13 | samples).}
14 | 
15 | \item{grp}{\code{vector}: group assignment in the study (for K-class
16 | studies). Optional.}
17 | 
18 | \item{adj.var}{\code{matrix}: adjustment variables. Optional.}
19 | 
20 | \item{bio.var}{\code{matrix}: biological variables. Optional.}
21 | 
22 | \item{tme}{\code{vector}: time variable in a time course study. Optional.}
23 | 
24 | \item{ind}{\code{factor}: individual factor for repeated observations of the
25 | same individuals. Optional.}
26 | 
27 | \item{sampling}{\code{string}: type of study. Either "static" or
28 | "timecourse". Default is "static".}
29 | 
30 | \item{basis.df}{\code{numeric}: degrees of freedom of the basis for time
31 | course study. Default is 2.}
32 | 
32 | \item{basis.type}{\code{string}: either "ncs" (natural cubic spline) or "poly"
34 | (polynomial spline) basis for time course study. Default is "ncs".}
35 | }
36 | \value{
37 | \code{\linkS4class{deSet}} object
38 | }
39 | \description{
40 | \code{build_study} generates the full and null models for users unfamiliar
41 | with building models in R. There are two types of experimental designs:
42 | static and time-course. For more details, refer to the vignette.
43 | }
44 | \examples{
45 | # create ExpressionSet object from kidney dataset
46 | library(splines)
47 | data(kidney)
48 | age <- kidney$age
49 | sex <- kidney$sex
50 | kidexpr <- kidney$kidexpr
51 | 
52 | # create deSet object from data
53 | de_obj <- build_study(data = kidexpr, adj.var = sex, tme = age,
54 | sampling = "timecourse", basis.df = 4)
55 | }
56 | \author{
57 | John Storey, Andy Bass
58 | }
59 | \seealso{
60 | \code{\linkS4class{deSet}}, \code{\link{build_models}}
61 | }
62 | 
63 | 


--------------------------------------------------------------------------------
/man/deFit-class.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.1.1): do not edit by hand
 2 | % Please edit documentation in R/AllClasses.R
 3 | \docType{class}
 4 | \name{deFit-class}
 5 | \alias{deFit-class}
 6 | \title{The differential expression class for the model fits}
 7 | \description{
 8 | Object returned from \code{\link{fit_models}} containing information
 9 | regarding the model fits for the experiment.
10 | }
11 | \section{Slots}{
12 | 
13 | \describe{
14 | \item{\code{fit.full}}{\code{matrix}: containing fitted values for the full model.}
15 | 
16 | \item{\code{fit.null}}{\code{matrix}: containing fitted values for the null model.}
17 | 
18 | \item{\code{res.full}}{\code{matrix}: the residuals of the full model.}
19 | 
20 | \item{\code{res.null}}{\code{matrix}: the residuals of the null model.}
21 | 
22 | \item{\code{dH.full}}{\code{vector}: contains diagonal elements in the projection
23 | matrix for the full model.}
24 | 
25 | \item{\code{beta.coef}}{\code{matrix}: fitted coefficients for the full model.}
26 | 
27 | \item{\code{stat.type}}{\code{string}: information on the statistic of interest.
28 | Currently, the only options are ``lrt'' and ``odp''.}
29 | }}
30 | \section{Methods}{
31 | 
32 |  \describe{
33 |  \item{\code{fitNull(deFit)}}{Access fitted data from null model.}
34 |  \item{\code{fitFull(deFit)}}{Access fitted data from full model.}
35 |  \item{\code{resNull(deFit)}}{Access residuals from null model fit.}
36 |  \item{\code{resFull(deFit)}}{Access residuals from full model fit.}
37 |  \item{\code{betaCoef(deFit)}}{Access beta coefficients in linear model.}
38 |  \item{\code{sType(deFit)}}{Access statistic type of model fitting utilized
39 |  in function.}
40 |  }
41 | }
42 | \author{
43 | John Storey, Jeffrey Leek, Andrew Bass
44 | }
45 | 
46 | 


--------------------------------------------------------------------------------
/man/deSet-class.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.1.1): do not edit by hand
 2 | % Please edit documentation in R/AllClasses.R
 3 | \docType{class}
 4 | \name{deSet-class}
 5 | \alias{deSet-class}
 6 | \title{The differential expression class (deSet)}
 7 | \description{
 8 | The \code{deSet} class extends the \code{\link{ExpressionSet}} class.
 9 | While the \code{ExpressionSet} class contains information about the
10 | experiment, the \code{deSet} class contains both experimental information and
11 | additional information relevant for differential expression analysis,
12 | explained below in Slots.
13 | }
14 | \section{Slots}{
15 | 
16 | \describe{
17 | \item{\code{null.model}}{\code{formula}: contains the adjustment variables in the
18 | experiment. The null model is used for comparison when fitting the
19 | full model.}
20 | 
21 | \item{\code{full.model}}{\code{formula}: contains the adjustment variables and the
22 | biological variables of interest.}
23 | 
24 | \item{\code{null.matrix}}{\code{matrix}: the null model as a matrix.}
25 | 
26 | \item{\code{full.matrix}}{\code{matrix}: the full model as a matrix.}
27 | 
28 | \item{\code{individual}}{\code{factor}: contains information on which sample
29 | is from which individual in the experiment.}
30 | 
31 | \item{\code{qvalueObj}}{\code{S3 object}: containing \code{qvalue} object.
32 | See \code{\link{qvalue}} for additional details.}
33 | }}
34 | \note{
35 | See \code{\link{ExpressionSet}} for additional slot information.
36 | }
37 | \section{Methods}{
38 | 
39 |  \describe{
40 |  \item{\code{as(ExpressionSet, "deSet")}}{Coerce objects of
41 |  \code{ExpressionSet} to \code{deSet}.}
42 |  \item{\code{lrt(deSet, ...)}}{Performs a generalized likelihood ratio test
43 |  using the full and null models.}
44 |  \item{\code{odp(deSet, ...)}}{Performs the optimal discovery procedure,
45 |  which is a new approach for optimally performing many hypothesis tests in
46 |  a high-dimensional study.}
47 |  \item{\code{kl_clust(deSet, ...)}}{An implementation of mODP that assigns
48 |  genes to modules based off of the Kullback-Leibler distance.}
49 |  \item{\code{fit_models(deSet, ...)}}{Fits a linear model to each gene by
50 |  method of least squares.}
51 |  \item{\code{apply_qvalue(deSet, ...)}}{Applies \code{\link{qvalue}}
52 |  function.}
53 |  \item{\code{apply_snm(deSet, ...)}}{Applies supervised normalization of
54 |   microarrays (\code{\link{snm}}) on gene expression data.}
55 |  \item{\code{apply_sva(deSet, ...)}}{Applies surrogate variable analysis
56 |  (\code{\link{sva}}).}
57 |  \item{\code{fullMatrix(deSet)}}{Access and set full matrix.}
58 |  \item{\code{nullMatrix(deSet)}}{Access and set null matrix.}
59 |  \item{\code{fullModel(deSet)}}{Access and set full model.}
60 |  \item{\code{nullModel(deSet)}}{Access and set null model.}
61 |  \item{\code{individual(deSet)}}{Access and set individual slot.}
62 |  \item{\code{qvalueObj(deSet)}}{Access \code{qvalue} object.
63 |  See \code{\link{qvalue}}.}
64 |  \item{\code{validObject(deSet)}}{Check validity of \code{deSet} object.}
65 |  }
66 | }
67 | \author{
68 | John Storey, Jeffrey Leek, Andrew Bass
69 | }
70 | 
71 | 


--------------------------------------------------------------------------------
/man/deSet.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.1.1): do not edit by hand
 2 | % Please edit documentation in R/AllGenerics.R, R/ExpressionSet-methods.R
 3 | \docType{methods}
 4 | \name{deSet}
 5 | \alias{deSet}
 6 | \alias{deSet,ExpressionSet-method}
 7 | \title{Create a deSet object from an ExpressionSet}
 8 | \usage{
 9 | deSet(object, full.model, null.model, individual = NULL)
10 | 
11 | \S4method{deSet}{ExpressionSet}(object, full.model, null.model,
12 |   individual = NULL)
13 | }
14 | \arguments{
15 | \item{object}{\code{S4 object}: \code{\link{ExpressionSet}}}
16 | 
17 | \item{full.model}{\code{formula}: full model containing both the
18 | adjustment and the biological variables for the experiment.}
19 | 
20 | \item{null.model}{\code{formula}: null model containing the adjustment
21 | variables for the experiment.}
22 | 
23 | \item{individual}{\code{factor}: information on repeated samples in
24 | experiment.}
25 | }
26 | \value{
27 | \code{\linkS4class{deSet}} object
28 | }
29 | \description{
30 | Creates a \code{\linkS4class{deSet}} object that extends the
31 | \code{\link{ExpressionSet}} object.
32 | }
33 | \note{
34 | It is essential that the null and full models have the same variables
35 | as the ExpressionSet phenoData column names.
36 | }
37 | \examples{
38 | # import data
39 | library(splines)
40 | data(kidney)
41 | age <- kidney$age
42 | sex <- kidney$sex
43 | kidexpr <- kidney$kidexpr
44 | cov <- data.frame(sex = sex, age = age)
45 | pDat <- as(cov, "AnnotatedDataFrame")
46 | exp_set <- ExpressionSet(assayData = kidexpr, phenoData = pDat)
47 | 
48 | # create models
49 | null_model <- ~sex
50 | full_model <- ~sex + ns(age, df = 4)
51 | 
52 | # create deSet object from data
53 | de_obj <- deSet(exp_set, null.model = null_model,
54 | full.model = full_model)
55 | 
56 | # optionally add individuals to experiment, in this case there are 36
57 | # individuals that were sampled twice
58 | indSamples <- as.factor(rep(1:36, each = 2))
59 | de_obj <- deSet(exp_set, null.model = null_model,
60 | full.model = full_model, ind = indSamples)
61 | summary(de_obj)
62 | }
63 | \author{
64 | John Storey, Andrew Bass
65 | }
66 | \seealso{
67 | \code{\linkS4class{deSet}}, \code{\link{odp}} and
68 | \code{\link{lrt}}
69 | }
70 | 
71 | 


--------------------------------------------------------------------------------
/man/edge.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.1.1): do not edit by hand
 2 | % Please edit documentation in R/edge.R
 3 | \docType{package}
 4 | \name{edge}
 5 | \alias{edge}
 6 | \alias{edge-package}
 7 | \title{Extraction of Differential Gene Expression}
 8 | \description{
 9 | The edge package implements methods for carrying out differential
10 | expression analyses of genome-wide gene expression studies. Significance
11 | testing using the optimal discovery procedure and generalized likelihood
12 | ratio tests (equivalent to F-tests and t-tests) are implemented for general study
13 | designs. Special functions are available to facilitate the analysis of
14 | common study designs, including time course experiments. Other packages
15 | such as snm, sva, and qvalue are integrated in edge to provide a wide range
16 | of tools for gene expression analysis.
17 | }
18 | \examples{
19 | \dontrun{
20 | browseVignettes("edge")
21 | }
22 | }
23 | \author{
24 | John Storey, Jeffrey Leek, Andrew Bass
25 | }
26 | 
27 | 


--------------------------------------------------------------------------------
/man/endotoxin.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.1.1): do not edit by hand
 2 | % Please edit documentation in R/edge.R
 3 | \docType{data}
 4 | \name{endotoxin}
 5 | \alias{endotoxin}
 6 | \title{Gene expression dataset from Calvano et al. (2005) Nature}
 7 | \format{\itemize{
 8 |   \item endoexpr: A 500 rows by 46 columns data frame containing expression
 9 |   values.
10 |   \item class: A vector of length 46 containing information about which
11 |   individuals were given endotoxin.
12 |   \item ind: A vector of length 46 providing indexing measurements for each
13 |   individual in the experiment.
14 |   \item time: A vector of length 46 indicating time measurements.
15 | }}
16 | \usage{
17 | data(endotoxin)
18 | }
19 | \value{
20 | endotoxin dataset
21 | }
22 | \description{
23 | The data provide gene expression measurements in an endotoxin study where
24 | four subjects were given endotoxin and four subjects were given a placebo.
25 | Blood samples were collected and leukocytes were isolated from the samples
26 | before infusion and at times 2, 4, 6, 9, 24 hours.
27 | }
28 | \note{
29 | The data is a random subset of 500 genes from the full dataset. To
30 | download the full data set, go to \url{http://genomine.org/edge/}.
31 | }
32 | \examples{
33 | library(splines)
34 | # import data
35 | data(endotoxin)
36 | ind <- endotoxin$ind
37 | class <- endotoxin$class
38 | time <- endotoxin$time
39 | endoexpr <- endotoxin$endoexpr
40 | cov <- data.frame(individual = ind, time = time, class = class)
41 | 
42 | # formulate null and full models in experiment
43 | # note: interaction term is a way of taking into account group effects
44 | mNull <- ~ns(time, df=4, intercept = FALSE) + class
45 | mFull <- ~ns(time, df=4, intercept = FALSE) +
46 |           ns(time, df=4, intercept = FALSE):class + class
47 | 
48 | # create deSet object
49 | de_obj <- build_models(endoexpr, cov = cov, full.model = mFull,
50 |                        null.model = mNull, ind = ind)
51 | 
52 | # Perform ODP/lrt statistic to determine significant genes in study
53 | de_odp <- odp(de_obj, bs.its = 10)
54 | de_lrt <- lrt(de_obj, nullDistn = "bootstrap", bs.its = 10)
55 | 
56 | # summarize significance results
57 | summary(de_odp)
58 | }
59 | \references{
60 | Storey JD, Xiao W, Leek JT, Tompkins RG, and Davis RW. (2005) Significance
61 | analysis of time course microarray experiments. PNAS, 102: 12837-12842. \cr
62 | \url{http://www.pnas.org/content/102/36/12837.full}
63 | }
64 | \keyword{datasets}
65 | 
66 | 


--------------------------------------------------------------------------------
/man/fitFull.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.1.1): do not edit by hand
 2 | % Please edit documentation in R/AllGenerics.R, R/getMethods.R
 3 | \docType{methods}
 4 | \name{fitFull}
 5 | \alias{fitFull}
 6 | \alias{fitFull,deFit-method}
 7 | \title{Fitted data from the full model}
 8 | \usage{
 9 | fitFull(object)
10 | 
11 | \S4method{fitFull}{deFit}(object)
12 | }
13 | \arguments{
14 | \item{object}{\code{S4 object}: \code{\linkS4class{deFit}}}
15 | }
16 | \value{
17 | \code{fitFull} returns a matrix of fitted values from full model.
18 | }
19 | \description{
20 | Access the fitted data from the full model in a
21 | \code{\linkS4class{deFit}} object.
22 | }
23 | \examples{
24 | # import data
25 | library(splines)
26 | data(kidney)
27 | age <- kidney$age
28 | sex <- kidney$sex
29 | kidexpr <- kidney$kidexpr
30 | cov <- data.frame(sex = sex, age = age)
31 | 
32 | # create models
33 | null_model <- ~sex
34 | full_model <- ~sex + ns(age, df = 4)
35 | 
36 | # create deSet object from data
37 | de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model,
38 | full.model = full_model)
39 | 
40 | # run fit_models to get model fits
41 | de_fit <- fit_models(de_obj)
42 | 
43 | # extract fitted values for full model
44 | fitted_full <- fitFull(de_fit)
45 | }
46 | \author{
47 | John Storey, Andrew Bass
48 | }
49 | \seealso{
50 | \code{\link{fit_models}}
51 | }
52 | 
53 | 


--------------------------------------------------------------------------------
/man/fitNull.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.1.1): do not edit by hand
 2 | % Please edit documentation in R/AllGenerics.R, R/getMethods.R
 3 | \docType{methods}
 4 | \name{fitNull}
 5 | \alias{fitNull}
 6 | \alias{fitNull,deFit-method}
 7 | \title{Fitted data from the null model}
 8 | \usage{
 9 | fitNull(object)
10 | 
11 | \S4method{fitNull}{deFit}(object)
12 | }
13 | \arguments{
14 | \item{object}{\code{S4 object}: \code{\linkS4class{deFit}}}
15 | }
16 | \value{
17 | \code{fitNull} returns a matrix of fitted values from null model.
18 | }
19 | \description{
20 | Access the fitted data from the null model in a
21 | \code{\linkS4class{deFit}} object.
22 | }
23 | \examples{
24 | # import data
25 | library(splines)
26 | data(kidney)
27 | age <- kidney$age
28 | sex <- kidney$sex
29 | kidexpr <- kidney$kidexpr
30 | cov <- data.frame(sex = sex, age = age)
31 | 
32 | # create models
33 | null_model <- ~sex
34 | full_model <- ~sex + ns(age, df = 4)
35 | 
36 | # create deSet object from data
37 | de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model,
38 | full.model = full_model)
39 | 
40 | # run fit_models to get model fits
41 | de_fit <- fit_models(de_obj)
42 | 
43 | # extract fitted values from null model
44 | fitted_null <- fitNull(de_fit)
45 | }
46 | \author{
47 | John Storey, Andrew Bass
48 | }
49 | \seealso{
50 | \code{\link{fit_models}}
51 | }
52 | 
53 | 


--------------------------------------------------------------------------------
/man/fit_models.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.1.1): do not edit by hand
 2 | % Please edit documentation in R/AllGenerics.R, R/deSet-methods.R
 3 | \docType{methods}
 4 | \name{fit_models}
 5 | \alias{fit_models}
 6 | \alias{fit_models,deSet-method}
 7 | \title{Linear regression of the null and full models}
 8 | \usage{
 9 | fit_models(object, stat.type = c("lrt", "odp"), weights = NULL)
10 | 
11 | \S4method{fit_models}{deSet}(object, stat.type = c("lrt", "odp"),
12 |   weights = NULL)
13 | }
14 | \arguments{
15 | \item{object}{\code{S4 object}: \code{\linkS4class{deSet}}.}
16 | 
17 | \item{stat.type}{\code{character}: type of statistic to be used. Either
18 | "lrt" or "odp". Default is "lrt".}
19 | 
20 | \item{weights}{\code{matrix}: weights for each observation. Default is NULL.}
21 | }
22 | \value{
23 | \code{\linkS4class{deFit}} object
24 | }
25 | \description{
26 | \code{fit_models} fits a model matrix to each gene by using the least
27 | squares method. Model fits can be either statistic type "odp" (optimal
28 | discovery procedure) or "lrt" (likelihood ratio test).
29 | }
30 | \details{
31 | If the "odp" method is implemented then the null model is removed from the
32 | full model (see Storey 2007).  Otherwise, the statistic type has no effect
33 | on the model fit.
34 | }
35 | \note{
36 | \code{fit_models} does not have to be called by the user to use
37 | \code{\link{odp}}, \code{\link{lrt}} or \code{\link{kl_clust}} as it is an
38 | optional input and is implemented in the methods. The
39 | \code{\linkS4class{deFit}} object can be created by the user if a different
40 | statistical implementation is required.
41 | }
42 | \examples{
43 | # import data
44 | library(splines)
45 | data(kidney)
46 | age <- kidney$age
47 | sex <- kidney$sex
48 | kidexpr <- kidney$kidexpr
49 | cov <- data.frame(sex = sex, age = age)
50 | 
51 | # create models
52 | null_model <- ~sex
53 | full_model <- ~sex + ns(age, df = 4)
54 | 
55 | # create deSet object from data
56 | de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model,
57 | full.model = full_model)
58 | 
59 | # retrieve statistics from linear regression for each gene
60 | fit_lrt <- fit_models(de_obj, stat.type = "lrt") # lrt method
61 | fit_odp <- fit_models(de_obj, stat.type = "odp") # odp method
62 | 
63 | # summarize object
64 | summary(fit_odp)
65 | }
66 | \author{
67 | John Storey
68 | }
69 | \references{
70 | Storey JD. (2007) The optimal discovery procedure: A new approach to
71 | simultaneous significance testing. Journal of the Royal Statistical
72 | Society, Series B, 69: 347-368.
73 | 
74 | Storey JD, Dai JY, and Leek JT. (2007) The optimal discovery procedure for
75 | large-scale significance testing, with applications to comparative
76 | microarray experiments. Biostatistics, 8: 414-432.
77 | 
78 | Storey JD, Xiao W, Leek JT, Tompkins RG, and Davis RW. (2005) Significance
79 | analysis of time course microarray experiments. Proceedings of the National
80 | Academy of Sciences, 102: 12837-12842.
81 | }
82 | \seealso{
83 | \code{\linkS4class{deFit}}, \code{\link{odp}} and
84 | \code{\link{lrt}}
85 | }
86 | 
87 | 


--------------------------------------------------------------------------------
/man/fullMatrix.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.1.1): do not edit by hand
 2 | % Please edit documentation in R/AllGenerics.R, R/getMethods.R, R/setMethods.R
 3 | \docType{methods}
 4 | \name{fullMatrix}
 5 | \alias{fullMatrix}
 6 | \alias{fullMatrix,deSet-method}
 7 | \alias{fullMatrix<-}
 8 | \alias{fullMatrix<-,deSet-method}
 9 | \title{Matrix representation of full model}
10 | \usage{
11 | fullMatrix(object)
12 | 
13 | fullMatrix(object) <- value
14 | 
15 | \S4method{fullMatrix}{deSet}(object)
16 | 
17 | \S4method{fullMatrix}{deSet}(object) <- value
18 | }
19 | \arguments{
20 | \item{object}{\code{S4 object}: \code{\linkS4class{deSet}}}
21 | 
22 | \item{value}{\code{matrix}: full model matrix where the columns are the
23 | covariates and rows are observations}
24 | }
25 | \value{
26 | \code{fullMatrix} returns the value of the full model matrix.
27 | }
28 | \description{
29 | These generic functions access and set the full matrix for a
30 | \code{\linkS4class{deSet}} object.
31 | }
32 | \examples{
33 | # import data
34 | library(splines)
35 | data(kidney)
36 | age <- kidney$age
37 | sex <- kidney$sex
38 | kidexpr <- kidney$kidexpr
39 | cov <- data.frame(sex = sex, age = age)
40 | 
41 | # create models
42 | null_model <- ~sex
43 | full_model <- ~sex + ns(age, df = 4)
44 | 
45 | # create deSet object from data
46 | de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model,
47 | full.model = full_model)
48 | 
49 | # extract the full model equation as a matrix
50 | mat_full <- fullMatrix(de_obj)
51 | }
52 | \author{
53 | Andrew Bass, John Storey
54 | }
55 | \seealso{
56 | \code{\linkS4class{deSet}}, \code{\link{fullModel}}
57 | }
58 | 
59 | 


--------------------------------------------------------------------------------
/man/fullModel.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.1.1): do not edit by hand
 2 | % Please edit documentation in R/AllGenerics.R, R/getMethods.R, R/setMethods.R
 3 | \docType{methods}
 4 | \name{fullModel}
 5 | \alias{fullModel}
 6 | \alias{fullModel,deSet-method}
 7 | \alias{fullModel<-}
 8 | \alias{fullModel<-,deSet-method}
 9 | \title{Full model equation}
10 | \usage{
11 | fullModel(object)
12 | 
13 | fullModel(object) <- value
14 | 
15 | \S4method{fullModel}{deSet}(object)
16 | 
17 | \S4method{fullModel}{deSet}(object) <- value
18 | }
19 | \arguments{
20 | \item{object}{\code{S4 object}: \code{\linkS4class{deSet}}}
21 | 
22 | \item{value}{\code{formula}: The experiment design for the full model.}
23 | }
24 | \value{
25 | \code{fullModel} returns the formula for the full model.
26 | }
27 | \description{
28 | These generic functions access and set the full model for a
29 | \code{\linkS4class{deSet}} object.
30 | }
31 | \examples{
32 | # import data
33 | library(splines)
34 | data(kidney)
35 | age <- kidney$age
36 | sex <- kidney$sex
37 | kidexpr <- kidney$kidexpr
38 | cov <- data.frame(sex = sex, age = age)
39 | 
40 | # create models
41 | null_model <- ~sex
42 | full_model <- ~sex + ns(age, df = 4)
43 | 
44 | # create deSet object from data
45 | de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model,
46 | full.model = full_model)
47 | 
48 | # extract out the full model equation
49 | mod_full <- fullModel(de_obj)
50 | 
51 | # change the full model in the experiment
52 | fullModel(de_obj) <- ~sex + ns(age, df = 2)
53 | }
54 | \author{
55 | John Storey, Andrew Bass
56 | }
57 | \seealso{
58 | \code{\linkS4class{deSet}}
59 | }
60 | 
61 | 


--------------------------------------------------------------------------------
/man/gibson.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.1.1): do not edit by hand
 2 | % Please edit documentation in R/edge.R
 3 | \docType{data}
 4 | \name{gibson}
 5 | \alias{gibson}
 6 | \title{Gene expression dataset from Idaghdour et al. (2008)}
 7 | \format{\itemize{
 8 |   \item batch: Batches in experiment.
 9 |   \item location: Environment/lifestyle of Moroccan Amazigh groups.
10 |   \item gender: Sex of individuals.
11 |   \item gibexpr: A 500 rows by 46 columns matrix of gene expression values.
12 | }}
13 | \usage{
14 | data(gibson)
15 | }
16 | \value{
17 | gibson dataset
18 | }
19 | \description{
20 | The data provide gene expression measurements in peripheral blood leukocyte
21 | samples from three Moroccan groups leading distinct ways of life:
22 | desert nomadic (DESERT), mountain agrarian (VILLAGE), and coastal urban
23 | (AGADIR).
24 | }
25 | \note{
26 | These data are a random subset of 500 genes from the total number of genes
27 | in the original data set. To download the full data set, go to
28 | \url{http://genomine.org/de/}.
29 | }
30 | \examples{
31 | # import
32 | data(gibson)
33 | batch <- gibson$batch
34 | gender <- gibson$gender
35 | location <- gibson$location
36 | gibexpr <- gibson$gibexpr
37 | cov <- data.frame(Batch = batch, Gender = gender,
38 | Location = location)
39 | 
40 | # create deSet for experiment- static experiment
41 | mNull <- ~Gender + Batch
42 | mFull <- ~Gender + Batch + Location
43 | 
44 | # create deSet object
45 | de_obj <- build_models(gibexpr, cov = cov, full.model = mFull,
46 | null.model = mNull)
47 | 
48 | # Perform ODP/lrt statistic to determine significant genes in study
49 | de_odp <- odp(de_obj, bs.its = 10)
50 | de_lrt <- lrt(de_obj, nullDistn = "bootstrap", bs.its = 10)
51 | 
52 | # summarize significance results
53 | summary(de_odp)
54 | }
55 | \references{
56 | Idaghdour Y, Storey JD, Jadallah S, and Gibson G. (2008) A genome-wide gene
57 | expression signature of lifestyle in peripheral blood of Moroccan Amazighs.
58 | PLoS Genetics, 4: e1000052.
59 | }
60 | \keyword{datasets}
61 | 
62 | 


--------------------------------------------------------------------------------
/man/individual.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.1.1): do not edit by hand
 2 | % Please edit documentation in R/AllGenerics.R, R/getMethods.R, R/setMethods.R
 3 | \docType{methods}
 4 | \name{individual}
 5 | \alias{individual}
 6 | \alias{individual,deSet-method}
 7 | \alias{individual<-}
 8 | \alias{individual<-,deSet-method}
 9 | \title{Individuals sampled in experiment}
10 | \usage{
11 | individual(object)
12 | 
13 | individual(object) <- value
14 | 
15 | \S4method{individual}{deSet}(object)
16 | 
17 | \S4method{individual}{deSet}(object) <- value
18 | }
19 | \arguments{
20 | \item{object}{\code{\linkS4class{deSet}}}
21 | 
22 | \item{value}{\code{factor}: Identifies which samples correspond to which
23 |   individuals. Important if the same individuals are sampled multiple times
24 |   in a longitudinal fashion.}
25 | }
26 | \value{
27 | \code{individual} returns information regarding distinct individuals
28 |   sampled in the experiment.
29 | }
30 | \description{
31 | These generic functions access and set the individual slot in
32 | \code{\linkS4class{deSet}}.
33 | }
34 | \examples{
35 | library(splines)
36 | # import data
37 | data(endotoxin)
38 | ind <- endotoxin$ind
39 | time <- endotoxin$time
40 | class <- endotoxin$class
41 | endoexpr <- endotoxin$endoexpr
42 | cov <- data.frame(individual = ind, time = time, class = class)
43 | 
44 | # create ExpressionSet object
45 | pDat <- as(cov, "AnnotatedDataFrame")
46 | exp_set <- ExpressionSet(assayData = endoexpr, phenoData = pDat)
47 | 
48 | # formulate null and full models in experiment
49 | # note: interaction term is a way of taking into account group effects
50 | mNull <- ~ns(time, df=4, intercept = FALSE)
51 | mFull <- ~ns(time, df=4, intercept = FALSE) +
52 | ns(time, df=4, intercept = FALSE):class + class
53 | 
54 | # create deSet object
55 | de_obj <- deSet(exp_set, full.model = mFull, null.model = mNull,
56 | individual = ind)
57 | 
58 | # extract out the individuals factor
59 | ind_exp <- individual(de_obj)
60 | }
61 | \author{
62 | John Storey, Andrew Bass
63 | }
64 | \seealso{
65 | \code{\linkS4class{deSet}}
66 | }
67 | 
68 | 


--------------------------------------------------------------------------------
/man/kidney.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.1.1): do not edit by hand
 2 | % Please edit documentation in R/edge.R
 3 | \docType{data}
 4 | \name{kidney}
 5 | \alias{kidney}
 6 | \title{Gene expression dataset from Rodwell et al. (2004)}
 7 | \format{\itemize{
 8 |   \item kidcov: A 133 rows by 6 columns data frame detailing the study
 9 |   design.
10 |   \item kidexpr: A 500 rows by 133 columns matrix of gene expression values,
11 |   where each row corresponds to a different probe-set and each column to a
12 |   different tissue sample.
13 |   \item age: A vector of length 133 giving the age of each sample.
14 |   \item sex: A vector of length 133 giving the sex of each sample.
15 | }}
16 | \usage{
17 | data(kidney)
18 | }
19 | \value{
20 | kidney dataset
21 | }
22 | \description{
23 | Gene expression measurements from kidney samples were obtained from 72
24 | human subjects ranging in age from 27 to 92 years. Only one array was
25 | obtained per individual, and the age and sex of each individual were
26 | recorded.
27 | }
28 | \note{
29 | These data are a random subset of 500 probe-sets from the total number of
30 | probe-sets in the original data set. To download the full data set, go to
31 | \url{http://genomine.org/edge/}. The \code{age} and \code{sex} are contained
32 | in \code{kidcov} data frame.
33 | }
34 | \examples{
35 | # import data
36 | data(kidney)
37 | sex <- kidney$sex
38 | age <- kidney$age
39 | kidexpr <- kidney$kidexpr
40 | 
41 | # create model
42 | de_obj <- build_study(data = kidexpr, adj.var = sex, tme = age,
43 | sampling = "timecourse", basis.df = 4)
44 | 
45 | # use the ODP/lrt method to determine significant genes
46 | de_odp <- odp(de_obj, bs.its=10)
47 | de_lrt <- lrt(de_obj, nullDistn = "bootstrap", bs.its = 10)
48 | 
49 | # summarize significance results
50 | summary(de_odp)
51 | }
52 | \references{
53 | Storey JD, Xiao W, Leek JT, Tompkins RG, and Davis RW. (2005) Significance
54 | analysis of time course microarray experiments. PNAS, 102: 12837-12842. \cr
55 | \url{http://www.pnas.org/content/102/36/12837.full}
56 | }
57 | \keyword{datasets}
58 | 
59 | 


--------------------------------------------------------------------------------
/man/kl_clust.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.1.1): do not edit by hand
 2 | % Please edit documentation in R/AllGenerics.R, R/deSet-methods.R
 3 | \docType{methods}
 4 | \name{kl_clust}
 5 | \alias{kl_clust}
 6 | \alias{kl_clust,deSet,deFit-method}
 7 | \alias{kl_clust,deSet,missing-method}
 8 | \title{Modular optimal discovery procedure (mODP)}
 9 | \usage{
10 | kl_clust(object, de.fit = NULL, n.mods = 50)
11 | 
12 | \S4method{kl_clust}{deSet,missing}(object, de.fit = NULL, n.mods = 50)
13 | 
14 | \S4method{kl_clust}{deSet,deFit}(object, de.fit = NULL, n.mods = 50)
15 | }
16 | \arguments{
17 | \item{object}{\code{S4 object}: \code{\linkS4class{deSet}}.}
18 | 
19 | \item{de.fit}{\code{S4 object}: \code{\linkS4class{deFit}}.}
20 | 
21 | \item{n.mods}{\code{integer}: number of modules (i.e., clusters).}
22 | }
23 | \value{
24 | A list with the following slots:
25 | \itemize{
26 |   \item {mu.full: cluster averaged fitted values from full model.}
27 |   \item {mu.null: cluster averaged fitted values from null model.}
28 |   \item {sig.full: cluster standard deviations from full model.}
29 |   \item {sig.null: cluster standard deviations from null model.}
30 |   \item {n.per.mod: total members in each cluster.}
31 |   \item {clustMembers: cluster membership for each gene.}
32 | }
33 | }
34 | \description{
35 | \code{kl_clust} is an implementation of mODP that assigns genes to modules
36 | based on the Kullback-Leibler distance.
37 | }
38 | \details{
39 | mODP utilizes a k-means clustering algorithm where genes are
40 | assigned to a cluster based on the Kullback-Leibler distance. Each gene is
41 | assigned a module-average parameter to calculate the ODP score (See Woo,
42 | Leek and Storey 2010 for more details). The mODP and full ODP produce nearly
43 | identical results but mODP has the advantage of being computationally
44 | faster.
45 | }
46 | \note{
47 | The results are generally insensitive to the number of modules after a
48 |   certain threshold of about n.mods>=50 in our experience. It is recommended
49 |   that users experiment with the number of modules. If the number of modules
50 |   is equal to the number of genes then the original ODP is implemented.
51 | }
52 | \examples{
53 | # import data
54 | library(splines)
55 | data(kidney)
56 | age <- kidney$age
57 | sex <- kidney$sex
58 | kidexpr <- kidney$kidexpr
59 | cov <- data.frame(sex = sex, age = age)
60 | 
61 | # create models
62 | null_model <- ~sex
63 | full_model <- ~sex + ns(age, df = 4)
64 | 
65 | # create deSet object from data
66 | de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model,
67 | full.model = full_model)
68 | 
69 | # mODP method
70 | de_clust <- kl_clust(de_obj)
71 | 
72 | # change the number of clusters
73 | de_clust <- kl_clust(de_obj, n.mods = 10)
74 | 
75 | # input a deFit object
76 | de_fit <- fit_models(de_obj, stat.type = "odp")
77 | de_clust <- kl_clust(de_obj, de.fit = de_fit)
78 | }
79 | \author{
80 | John Storey, Jeffrey Leek
81 | }
82 | \references{
83 | Storey JD. (2007) The optimal discovery procedure: A new approach to
84 | simultaneous significance testing. Journal of the Royal Statistical
85 | Society, Series B, 69: 347-368.
86 | 
87 | Storey JD, Dai JY, and Leek JT. (2007) The optimal discovery procedure for
88 | large-scale significance testing, with applications to comparative
89 | microarray experiments. Biostatistics, 8: 414-432.
90 | 
91 | Woo S, Leek JT, Storey JD (2010) A computationally efficient modular optimal
92 |  discovery procedure. Bioinformatics, 27(4): 509-515.
93 | }
94 | \seealso{
95 | \code{\link{odp}}, \code{\link{fit_models}}
96 | }
97 | 
98 | 


--------------------------------------------------------------------------------
/man/lrt.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2 (4.1.1): do not edit by hand
  2 | % Please edit documentation in R/AllGenerics.R, R/deSet-methods.R
  3 | \docType{methods}
  4 | \name{lrt}
  5 | \alias{lrt}
  6 | \alias{lrt,deSet,deFit-method}
  7 | \alias{lrt,deSet,missing-method}
  8 | \title{Performs F-test (likelihood ratio test using Normal likelihood)}
  9 | \usage{
 10 | lrt(object, de.fit, nullDistn = c("normal", "bootstrap"), weights = NULL,
 11 |   bs.its = 100, seed = NULL, verbose = TRUE, mod.F = FALSE, ...)
 12 | 
 13 | \S4method{lrt}{deSet,missing}(object, de.fit, nullDistn = c("normal",
 14 |   "bootstrap"), weights = NULL, bs.its = 100, seed = NULL,
 15 |   verbose = TRUE, mod.F = FALSE, ...)
 16 | 
 17 | \S4method{lrt}{deSet,deFit}(object, de.fit, nullDistn = c("normal",
 18 |   "bootstrap"), weights = NULL, bs.its = 100, seed = NULL,
 19 |   verbose = TRUE, mod.F = FALSE, ...)
 20 | }
 21 | \arguments{
 22 | \item{object}{\code{S4 object}: \code{\linkS4class{deSet}}.}
 23 | 
 24 | \item{de.fit}{\code{S4 object}: \code{\linkS4class{deFit}}. Optional.}
 25 | 
 26 | \item{nullDistn}{\code{character}: either "normal" or "bootstrap". If
 27 | "normal" then the p-values are calculated using the F distribution. If
 28 | "bootstrap" then a bootstrap algorithm is implemented to simulate
 29 | statistics from the null distribution. In the "bootstrap" case, empirical
 30 | p-values are calculated using the observed and null statistics (see
 31 | \code{\link{empPvals}}). Default is "normal".}
 32 | 
 33 | \item{weights}{\code{matrix}: weights for each observation. Default is NULL.}
 34 | 
 35 | \item{bs.its}{\code{integer}: number of null statistics generated (only
 36 | applicable for "bootstrap" method). Default is 100.}
 37 | 
 38 | \item{seed}{\code{integer}: set the seed value. Default is NULL.}
 39 | 
 40 | \item{verbose}{\code{boolean}: print iterations for bootstrap method.
 41 | Default is TRUE.}
 42 | 
 43 | \item{mod.F}{\code{boolean}: Moderated F-test, recommended for experiments
 44 | with a small sample size. Default is FALSE.}
 45 | 
 46 | \item{...}{Additional arguments for \code{\link{apply_qvalue}} and
 47 | \code{\link{empPvals}} function.}
 48 | }
 49 | \value{
 50 | \code{\linkS4class{deSet}} object
 51 | }
 52 | \description{
 53 | \code{lrt} performs a generalized likelihood ratio test using the full and
 54 | null models.
 55 | }
 56 | \details{
 57 | \code{lrt} fits the full and null models to each gene using the
 58 | function \code{\link{fit_models}} and then performs a likelihood ratio test.
 59 | The user has the option to calculate p-values using a Normal distribution
 60 | assumption or through a bootstrap algorithm. If \code{nullDistn} is
 61 | "bootstrap" then empirical p-values will be determined from the
 62 | \code{\link{qvalue}} package (see \code{\link{empPvals}}).
 63 | }
 64 | \examples{
 65 | # import data
 66 | library(splines)
 67 | data(kidney)
 68 | age <- kidney$age
 69 | sex <- kidney$sex
 70 | kidexpr <- kidney$kidexpr
 71 | cov <- data.frame(sex = sex, age = age)
 72 | 
 73 | # create models
 74 | null_model <- ~sex
 75 | full_model <- ~sex + ns(age, df = 4)
 76 | 
 77 | # create deSet object from data
 78 | de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model,
 79 | full.model = full_model)
 80 | 
 81 | # lrt method
 82 | de_lrt <- lrt(de_obj, nullDistn = "normal")
 83 | 
 84 | # to generate p-values from bootstrap
 85 | de_lrt <- lrt(de_obj, nullDistn = "bootstrap", bs.its = 30)
 86 | 
 87 | # input a deFit object directly
 88 | de_fit <- fit_models(de_obj, stat.type = "lrt")
 89 | de_lrt <- lrt(de_obj, de.fit = de_fit)
 90 | 
 91 | # summarize object
 92 | summary(de_lrt)
 93 | }
 94 | \author{
 95 | John Storey, Andrew Bass
 96 | }
 97 | \references{
 98 | Storey JD, Xiao W, Leek JT, Tompkins RG, and Davis RW. (2005) Significance
 99 | analysis of time course microarray experiments. Proceedings of the National
100 | Academy of Sciences, 102: 12837-12842.
101 | 
102 | \url{http://en.wikipedia.org/wiki/Likelihood-ratio_test}
103 | }
104 | \seealso{
105 | \code{\linkS4class{deSet}}, \code{\link{build_models}},
106 | \code{\link{odp}}
107 | }
108 | 
109 | 


--------------------------------------------------------------------------------
/man/nullMatrix.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.1.1): do not edit by hand
 2 | % Please edit documentation in R/AllGenerics.R, R/getMethods.R, R/setMethods.R
 3 | \docType{methods}
 4 | \name{nullMatrix}
 5 | \alias{nullMatrix}
 6 | \alias{nullMatrix,deSet-method}
 7 | \alias{nullMatrix<-}
 8 | \alias{nullMatrix<-,deSet-method}
 9 | \title{Matrix representation of null model}
10 | \usage{
11 | nullMatrix(object)
12 | 
13 | nullMatrix(object) <- value
14 | 
15 | \S4method{nullMatrix}{deSet}(object)
16 | 
17 | \S4method{nullMatrix}{deSet}(object) <- value
18 | }
19 | \arguments{
20 | \item{object}{\code{S4 object}: \code{\linkS4class{deSet}}}
21 | 
22 | \item{value}{\code{matrix}: null model matrix where columns are covariates
23 | and rows are observations}
24 | }
25 | \value{
26 | \code{nullMatrix} returns the value of the null model matrix.
27 | }
28 | \description{
29 | These generic functions access and set the null matrix for a
30 | \code{\linkS4class{deSet}} object.
31 | }
32 | \examples{
33 | # import data
34 | library(splines)
35 | data(kidney)
36 | age <- kidney$age
37 | sex <- kidney$sex
38 | kidexpr <- kidney$kidexpr
39 | cov <- data.frame(sex = sex, age = age)
40 | 
41 | # create models
42 | null_model <- ~sex
43 | full_model <- ~sex + ns(age, df = 4)
44 | 
45 | # create deSet object from data
46 | de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model,
47 | full.model = full_model)
48 | 
49 | # extract the null model as a matrix
50 | mat_null <- nullMatrix(de_obj)
51 | }
52 | \author{
53 | John Storey, Andrew Bass
54 | }
55 | \seealso{
56 | \code{\linkS4class{deSet}}, \code{\link{nullModel}} and
57 | \code{\link{fullModel}}
58 | }
59 | 
60 | 


--------------------------------------------------------------------------------
/man/nullModel.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.1.1): do not edit by hand
 2 | % Please edit documentation in R/AllGenerics.R, R/getMethods.R, R/setMethods.R
 3 | \docType{methods}
 4 | \name{nullModel}
 5 | \alias{nullModel}
 6 | \alias{nullModel,deSet-method}
 7 | \alias{nullModel<-}
 8 | \alias{nullModel<-,deSet-method}
 9 | \title{Null model equation from deSet object}
10 | \usage{
11 | nullModel(object)
12 | 
13 | nullModel(object) <- value
14 | 
15 | \S4method{nullModel}{deSet}(object)
16 | 
17 | \S4method{nullModel}{deSet}(object) <- value
18 | }
19 | \arguments{
20 | \item{object}{\code{S4 object}: \code{\linkS4class{deSet}}}
21 | 
22 | \item{value}{\code{formula}: The experiment design for the null model.}
23 | }
24 | \value{
25 | \code{nullModel} returns the formula for the null model.
26 | }
27 | \description{
28 | These generic functions access and set the null model for a
29 | \code{\linkS4class{deSet}} object.
30 | }
31 | \examples{
32 | # import data
33 | library(splines)
34 | data(kidney)
35 | age <- kidney$age
36 | sex <- kidney$sex
37 | kidexpr <- kidney$kidexpr
38 | cov <- data.frame(sex = sex, age = age)
39 | 
40 | # create models
41 | null_model <- ~sex
42 | full_model <- ~sex + ns(age, df = 4)
43 | 
44 | # create deSet object from data
45 | de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model,
46 | full.model = full_model)
47 | 
48 | # extract the null model equation
49 | mod_null <- nullModel(de_obj)
50 | 
51 | # change null model in experiment but must update full model
52 | nullModel(de_obj) <- ~1
53 | fullModel(de_obj) <- ~1 + ns(age, df=4)
54 | }
55 | \author{
56 | John Storey, Andrew Bass
57 | }
58 | \seealso{
59 | \code{\linkS4class{deSet}}
60 | }
61 | \keyword{nullModel}
62 | \keyword{nullModel<-}
63 | 
64 | 


--------------------------------------------------------------------------------
/man/odp.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2 (4.1.1): do not edit by hand
  2 | % Please edit documentation in R/AllGenerics.R, R/deSet-methods.R
  3 | \docType{methods}
  4 | \name{odp}
  5 | \alias{odp}
  6 | \alias{odp,deSet,deFit-method}
  7 | \alias{odp,deSet,missing-method}
  8 | \title{The optimal discovery procedure}
  9 | \usage{
 10 | odp(object, de.fit, odp.parms = NULL, weights = NULL, bs.its = 100,
 11 |   n.mods = 50, seed = NULL, verbose = TRUE, ...)
 12 | 
 13 | \S4method{odp}{deSet,missing}(object, de.fit, odp.parms = NULL,
 14 |   weights = NULL, bs.its = 100, n.mods = 50, seed = NULL,
 15 |   verbose = TRUE, ...)
 16 | 
 17 | \S4method{odp}{deSet,deFit}(object, de.fit, odp.parms = NULL,
 18 |   weights = NULL, bs.its = 100, n.mods = 50, seed = NULL,
 19 |   verbose = TRUE, ...)
 20 | }
 21 | \arguments{
 22 | \item{object}{\code{S4 object}: \code{\linkS4class{deSet}}}
 23 | 
 24 | \item{de.fit}{\code{S4 object}: \code{\linkS4class{deFit}}. Optional.}
 25 | 
 26 | \item{odp.parms}{\code{list}: parameters for each cluster. See
 27 | \code{\link{kl_clust}}.}
 28 | 
 29 | \item{weights}{\code{matrix}: weights for each observation. Default is NULL.}
 30 | 
 31 | \item{bs.its}{\code{numeric}: number of null bootstrap iterations. Default
 32 | is 100.}
 33 | 
 34 | \item{n.mods}{\code{integer}: number of clusters used in
 35 | \code{\link{kl_clust}}. Default is 50.}
 36 | 
 37 | \item{seed}{\code{integer}: set the seed value. Default is NULL.}
 38 | 
 39 | \item{verbose}{\code{logical}: print iterations for bootstrap method.
 40 | Default is TRUE.}
 41 | 
 42 | \item{...}{Additional arguments for \code{\link{qvalue}} and
 43 | \code{\link{empPvals}}.}
 44 | }
 45 | \value{
 46 | \code{\linkS4class{deSet}} object
 47 | }
 48 | \description{
 49 | \code{odp} performs the optimal discovery procedure, which is a framework for
 50 | optimally performing many hypothesis tests in a high-dimensional study. When
 51 | testing whether a feature is significant, the optimal discovery procedure
 52 | uses information across all features when testing for significance.
 53 | }
 54 | \details{
 55 | The full ODP estimator computationally grows quadratically with respect to
 56 | the number of genes. This becomes computationally taxing at a certain point.
 57 | Therefore, an alternative method called mODP is used which has been shown to
 58 | provide results that are very similar. mODP utilizes a clustering algorithm
 59 | where genes are assigned to a cluster based on the Kullback-Leibler distance.
 60 | Each gene is assigned a module-average parameter to calculate the ODP score
 61 | and it reduces the computation time to approximately linear (see Woo, Leek
 62 | and Storey 2010). If the number of clusters is equal to the number of genes
 63 | then the original ODP is implemented. Depending on the number of hypothesis
 64 | tests, this can take some time.
 65 | }
 66 | \examples{
 67 | # import data
 68 | library(splines)
 69 | data(kidney)
 70 | age <- kidney$age
 71 | sex <- kidney$sex
 72 | kidexpr <- kidney$kidexpr
 73 | cov <- data.frame(sex = sex, age = age)
 74 | 
 75 | # create models
 76 | null_model <- ~sex
 77 | full_model <- ~sex + ns(age, df = 4)
 78 | 
 79 | # create deSet object from data
 80 | de_obj <- build_models(data = kidexpr, cov = cov,
 81 | null.model = null_model, full.model = full_model)
 82 | 
 83 | # odp method
 84 | de_odp <- odp(de_obj, bs.its = 30)
 85 | 
 86 | # input a deFit object or ODP parameters ... not necessary
 87 | de_fit <- fit_models(de_obj, stat.type = "odp")
 88 | de_clust <- kl_clust(de_obj, n.mods = 10)
 89 | de_odp <- odp(de_obj, de.fit = de_fit, odp.parms = de_clust,
 90 | bs.its = 30)
 91 | 
 92 | # summarize object
 93 | summary(de_odp)
 94 | }
 95 | \author{
 96 | John Storey, Jeffrey Leek, Andrew Bass
 97 | }
 98 | \references{
 99 | Storey JD. (2007) The optimal discovery procedure: A new approach to
100 | simultaneous significance testing. Journal of the Royal Statistical
101 | Society, Series B, 69: 347-368.
102 | 
103 | Storey JD, Dai JY, and Leek JT. (2007) The optimal discovery procedure for
104 | large-scale significance testing, with applications to comparative
105 | microarray experiments. Biostatistics, 8: 414-432.
106 | 
107 | Woo S, Leek JT, Storey JD (2010) A computationally efficient modular
108 | optimal discovery procedure. Bioinformatics, 27(4): 509-515.
109 | }
110 | \seealso{
111 | \code{\link{kl_clust}}, \code{\link{build_models}} and
112 | \code{\linkS4class{deSet}}
113 | }
114 | 
115 | 


--------------------------------------------------------------------------------
/man/qvalueObj.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.1.1): do not edit by hand
 2 | % Please edit documentation in R/AllGenerics.R, R/getMethods.R, R/setMethods.R
 3 | \docType{methods}
 4 | \name{qvalueObj}
 5 | \alias{qvalueObj}
 6 | \alias{qvalueObj,deSet-method}
 7 | \alias{qvalueObj<-}
 8 | \alias{qvalueObj<-,deSet-method}
 9 | \title{Access/set qvalue slot}
10 | \usage{
11 | qvalueObj(object)
12 | 
13 | qvalueObj(object) <- value
14 | 
15 | \S4method{qvalueObj}{deSet}(object)
16 | 
17 | \S4method{qvalueObj}{deSet}(object) <- value
18 | }
19 | \arguments{
20 | \item{object}{\code{S4 object}: \code{\linkS4class{deSet}}}
21 | 
22 | \item{value}{S3 \code{object}: \code{\link{qvalue}}}
23 | }
24 | \value{
25 | \code{qvalueObj} returns a \code{\link{qvalue}} object.
26 | }
27 | \description{
28 | These generic functions access and set the \code{qvalue} object in the
29 | \code{\linkS4class{deSet}} object.
30 | }
31 | \examples{
32 | # import data
33 | library(splines)
34 | library(qvalue)
35 | data(kidney)
36 | age <- kidney$age
37 | sex <- kidney$sex
38 | kidexpr <- kidney$kidexpr
39 | cov <- data.frame(sex = sex, age = age)
40 | 
41 | # create models
42 | null_model <- ~sex
43 | full_model <- ~sex + ns(age, df = 4)
44 | 
45 | # create deSet object from data
46 | de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model,
47 | full.model = full_model)
48 | 
49 | # run the odp method
50 | de_odp <- odp(de_obj, bs.its = 20)
51 | 
52 | # extract out significance results
53 | qval_obj <- qvalueObj(de_odp)
54 | 
55 | # run qvalue and assign it to deSet slot
56 | pvals <- qval_obj$pvalues
57 | qval_new <- qvalue(pvals, pfdr = TRUE, fdr.level = 0.1)
58 | qvalueObj(de_odp) <- qval_new
59 | }
60 | \author{
61 | John Storey, Andrew Bass
62 | }
63 | \seealso{
64 | \code{\link{lrt}}, \code{\link{odp}} and
65 | \code{\linkS4class{deSet}}
66 | }
67 | 
68 | 


--------------------------------------------------------------------------------
/man/resFull.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.1.1): do not edit by hand
 2 | % Please edit documentation in R/AllGenerics.R, R/getMethods.R
 3 | \docType{methods}
 4 | \name{resFull}
 5 | \alias{resFull}
 6 | \alias{resFull,deFit-method}
 7 | \title{Residuals of full model fit}
 8 | \usage{
 9 | resFull(object)
10 | 
11 | \S4method{resFull}{deFit}(object)
12 | }
13 | \arguments{
14 | \item{object}{\code{S4 object}: \code{\linkS4class{deFit}}}
15 | }
16 | \value{
17 | \code{resFull} returns a matrix of residuals from full model.
18 | }
19 | \description{
20 | Access the fitted full model residuals in a \code{\linkS4class{deFit}}
21 | object.
22 | }
23 | \examples{
24 | # import data
25 | library(splines)
26 | data(kidney)
27 | age <- kidney$age
28 | sex <- kidney$sex
29 | kidexpr <- kidney$kidexpr
30 | cov <- data.frame(sex = sex, age = age)
31 | 
32 | # create models
33 | null_model <- ~sex
34 | full_model <- ~sex + ns(age, df = 4)
35 | 
36 | # create deSet object from data
37 | de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model,
38 | full.model = full_model)
39 | 
40 | # run fit_models to get model fits
41 | de_fit <- fit_models(de_obj)
42 | 
43 | # extract out the full residuals from the model fit
44 | res_full <- resFull(de_fit)
45 | }
46 | \author{
47 | John Storey, Andrew Bass
48 | }
49 | \seealso{
50 | \code{\link{fit_models}}
51 | }
52 | 
53 | 


--------------------------------------------------------------------------------
/man/resNull.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.1.1): do not edit by hand
 2 | % Please edit documentation in R/AllGenerics.R, R/getMethods.R
 3 | \docType{methods}
 4 | \name{resNull}
 5 | \alias{resNull}
 6 | \alias{resNull,deFit-method}
 7 | \title{Residuals of null model fit}
 8 | \usage{
 9 | resNull(object)
10 | 
11 | \S4method{resNull}{deFit}(object)
12 | }
13 | \arguments{
14 | \item{object}{\code{S4 object}: \code{\linkS4class{deFit}}}
15 | }
16 | \value{
17 | \code{resNull} returns a matrix of residuals from null model.
18 | }
19 | \description{
20 | Access the fitted null model residuals in a \code{\linkS4class{deFit}}
21 | object.
22 | }
23 | \examples{
24 | # import data
25 | library(splines)
26 | data(kidney)
27 | age <- kidney$age
28 | sex <- kidney$sex
29 | kidexpr <- kidney$kidexpr
30 | cov <- data.frame(sex = sex, age = age)
31 | 
32 | # create models
33 | null_model <- ~sex
34 | full_model <- ~sex + ns(age, df = 4)
35 | 
36 | # create deSet object from data
37 | de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model,
38 | full.model = full_model)
39 | 
40 | # run fit_models to get model fits
41 | de_fit <- fit_models(de_obj)
42 | 
43 | # extract out the null residuals from the model fits
44 | res_null <- resNull(de_fit)
45 | }
46 | \author{
47 | John Storey, Andrew Bass
48 | }
49 | \seealso{
50 | \code{\link{fit_models}}
51 | }
52 | 
53 | 


--------------------------------------------------------------------------------
/man/sType.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.1.1): do not edit by hand
 2 | % Please edit documentation in R/AllGenerics.R, R/getMethods.R
 3 | \docType{methods}
 4 | \name{sType}
 5 | \alias{sType}
 6 | \alias{sType,deFit-method}
 7 | \title{Statistic type used in analysis}
 8 | \usage{
 9 | sType(object)
10 | 
11 | \S4method{sType}{deFit}(object)
12 | }
13 | \arguments{
14 | \item{object}{\code{S4 object}: \code{\linkS4class{deFit}}}
15 | }
16 | \value{
17 | \code{sType} returns the statistic type: either "odp" or "lrt".
18 | }
19 | \description{
20 | Access the statistic type in a \code{\linkS4class{deFit}} object. Can
21 | either be the optimal discovery procedure (odp) or the likelihood ratio
22 | test (lrt).
23 | }
24 | \examples{
25 | # import data
26 | library(splines)
27 | data(kidney)
28 | age <- kidney$age
29 | sex <- kidney$sex
30 | kidexpr <- kidney$kidexpr
31 | cov <- data.frame(sex = sex, age = age)
32 | 
33 | # create models
34 | null_model <- ~sex
35 | full_model <- ~sex + ns(age, df = 4)
36 | 
37 | # create deSet object from data
38 | de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model,
39 | full.model = full_model)
40 | 
41 | # run fit_models to get model fits
42 | de_fit <- fit_models(de_obj)
43 | 
44 | # extract the statistic type of model fits
45 | stat_type <- sType(de_fit)
46 | }
47 | \author{
48 | John Storey, Andrew Bass
49 | }
50 | \seealso{
51 | \code{\link{fit_models}}, \code{\linkS4class{deFit}} and
52 | \code{\linkS4class{deSet}}
53 | }
54 | \keyword{sType}
55 | 
56 | 


--------------------------------------------------------------------------------
/man/show.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.1.1): do not edit by hand
 2 | % Please edit documentation in R/AllGenerics.R, R/deFit-methods.R, R/deSet-methods.R
 3 | \docType{methods}
 4 | \name{show}
 5 | \alias{show}
 6 | \alias{show,deFit-method}
 7 | \alias{show,deSet-method}
 8 | \title{Show function for deFit and deSet}
 9 | \usage{
10 | show(object)
11 | 
12 | \S4method{show}{deFit}(object)
13 | 
14 | \S4method{show}{deSet}(object)
15 | }
16 | \arguments{
17 | \item{object}{\code{S4 object}: \code{\linkS4class{deFit}} or \code{\linkS4class{deSet}}}
18 | 
19 | \item{\dots}{additional parameters}
20 | }
21 | \value{
22 | Nothing of interest
23 | }
24 | \description{
25 | Show function for \code{\linkS4class{deFit}} and \code{\linkS4class{deSet}}
26 | objects.
27 | }
28 | \examples{
29 | # import data
30 | library(splines)
31 | data(kidney)
32 | age <- kidney$age
33 | sex <- kidney$sex
34 | kidexpr <- kidney$kidexpr
35 | cov <- data.frame(sex = sex, age = age)
36 | 
37 | # create models
38 | null_model <- ~sex
39 | full_model <- ~sex + ns(age, df = 4)
40 | 
41 | # create deSet object from data
42 | de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model,
43 | full.model = full_model)
44 | 
45 | # get summary
46 | summary(de_obj)
47 | 
48 | # run odp and show the resulting object
49 | de_odp <- odp(de_obj, bs.its= 20)
50 | de_odp
51 | }
52 | \author{
53 | John Storey, Andrew Bass
54 | }
55 | 
56 | 


--------------------------------------------------------------------------------
/man/summary.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.1.1): do not edit by hand
 2 | % Please edit documentation in R/AllGenerics.R, R/deFit-methods.R, R/deSet-methods.R
 3 | \docType{methods}
 4 | \name{summary}
 5 | \alias{summary}
 6 | \alias{summary,deFit-method}
 7 | \alias{summary,deSet-method}
 8 | \title{Summary of deFit and deSet}
 9 | \usage{
10 | summary(object, ...)
11 | 
12 | \S4method{summary}{deFit}(object)
13 | 
14 | \S4method{summary}{deSet}(object, ...)
15 | }
16 | \arguments{
17 | \item{object}{\code{S4 object}: \code{\linkS4class{deFit}} or \code{\linkS4class{deSet}}}
18 | 
19 | \item{\dots}{additional parameters}
20 | }
21 | \value{
22 | Summary of \code{\linkS4class{deSet}} object
23 | }
24 | \description{
25 | Summary of \code{\linkS4class{deFit}} and \code{\linkS4class{deSet}} objects.
26 | }
27 | \examples{
28 | # import data
29 | library(splines)
30 | data(kidney)
31 | age <- kidney$age
32 | sex <- kidney$sex
33 | kidexpr <- kidney$kidexpr
34 | cov <- data.frame(sex = sex, age = age)
35 | 
36 | # create models
37 | null_model <- ~sex
38 | full_model <- ~sex + ns(age, df = 4)
39 | 
40 | # create deSet object from data
41 | de_obj <- build_models(data = kidexpr, cov = cov, null.model = null_model,
42 | full.model = full_model)
43 | 
44 | # get summary
45 | summary(de_obj)
46 | 
47 | # run odp and summarize
48 | de_odp <- odp(de_obj, bs.its= 20)
49 | summary(de_odp)
50 | }
51 | \author{
52 | John Storey, Andrew Bass
53 | }
54 | \keyword{summary}
55 | 
56 | 


--------------------------------------------------------------------------------
/src/edge-init.c:
--------------------------------------------------------------------------------
 1 | #include "edgeKLODP.h"
 2 | #include <R_ext/Rdynload.h>
 3 | 
 4 | static R_NativePrimitiveArgType odpScoreCluster_t[] = {
 5 |   REALSXP, REALSXP, REALSXP, INTSXP, INTSXP, INTSXP, INTSXP, INTSXP, REALSXP
 6 | };
 7 | 
 8 | static R_NativePrimitiveArgType kldistance_t[] = {
 9 |   REALSXP, REALSXP, REALSXP, REALSXP, INTSXP, INTSXP, INTSXP, REALSXP
10 | };
11 | 
12 | R_CMethodDef cMethods[] = {
13 |   {"odpScoreCluster", (DL_FUNC) &odpScoreCluster, 9, odpScoreCluster_t},
14 |   {"kldistance", (DL_FUNC) &kldistance, 8, kldistance_t},
15 |   {NULL, NULL, 0}
16 | 
17 | };
18 | 
19 | void R_init_edge(DllInfo *info) {
20 |   R_registerRoutines(info, cMethods, NULL, NULL, NULL);
21 | }


--------------------------------------------------------------------------------
/src/edgeKLODP.c:
--------------------------------------------------------------------------------
  1 | #include "edgeKLODP.h"
  2 | 
  3 | /********************************************************************************************
  4 | functions for KLODP:
  5 |   odpScoreCluster: compute sum of normal densities to be used as numerator or denominator in score
  6 |   with c.member.
  7 | ********************************************************************************************/
  8 | 
  9 | void odpScoreCluster(double *sumDat, double *mu, double *sigma, int *m, int *n, int *p, int *null, int *cluster, double *scr) {
 10 |   int i, j, g;
 11 |   double *first, *middle;
 12 | 
 13 |   /* if alternative component, set up a couple of vectors */
 14 |   /* allocate memory */
 15 |    first = vector(0, *m - 1);
 16 | 
 17 |   /* initialize to zero */
 18 | 	for(i = 0; i < *m; i++)
 19 |       first[i] = 0.0;
 20 | 
 21 |   if(*null == 0) {
 22 |     /* allocate memory */
 23 |     middle = vector(0, *p - 1);
 24 | 
 25 |     /* initialize to zero */
 26 | 	for(i = 0; i < *p; i++) {
 27 |       middle[i] = 0.0;
 28 |     }
 29 |   }
 30 | 
 31 |   for(i = 0; i < *m; i++) {
 32 | 	for(j=0; j< *n ; j++){
 33 | 		  first[i] += sumDat[j + i * *n]*sumDat[j + i * *n];
 34 | 	}
 35 |   }
 36 | 
 37 |   for(i = 0; i < *m; i++) {
 38 |     scr[i] = 0.0;
 39 | 
 40 |     for(g = 0; g < *p; g++) {			/* g scans genes */
 41 |       /* alternative component */
 42 |       if(*null == 0) {
 43 |          /* middle[j] += 2 * sumDat[i + (l + 1) * *m] * mu[g + l * *m];*/
 44 | 		    for(j=0; j< *n ; j++){
 45 | 			    middle[g] += 2 * sumDat[j + i * *n]*sumDat[j + g * *n + *n * *m];
 46 | 		    }
 47 |   		  /*last[g] += nGrp[l] * mu[g + l * *m] * mu[g + l * *m];*/
 48 | 		    scr[i] += pow(1 / sigma[g], *n) * exp(-0.5 / sigma[g] / sigma[g] * (first[i] - middle[g] + mu[g])) * cluster[g];
 49 |       } else /* null component */
 50 |         scr[i] += pow(1 / sigma[g], *n) * exp(-0.5 / sigma[g] / sigma[g] * first[i]) * cluster[g];
 51 |     }
 52 | 	    /* reset vectors to zero, if necessary */
 53 |     if(*null == 0) {
 54 |       for(g = 0; g < *p; g++) {
 55 |         middle[g] = 0.0;
 56 |       }
 57 |     }
 58 | 
 59 |   }
 60 | 
 61 |   /* free memory, if necessary */
 62 |     free_vector(first, 0, *m - 1);
 63 | 
 64 |   if(*null == 0) {
 65 |     free_vector(middle, 0, *p - 1);
 66 |   }
 67 | }
 68 | 
 69 | void kldistance(double *centerFit, double *centerVar, double *fit, double *var, int *m, int *nc, int *n, double *kldd) {
 70 |   int i, j, l;
 71 |   double sum;
 72 | 
 73 |   for(i = 0; i < *m; i++) {
 74 |     for(j = 0; j < *nc; j++) {			/* l scans clusters */
 75 | 	kldd[j + i* *nc] = 0.0;
 76 | 	sum = 0.0;
 77 | 	for(l=0; l< *n ; l++){
 78 | 		sum += pow((centerFit[l + j* *n]-fit[l + i* *n]),2);
 79 | 	}
 80 | 		kldd[j + i* *nc] =  (sum * (1 / centerVar[j] + 1 / var[i]))/2 + *n * (centerVar[j] / var[i] + var[i] / centerVar[j])/2 - *n;
 81 |     }
 82 |   }
 83 | }
 84 | 
 85 | 
 86 | 
 87 | /* quicksort routine */
 88 | void sortQK(int low, int high, int n, double *w) {
 89 |   if(low < high) {
 90 |     int lo = low, hi = high + 1;
 91 |     double elem = w[low];
 92 |     for (;;) {
 93 |       while ((lo < n) && (w[++lo] < elem));
 94 |       while ((hi >= 0) && (w[--hi] > elem));
 95 |       if (lo < hi) swapQK(lo, hi, w);
 96 |       else break;
 97 |     }
 98 | 
 99 |     swapQK(low, hi, w);
100 |     sortQK(low, hi - 1, n, w);
101 |     sortQK(hi + 1, high, n, w);
102 |   }
103 | }
104 | 
105 | /* swap function for use with sortQK() */
106 | void swapQK(int i, int j, double *w) {
107 |   double tmp = w[i];
108 | 
109 |   w[i] = w[j];
110 |   w[j] = tmp;
111 | }
112 | 
113 | /* allocate a int vector with subscript range v[nl...nh] */
114 | int *ivector(int nl, int nh) {
115 |   int *v;
116 | 
117 |   v = (int *) malloc((size_t)((nh - nl + 1 + NR_END) * sizeof(int)));
118 |   if(!v) Rprintf("\n allocation failure in ivector()\n");
119 |   return v - nl + NR_END;
120 | }
121 | 
122 | /* free a int vector allocated with ivector() */
123 | void free_ivector(int *v, int nl, int nh) {
124 |   free((FREE_ARG) (v + nl - NR_END));
125 | }
126 | 
127 | /* allocate a int matrix with subscript ranges m[nrl...nrh][ncl...nch] */
128 | int **imatrix(int nrl, int nrh, int ncl, int nch) {
129 |   int i, nrow = nrh - nrl + 1, ncol = nch - ncl + 1;
130 |   int **m;
131 | 
132 |   /* allocate pointers to rows */
133 |   m = (int **) malloc((size_t)((nrow + NR_END) * sizeof(int*)));
134 |   if(!m) Rprintf("%s", "allocation fialure\n");
135 | 
136 |   m += NR_END;
137 |   m -= nrl;
138 | 
139 |   /* set pointer to rows */
140 |   m[nrl] = (int *) malloc((size_t)((nrow * ncol + NR_END) * sizeof(int)));
141 |   if(!m[nrl]) Rprintf("%s", "allocation fialure\n");
142 |   m[nrl] += NR_END;
143 |   m[nrl] -= ncl;
144 | 
145 |   for(i = nrl + 1; i <= nrh; i++) m[i] = m[i - 1] + ncol;
146 |   return m;
147 | }
148 | 
149 | /* free int matrix allocated with imatrix() */
150 | void free_imatrix(int **m, int nrl, int nrh, int ncl, int nch) {
151 |   free((FREE_ARG) (m[nrl] + ncl - NR_END));
152 |   free((FREE_ARG) (m + nrl - NR_END));
153 | }
154 | 
155 | /* allocate a double matrix with subscript ranges m[nrl...nrh][ncl...nch] */
156 | double **matrix(int nrl, int nrh, int ncl, int nch) {
157 |   int i, nrow = nrh - nrl + 1, ncol = nch - ncl + 1;
158 |   double **m;
159 | 
160 |   /* allocate pointers to rows */
161 |   m = (double **) malloc((size_t)((nrow + NR_END) * sizeof(double*)));
162 |   if(!m) Rprintf("%s", "allocation fialure\n");
163 | 
164 |   m += NR_END;
165 |   m -= nrl;
166 | 
167 |   /* set pointer to rows */
168 |   m[nrl] = (double *) malloc((size_t)((nrow * ncol + NR_END) * sizeof(double)));
169 |   if(!m[nrl]) Rprintf("%s", "allocation fialure\n");
170 |   m[nrl] += NR_END;
171 |   m[nrl] -= ncl;
172 | 
173 |   for(i = nrl + 1; i <= nrh; i++) m[i] = m[i - 1] + ncol;
174 |   return m;
175 | }
176 | 
177 | /* free double matrix allocated with matrix() */
178 | void free_matrix(double **m, int nrl, int nrh, int ncl, int nch) {
179 |   free((FREE_ARG) (m[nrl] + ncl - NR_END));
180 |   free((FREE_ARG) (m + nrl - NR_END));
181 | }
182 | 
183 | /* allocate a double vector with subscript range v[nl...nh] */
184 | double *vector(int nl, int nh) {
185 |   double *v;
186 | 
187 |   v = (double *) malloc((size_t) ((nh - nl + 1 + NR_END) * sizeof(double)));
188 |   if(!v) Rprintf("\n allocation failure in vector()\n");
189 |   return v - nl + NR_END;
190 | }
191 | 
192 | /* free double vector allocated with vector() */
193 | void free_vector(double *v, int nl, int nh) {
194 |   free((FREE_ARG) (v + nl - NR_END));
195 | }
196 | 


--------------------------------------------------------------------------------
/src/edgeKLODP.h:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>/* --------- printf */
 2 | #include <stdlib.h>/* -------- malloc(), free(), etc. */
 3 | #include <math.h>/* ---------- log(), exp(), etc. */
 4 | #include <R.h>/* ------------- R functions */
 5 | #include <Rinternals.h>
 6 | 
 7 | #define NR_END 1
 8 | #define FREE_ARG void*
 9 | 
10 | /***********************************************************************
11 |   EDGE-specific functions
12 | ***********************************************************************/
13 | void odpScoreCluster(double *, double *, double *, int *, int *, int *, int *, int *, double *);
14 | void kldistance(double *, double *, double *, double *, int *, int *, int *, double *);
15 | 
16 | /***********************************************************************
17 |   utility functions
18 | ***********************************************************************/
19 | void sortQK(int, int, int, double *);
20 | void swapQK(int, int, double *);
21 | double *vector(int, int);
22 | void free_vector(double *, int, int);
23 | int *ivector(int, int);
24 | void free_ivector(int *, int, int);
25 | double **matrix(int, int, int, int);
26 | void free_matrix(double **, int, int, int, int);
27 | int **imatrix(int, int, int, int);
28 | void free_imatrix(int **, int, int, int, int);
29 | 


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(edge)
3 | 
4 | test_check("edge")
5 | 


--------------------------------------------------------------------------------
/tests/testthat/test-edgeSet.R:
--------------------------------------------------------------------------------
 1 | library(edge)
 2 | context("deSet object methods")
 3 | 
 4 | # create data composed of noise -----------------------------------------------
 5 | dat_noise <- matrix(rnorm(3000), ncol = 10)
 6 | cov <- data.frame(grp = c(rep(1, 5), rep(0, 5)))
 7 | 
 8 | # make deSet object ---------------------------------------------------------
 9 | de_obj <- build_models(dat_noise, cov = cov, full.model = ~1 + grp,
10 |                       null.model = ~1)
11 | 
12 | de_obj <- lrt(de_obj)
13 | 
14 | test_that("get methods", {
15 |   expect_equal(fullModel(de_obj), ~1 + grp)
16 |   expect_equal(nullModel(de_obj), ~1)
17 |   expect_equal(individual(de_obj), factor())
18 | 
19 |   expect_equal(class(qvalueObj(de_obj)), "qvalue")
20 | })
21 | 
22 | cov$new_grp <- 1:10
23 | pData(de_obj) <- cov
24 | nullModel(de_obj) <- ~1 + new_grp
25 | fullModel(de_obj) <- ~1 + grp + new_grp
26 | mat_full <- model.matrix(~1 + grp + new_grp, cov)
27 | mat_null <- model.matrix(~1 + new_grp, cov)
28 | individual(de_obj) <- as.factor(1:10)
29 | 
30 | test_that("set methods", {
31 |   expect_equal(fullModel(de_obj), ~1 + grp + new_grp)
32 |   expect_equal(fullMatrix(de_obj), mat_full)
33 |   expect_error(fullModel(de_obj) <- ~1 + DNE)
34 | 
35 |   expect_equal(nullModel(de_obj), ~1 + new_grp)
36 |   expect_equal(nullMatrix(de_obj), mat_null)
37 |   expect_error(nullModel(de_obj) <- ~1 + DNE)
38 | 
39 |   expect_equal(individual(de_obj), as.factor(1:10))
40 |   expect_error(individual(de_obj) <- 1:10)
41 | 
42 |   expect_error(qvalueObj(de_obj) <- 1:10)
43 | })
44 | 
45 | 


--------------------------------------------------------------------------------
/tests/testthat/test-modelCreation.R:
--------------------------------------------------------------------------------
 1 | library(edge)
 2 | library(splines)
 3 | context("Model creation: deSet/build_models/build_study")
 4 | 
 5 | ngenes <- 100
 6 | nobs <- 20
 7 | # create data composed of noise -----------------------------------------------
 8 | dat_noise <- matrix(rnorm(ngenes*nobs), ncol = nobs)
 9 | cov <- data.frame(grp = c(rep(1, nobs/2), rep(0, nobs/2)))
10 | 
11 | # edgeModel -------------------------------------------------------------------
12 | de_objM <- build_models(dat_noise, cov = cov, full.model = ~1 + grp, null.model = ~1)
13 | de_objMi <- build_models(dat_noise, cov = cov, full.model = ~1 + grp,
14 |                        null.model = ~1, ind = factor(1:20))
15 | 
16 | # edgeStudy -------------------------------------------------------------------
17 | de_objS <- build_study(dat_noise, grp = as.factor(cov$grp), sampling = "static")
18 | de_objSi <- build_study(dat_noise, grp = as.factor(cov$grp), sampling = "static",
19 |                          ind = factor(1:20))
20 | adj <- rnorm(20)
21 | tme <- rnorm(20)
22 | cov$adj <- adj
23 | cov$tme <- tme
24 | de_objSit <- build_study(dat_noise, grp = as.factor(cov$grp),
25 |                         adj.var = adj, sampling = "timecourse",
26 |                         ind = factor(1:20), tme = tme)
27 | 
28 | # deSet ---------------------------------------------------------------------
29 | exp_set <- ExpressionSet(assayData = dat_noise,
30 |                          phenoData = as(cov, "AnnotatedDataFrame"))
31 | de_objE <- deSet(exp_set, full.model = ~1 + grp, null.model = ~1)
32 | de_objEi <- deSet(exp_set, full.model = ~1 + grp, null.model = ~1,
33 |                       ind = factor(1:20))
34 | 
35 | 
36 | test_that("build_models method", {
37 |   expect_equal(fullModel(de_objM), ~1 + grp)
38 |   expect_equal(nullModel(de_objM), ~1)
39 | 
40 |   expect_equivalent(fullMatrix(de_objM), model.matrix(~1 + grp, cov))
41 | 
42 |   expect_equivalent(exprs(de_objM), dat_noise)
43 | 
44 |   expect_equivalent(individual(de_objMi), factor(1:20))
45 | 
46 | })
47 | 
48 | test_that("build_study method", {
49 |   expect_equal(fullModel(de_objS), ~grp)
50 |   expect_equal(nullModel(de_objS), ~1)
51 | 
52 |   expect_equivalent(fullMatrix(de_objS), model.matrix(~1 + grp, cov))
53 | 
54 |   expect_equivalent(exprs(de_objS), dat_noise)
55 | 
56 |   expect_equivalent(individual(de_objSi), factor(1:20))
57 | 
58 |   expect_equivalent(fullMatrix(de_objSit), model.matrix(~adj + grp + ns(tme, df=2, intercept=FALSE) + grp:ns(tme, df = 2, intercept=FALSE),  cov))
59 | 
60 | })
61 | 
62 | test_that("deSet method", {
63 |   expect_equal(fullModel(de_objE), ~1 + grp)
64 |   expect_equal(nullModel(de_objE), ~1)
65 | 
66 |   expect_equivalent(fullMatrix(de_objE), model.matrix(~1 + grp, cov))
67 | 
68 |   expect_equivalent(exprs(de_objE), dat_noise)
69 | 
70 |   expect_equivalent(individual(de_objEi), factor(1:20))
71 | 
72 | })
73 | 


--------------------------------------------------------------------------------
/vignettes/edgecomp.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StoreyLab/edge/5f973def65bc536b90d46b78e0a0ef849a81caa3/vignettes/edgecomp.pdf


--------------------------------------------------------------------------------
/vignettes/edgerefs.bib:
--------------------------------------------------------------------------------
  1 | 
  2 | @article{Storey:2002fc,
  3 |   Abstract = {Multiple-hypothesis testing involves guarding against much more complicated errors than single-hypothesis testing. Whereas we typically control the type I error rate for a single-hypothesis test, a compound error rate is controlled for multiple-hypothesis tests. For example, controlling the false discovery rate FDR traditionally involves intricate sequential p-value rejection methods based on the observed data. Whereas a sequential p-value method fixes the error rate and estimates its corresponding rejection region, we propose the opposite approach-we fix the rejection region and then estimate its corresponding error rate. This new approach offers increased applicability, accuracy and power. We apply the methodology to both the positive false discovery rate pFDR and FDR, and provide evidence for its benefits. It is shown that pFDR is probably the quantity of interest over FDR. Also discussed is the calculation of the q-value, the pFDR analogue of the p-value, which eliminates the need to set the error rate beforehand as is traditionally done. Some simple numerical examples are presented that show that this new approach can yield an increase of over eight times in power compared with the Benjamini-Hochberg FDR method.},
  4 | 	Address = {108 COWLEY RD, OXFORD OX4 1JF, OXON, ENGLAND},
  5 | 	Author = {Storey, JD},
  6 | 	Date-Added = {2011-10-30 22:26:25 -0400},
  7 | 	Date-Modified = {2011-10-30 22:26:25 -0400},
  8 | 	Isi = {000177425500009},
  9 | 	Isi-Recid = {126051257},
 10 | 	Isi-Ref-Recids = {112504863 90155838 115373815 122784094 87253760 119531800 126051258 126051259 119668320 112504865},
 11 | 	Journal = {Journal of the Royal Statistical Society Series B-Statistical Methodology},
 12 | 	Keywords = {false discovery rate; multiple comparisons; positive false discovery rate; p-values; q-values; sequential p-value methods; simultaneous inference},
 13 | 	Pages = {479--498},
 14 | 	Publisher = {BLACKWELL PUBL LTD},
 15 | 	Times-Cited = {1132},
 16 | 	Title = {A direct approach to false discovery rates},
 17 | 	Volume = {64},
 18 | 	Year = {2002},
 19 | }
 20 | 
 21 | 
 22 | @article{Storey:2003il,
 23 |   Abstract = {With the increase in genomewide experiments and the sequencing of multiple genomes, the analysis of large data sets has become commonplace in biology. It is often the case that thousands of features in a genomewide data set are tested against some null hypothesis, where a number of features are expected to be significant. Here we propose an approach to measuring statistical significance in these genomewide studies based on the concept of the false discovery rate. This approach offers a sensible balance between the number of true and false positives that is automatically calibrated and easily interpreted. In doing so, a measure of statistical significance called the q value is associated with each tested feature. The q value is similar to the well known p value, except it is a measure of significance in terms of the false discovery rate rather than the false positive rate. Our approach avoids a flood of false positive results, while offering a more liberal criterion than what has been used in genome scans for linkage.},
 24 | 	Author = {Storey, John D and Tibshirani, Robert},
 25 | 	Date-Added = {2011-10-30 22:16:49 -0400},
 26 | 	Date-Modified = {2011-10-30 22:16:49 -0400},
 27 | 	Doi = {10.1073/pnas.1530509100},
 28 | 	Journal = {Proc Natl Acad Sci U S A},
 29 | 	Journal-Full = {Proceedings of the National Academy of Sciences of the United States of America},
 30 | 	Mesh = {Algorithms; Alternative Splicing; Animals; Binding Sites; Exons; Gene Expression Regulation; Genetic Linkage; Genetic Techniques; Genome; Humans; Oligonucleotide Array Sequence Analysis; Statistics as Topic; Transcription, Genetic},
 31 | 	Month = {Aug},
 32 | 	Number = {16},
 33 | 	Pages = {9440-5},
 34 | 	Pmc = {PMC170937},
 35 | 	Pmid = {12883005},
 36 | 	Pst = {ppublish},
 37 | 	Title = {Statistical significance for genomewide studies},
 38 | 	Volume = {100},
 39 | 	Year = {2003},
 40 | }
 41 | 
 42 | 
 43 | @article{woo:leek:storey:2011,
 44 | author = {Woo, Sangsoon and Leek, Jeffrey T. and Storey, John D.},
 45 | title = {A computationally efficient modular optimal discovery procedure},
 46 | volume = {27},
 47 | number = {4},
 48 | pages = {509-515},
 49 | year = {2011},
 50 | doi = {10.1093/bioinformatics/btq701},
 51 | abstract ={Motivation: It is well known that patterns of differential gene expression across biological conditions are often shared by many genes, particularly those within functional groups. Taking advantage of these patterns can lead to increased statistical power and biological clarity when testing for differential expression in a microarray experiment. The optimal discovery procedure (ODP), which maximizes the expected number of true positives for each fixed number of expected false positives, is a framework aimed at this goal. Storey et al. introduced an estimator of the ODP for identifying differentially expressed genes. However, their ODP estimator grows quadratically in computational time with respect to the number of genes. Reducing this computational burden is a key step in making the ODP practical for usage in a variety of high-throughput problems. Results: Here, we propose a new estimate of the ODP called the modular ODP (mODP). The existing ‘full ODP’ requires that the likelihood function for each gene be evaluated according to the parameter estimates for all genes. The mODP assigns genes to modules according to a Kullback–Leibler distance, and then evaluates the statistic only at the module-averaged parameter estimates. We show that the mODP is relatively insensitive to the choice of the number of modules, but dramatically reduces the computational complexity from quadratic to linear in the number of genes. We compare the full ODP algorithm and mODP on simulated data and gene expression data from a recent study of Moroccan Amazighs. The mODP and full ODP algorithm perform very similarly across a range of comparisons. Availability: The mODP methodology has been implemented into EDGE, a comprehensive gene expression analysis software package in R, available at http://genomine.org/edge/. Contact: jstorey@princeton.edu. Supplementary information: Supplementary data are available at Bioinformatics online.},
 52 | URL = {http://bioinformatics.oxfordjournals.org/content/27/4/509.abstract},
 53 | eprint = {http://bioinformatics.oxfordjournals.org/content/27/4/509.full.pdf+html},
 54 | journal = {Bioinformatics}
 55 | }
 56 | 
 57 | @article {storey:2007,
 58 | author = {Storey, John D.},
 59 | title = {The optimal discovery procedure: a new approach to simultaneous significance testing},
 60 | journal = {Journal of the Royal Statistical Society: Series B (Statistical Methodology)},
 61 | volume = {69},
 62 | number = {3},
 63 | publisher = {Blackwell Publishing Ltd},
 64 | issn = {1467-9868},
 65 | url = {http://dx.doi.org/10.1111/j.1467-9868.2007.005592.x},
 66 | doi = {10.1111/j.1467-9868.2007.005592.x},
 67 | pages = {347--368},
 68 | keywords = {Classification, False discovery rate, Multiple-hypothesis testing, Optimal discovery procedure, q-value, Single-thresholding procedure},
 69 | year = {2007},
 70 | }
 71 | 
 72 | @article {gibson:2008,
 73 | author = {Idaghdour, Y and Storey, JD and Jadallah, SJ and Gibson, G},
 74 | title = {A Genome-Wide Gene Expression Signature of Environmental Geography in Leukocytes of Moroccan Amazighs},
 75 | journal = {PLoS Genetics},
 76 | volume = {4}, year = {2008}, pages = {e1000052},
 77 | doi = {10.1371/journal.pgen.1000052}
 78 | }
 79 | 
 80 | @article{storey:etal:2007,
 81 | author = {Storey, John D. and Dai, James Y. and Leek, Jeffrey T.},
 82 | title = {The optimal discovery procedure for large-scale significance testing, with applications to comparative microarray experiments},
 83 | volume = {8},
 84 | number = {2},
 85 | pages = {414-432},
 86 | year = {2007},
 87 | doi = {10.1093/biostatistics/kxl019},
 88 | abstract ={As much of the focus of genetics and molecular biology has shifted toward the systems level, it has become increasingly important to accurately extract biologically relevant signal from thousands of related measurements. The common property among these high-dimensional biological studies is that the measured features have a rich and largely unknown underlying structure. One example of much recent interest is identifying differentially expressed genes in comparative microarray experiments. We propose a new approach aimed at optimally performing many hypothesis tests in a high-dimensional study. This approach estimates the optimal discovery procedure (ODP), which has recently been introduced and theoretically shown to optimally perform multiple significance tests. Whereas existing procedures essentially use data from only one feature at a time, the ODP approach uses the relevant information from the entire data set when testing each feature. In particular, we propose a generally applicable estimate of the ODP for identifying differentially expressed genes in microarray experiments. This microarray method consistently shows favorable performance over five highly used existing methods. For example, in testing for differential expression between two breast cancer tumor types, the ODP provides increases from 72% to 185% in the number of genes called significant at a false discovery rate of 3%. Our proposed microarray method is freely available to academic users in the open-source, point-and-click EDGE software package.},
 89 | URL = {http://biostatistics.oxfordjournals.org/content/8/2/414.abstract},
 90 | eprint = {http://biostatistics.oxfordjournals.org/content/8/2/414.full.pdf+html},
 91 | journal = {Biostatistics}
 92 | }
 93 | 
 94 | 
 95 | 
 96 | @Article{storey:2005,
 97 | author = "Storey, John D. and Xiao, Wenzhong and Leek, Jeffrey T. and Tompkins, Ronald G. and Davis, Ronald W.",
 98 | title = "Significance analysis of time course microarray experiments",
 99 | volume = "102",
100 | number = "36",
101 | pages = "12837-12842",
102 | year = "2005",
103 | doi = "10.1073/pnas.0504609102",
104 | abstract ="Characterizing the genome-wide dynamic regulation of gene expression is important and will be of much interest in the future. However, there is currently no established method for identifying differentially expressed genes in a time course study. Here we propose a significance method for analyzing time course microarray studies that can be applied to the typical types of comparisons and sampling schemes. This method is applied to two studies on humans. In one study, genes are identified that show differential expression over time in response to in vivo endotoxin administration. By using our method, 7,409 genes are called significant at a 1% false-discovery rate level, whereas several existing approaches fail to identify any genes. In another study, 417 genes are identified at a 10% false-discovery rate level that show expression changing with age in the kidney cortex. Here it is also shown that as many as 47% of the genes change with age in a manner more complex than simple exponential growth or decay. The methodology proposed here has been implemented in the freely distributed and open-source edge software package.",
105 | URL = "http://www.pnas.org/content/102/36/12837.abstract",
106 | eprint = "http://www.pnas.org/content/102/36/12837.full.pdf+html",
107 | journal = "Proceedings of the National Academy of Sciences of the United States of America"
108 | }
109 | 
110 | 
111 | @article{leek2005,
112 | author = {Leek, Jeffrey T. and Monsen, Eva and Dabney, Alan R. and Storey, John D.},
113 | title = {EDGE: extraction and analysis of differential gene expression},
114 | volume = {22},
115 | number = {4},
116 | pages = {507-508},
117 | year = {2006},
118 | doi = {10.1093/bioinformatics/btk005},
119 | abstract ={Summary: EDGE (Extraction of Differential Gene Expression) is an open source, point-and-click software program for the significance analysis of DNA microarray experiments. EDGE can perform both standard and time course differential expression analysis. The functions are based on newly developed statistical theory and methods. This document introduces the EDGE software package. Availability: EDGE is freely available for non-commercial users. EDGE can be downloaded for Windows, Macintosh and Linux/UNIX from http://faculty.washington.edu/jstorey/edge. Contact: jtleek@u.washington.edu},
120 | URL = {http://bioinformatics.oxfordjournals.org/content/22/4/507.abstract},
121 | eprint = {http://bioinformatics.oxfordjournals.org/content/22/4/507.full.pdf+html},
122 | journal = {Bioinformatics}
123 | }
124 | 
125 | 
126 | @article{hedenfalk:2001,
127 | author = {Hedenfalk, Ingrid and Duggan, David and Chen, Yidong and Radmacher, Michael and Bittner, Michael and Simon, Richard and Meltzer, Paul and Gusterson, Barry and Esteller, Manel and Raffeld, Mark and Yakhini, Zohar and Ben-Dor, Amir and Dougherty, Edward and Kononen, Juha and Bubendorf, Lukas and Fehrle, Wilfrid and Pittaluga, Stefania and Gruvberger, Sofia and Loman, Niklas and Johannsson, Oskar and Olsson, Håkan and Wilfond, Benjamin and Sauter, Guido and Kallioniemi, Olli-P. and Borg, Åke and Trent, Jeffrey},
128 | title = {Gene-Expression Profiles in Hereditary Breast Cancer},
129 | journal = {New England Journal of Medicine},
130 | volume = {344},
131 | number = {8},
132 | pages = {539-548},
133 | year = {2001},
134 | doi = {10.1056/NEJM200102223440801},
135 |     note ={PMID: 11207349},
136 | 
137 | URL = {
138 |         http://dx.doi.org/10.1056/NEJM200102223440801
139 | 
140 | },
141 | eprint = {
142 |         http://dx.doi.org/10.1056/NEJM200102223440801
143 | 
144 | }
145 | 
146 | }
147 | 
148 | 
149 | @article{rodwell:2004,
150 |     author = {Rodwell, Graham E. J AND Sonu, Rebecca AND Zahn, Jacob M AND Lund, James AND Wilhelmy, Julie AND Wang, Lingli AND Xiao, Wenzhong AND Mindrinos, Michael AND Crane, Emily AND Segal, Eran AND Myers, Bryan D AND Brooks, James D AND Davis, Ronald W AND Higgins, John AND Owen, Art B AND Kim, Stuart K},
151 |     journal = {PLoS Biol},
152 |     publisher = {Public Library of Science},
153 |     title = {A Transcriptional Profile of Aging in the Human Kidney},
154 |     year = {2004},
155 |     month = {11},
156 |     volume = {2},
157 |     pages = {e427},
158 |     number = {12},
159 |     doi = {10.1371/journal.pbio.0020427}
160 | }
161 | 
162 | @Article{storey:2003,
163 |   Author = "Storey, J. D.",
164 | 	Title = "The positive false discovery rate: A Bayesian interpretation and the q-value",
165 | 	Journal = "Annals of Statistics",
166 | 	Year = 2003,
167 | 	Volume = 31,
168 | 	Pages = "2013-2035",
169 | }
170 | 
171 | @article{leek:2007,
172 |     author = {Leek, Jeffrey T AND Storey, John D},
173 |     journal = {PLoS Genet},
174 |     publisher = {Public Library of Science},
175 |     title = {Capturing Heterogeneity in Gene Expression Studies by Surrogate Variable Analysis},
176 |     year = {2007},
177 |     month = {09},
178 |     volume = {3},
179 |     pages = {e161},
180 |     abstract = {Author Summary: In scientific and medical studies, great care must be taken when collecting data to understand the relationship between two variables, such as a drug and its effect on a disease. In any given study there will be many other variables at play, such as the effects of age and sex on the disease. We show that in studies where the expression levels of thousands of genes are measured at once, these issues become surprisingly critical. Due to the complexity of our genomes, environment, and demographic features, there are many sources of variation when analyzing gene expression levels. In any given study, it is impossible to measure every single variable that may be influencing how our genes are expressed. Despite this, we show that by considering all expression levels simultaneously, one can actually recover the effects of these important missed variables and essentially produce an analysis as if all relevant variables were included. As opposed to traditional studies, the massive amount of data available in this setting is what makes the method, called surrogate variable analysis, possible. We hypothesize that surrogate variable analysis will be useful in many large-scale gene expression studies.},
181 |     number = {9},
182 |     doi = {10.1371/journal.pgen.0030161}
183 | }
184 | 
185 | 
186 | @article{Leek:2008qf,
187 |   Abstract = {We develop a general framework for performing large-scale significance testing in the presence of arbitrarily strong dependence. We derive a low-dimensional set of random vectors, called a dependence kernel, that fully captures the dependence structure in an observed high-dimensional dataset. This result shows a surprising reversal of the "curse of dimensionality" in the high-dimensional hypothesis testing setting. We show theoretically that conditioning on a dependence kernel is sufficient to render statistical tests independent regardless of the level of dependence in the observed data. This framework for multiple testing dependence has implications in a variety of common multiple testing problems, such as in gene expression studies, brain imaging, and spatial epidemiology.},
188 | 	Author = {Leek, Jeffrey T and Storey, John D},
189 | 	Date-Added = {2011-10-30 22:16:12 -0400},
190 | 	Date-Modified = {2011-10-30 22:16:12 -0400},
191 | 	Doi = {10.1073/pnas.0808709105},
192 | 	Journal = {Proc Natl Acad Sci U S A},
193 | 	Journal-Full = {Proceedings of the National Academy of Sciences of the United States of America},
194 | 	Mesh = {Algorithms; Computer Simulation; Models, Statistical; Software; Statistics as Topic},
195 | 	Month = {Dec},
196 | 	Number = {48},
197 | 	Pages = {18718-23},
198 | 	Pmc = {PMC2586646},
199 | 	Pmid = {19033188},
200 | 	Pst = {ppublish},
201 | 	Title = {A general framework for multiple testing dependence},
202 | 	Volume = {105},
203 | 	Year = {2008},
204 | 	Bdsk-Url-1 = {http://dx.doi.org/10.1073/pnas.0808709105}
205 | }
206 | 
207 | 
208 | @article{mecham:2010,
209 | author = {Mecham, Brigham H. and Nelson, Peter S. and Storey, John D.},
210 | title = {Supervised normalization of microarrays},
211 | volume = {26},
212 | number = {10},
213 | pages = {1308-1315},
214 | year = {2010},
215 | doi = {10.1093/bioinformatics/btq118},
216 | abstract ={Motivation: A major challenge in utilizing microarray technologies to measure nucleic acid abundances is ‘normalization’, the goal of which is to separate biologically meaningful signal from other confounding sources of signal, often due to unavoidable technical factors. It is intuitively clear that true biological signal and confounding factors need to be simultaneously considered when performing normalization. However, the most popular normalization approaches do not utilize what is known about the study, both in terms of the biological variables of interest and the known technical factors in the study, such as batch or array processing date. Results: We show here that failing to include all study-specific biological and technical variables when performing normalization leads to biased downstream analyses. We propose a general normalization framework that fits a study-specific model employing every known variable that is relevant to the expression study. The proposed method is generally applicable to the full range of existing probe designs, as well as to both single-channel and dual-channel arrays. We show through real and simulated examples that the method has favorable operating characteristics in comparison to some of the most highly used normalization methods. Availability: An R package called snm implementing the methodology will be made available from Bioconductor (http://bioconductor.org). Contact: jstorey@princeton.edu. Supplementary information: Supplementary data are available at Bioinformatics online.},
217 | URL = {http://bioinformatics.oxfordjournals.org/content/26/10/1308.abstract},
218 | eprint = {http://bioinformatics.oxfordjournals.org/content/26/10/1308.full.pdf+html},
219 | journal = {Bioinformatics}
220 | }
221 | 
222 | @article{calvano:2005,
223 | author = {Calvano, SE and Xiao, W and Richards, DR and Felciano, RM and Baker, HV and Cho, RJ and Chen, RO and Brownstein, BH and Cobb, JP and Tschoeke, SK and Miller-Graziano, C and Moldawer, LL and Mindrinos, MN and Davis, RW and Tompkins, RG and Lowry, SF},
224 | title = {A network-based analysis of systemic inflammation in humans},
225 | volume = {437},
226 | pages = {1032-1037},
227 | year = {2005},
228 | doi = {10.1038/nature03985},
229 | URL = {http://www.nature.com/nature/journal/v437/n7061/full/nature03985.html},
230 | journal = {Nature}
231 | }


--------------------------------------------------------------------------------