├── tests ├── testthat.R └── testthat │ └── testConfirmationAdjustment.R ├── data ├── hammer.eset.rda └── esetProstate.rda ├── .gitignore ├── NEWS.txt ├── man ├── hammer.eset.Rd ├── esetProstate.Rd ├── getPScreen.Rd ├── getPConfirmation.Rd ├── getTx2gene.Rd ├── getAlpha.Rd ├── getMethod.Rd ├── getSignificantGenes.Rd ├── isAdjusted.Rd ├── getSignificantTx.Rd ├── isPScreenAdjusted.Rd ├── getResults.Rd ├── stageR.Rd ├── adjustedAlphaLevel.Rd ├── stageRTx.Rd ├── stageRClass.Rd ├── getAdjustedPValues.Rd └── stageWiseAdjustment.Rd ├── NAMESPACE ├── README.md ├── DESCRIPTION ├── R ├── data.R ├── allGenerics.R ├── methods.R ├── constructors.R ├── stageRClasses.R └── accessors.R ├── inst └── extdata │ └── constructDTUDataset.R └── vignettes ├── stageR.bib └── stageRVignette.Rmd /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(stageR) 3 | 4 | test_check("stageR") -------------------------------------------------------------------------------- /data/hammer.eset.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statOmics/stageR/HEAD/data/hammer.eset.rda -------------------------------------------------------------------------------- /data/esetProstate.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statOmics/stageR/HEAD/data/esetProstate.rda -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | inst/doc 2 | *.Rproj 3 | *.swp 4 | .Rproj.user 5 | .Rhistory 6 | .Rbuildignore 7 | .gitignore 8 | hammer_eset.RData 9 | ..Rcheck* 10 | -------------------------------------------------------------------------------- /NEWS.txt: -------------------------------------------------------------------------------- 1 | version 1.1.1 2 | 3 | We 
discovered a bug in the ‘holm’ and ‘user’ corrections for the ‘stageWiseAdjustment’ function that was introduced when making changes for Bioconductor submission. The bug was present in version 0.99.08 until version 1.1.1. It is also present in the Bioconductor 3.6 release branch, hence it is preferable to install the package through Github. -------------------------------------------------------------------------------- /man/hammer.eset.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{hammer.eset} 5 | \alias{hammer.eset} 6 | \title{Hammer dataset} 7 | \format{An \code{ExpressionSet} object.} 8 | \source{ 9 | \url{http://bowtie-bio.sourceforge.net/recount/} 10 | } 11 | \usage{ 12 | hammer.eset 13 | } 14 | \description{ 15 | A gene expression dataset from an experiment on spinal nerve ligation in rats, comparing this treatment to control samples in two timepoints, i.e. two weeks and two months post treatment. 2 Biological replicates available in every treatment x time combination. 16 | } 17 | \references{ 18 | Hammer P, Banck MS, Amberg R, et al. mRNA-seq with agnostic splice site discovery for nervous system transcriptomics tested in chronic pain. Genome Research. 2010;20(6):847-860. doi:10.1101/gr.101204.109. 
19 | } 20 | \keyword{datasets} 21 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export() 4 | export(adjustedAlphaLevel) 5 | export(getAdjustedPValues) 6 | export(getAlpha) 7 | export(getMethod) 8 | export(getPConfirmation) 9 | export(getPScreen) 10 | export(getResults) 11 | export(getSignificantGenes) 12 | export(getSignificantTx) 13 | export(getTx2gene) 14 | export(isAdjusted) 15 | export(isPScreenAdjusted) 16 | export(stageR) 17 | export(stageRTx) 18 | export(stageWiseAdjustment) 19 | exportClasses(stageR) 20 | exportClasses(stageRTx) 21 | exportMethods(adjustedAlphaLevel) 22 | exportMethods(getAdjustedPValues) 23 | exportMethods(getAlpha) 24 | exportMethods(getMethod) 25 | exportMethods(getPConfirmation) 26 | exportMethods(getPScreen) 27 | exportMethods(getResults) 28 | exportMethods(getSignificantGenes) 29 | exportMethods(getSignificantTx) 30 | exportMethods(getTx2gene) 31 | exportMethods(isAdjusted) 32 | exportMethods(isPScreenAdjusted) 33 | exportMethods(stageWiseAdjustment) 34 | import(SummarizedExperiment) 35 | -------------------------------------------------------------------------------- /man/esetProstate.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{esetProstate} 5 | \alias{esetProstate} 6 | \title{Transcript-level abundance estimates in 14 Chinese prostate cancer patients} 7 | \format{An \code{ExpressionSet} object.} 8 | \source{ 9 | \url{http://pachterlab.github.io/lair/} 10 | } 11 | \usage{ 12 | esetProstate 13 | } 14 | \description{ 15 | A dataset containing 14 matched samples of tumoral prostate cancer and normal tissue, both derived from the same Chinese patient. 
The dataset has been prefiltered to reduce the computational burden of the vignette. 16 | } 17 | \references{ 18 | Ren, Shancheng, Zhiyu Peng, Jian-Hua Mao, Yongwei Yu, Changjun Yin, Xin Gao, Zilian Cui, et al. 2012. "RNA-seq analysis of prostate cancer in the Chinese population identifies recurrent gene fusions, cancer-associated long noncoding RNAs and aberrant alternative splicings." Cell Research 22 (5): 806-21. doi:10.1038/cr.2012.30. 19 | } 20 | \keyword{datasets} 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # stageR 2 | 3 | ## stage-wise analysis of high-throughput genomic data with omnibus and posthoc tests 4 | 5 | This is the repository for the stageR package. stageR allows user-friendly automated stage-wise analysis of high-throughput genomic data. 6 | 7 | See the [Bioconductor website](https://bioconductor.org/packages/release/bioc/html/stageR.html) to install the package through Bioconductor. 8 | To install the package from the GitHub repository in R, please use 9 | 10 | ``` 11 | library(devtools) 12 | install_github("statOmics/stageR") 13 | ``` 14 | 15 | The repository containing all code required to reproduce the analyses in the paper can be found [here](http://www.github.com/statOmics/stageWiseTestingPaper). 16 | 17 | 18 | Note that we discovered a bug in the ‘holm’ and ‘user’ corrections for the ‘stageWiseAdjustment’ function that was introduced when making changes for Bioconductor submission. The bug was present from version 0.99.08 until version 1.1.1. It was also present in the Bioconductor 3.6 release branch. 
19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: stageR 2 | Type: Package 3 | Title: stageR: stage-wise analysis of high throughput gene expression data in R 4 | Version: 1.7.01 5 | Author: Koen Van den Berge and Lieven Clement 6 | Maintainer: Koen Van den Berge 7 | Description: The stageR package allows automated stage-wise analysis of high-throughput gene expression data. The method is published in Genome Biology at https://genomebiology.biomedcentral.com/articles/10.1186/s13059-017-1277-0 8 | License: GNU General Public License version 3 9 | LazyData: TRUE 10 | biocViews: Software, StatisticalMethod 11 | Depends: R (>= 3.4), SummarizedExperiment 12 | Collate: 13 | 'stageRClasses.R' 14 | 'allGenerics.R' 15 | 'constructors.R' 16 | 'accessors.R' 17 | 'data.R' 18 | 'methods.R' 19 | RoxygenNote: 6.0.1 20 | Suggests: 21 | knitr, 22 | rmarkdown, 23 | BiocStyle, 24 | methods, 25 | Biobase, 26 | edgeR, 27 | limma, 28 | DEXSeq, 29 | testthat 30 | VignetteBuilder: knitr 31 | Imports: methods, 32 | stats 33 | -------------------------------------------------------------------------------- /man/getPScreen.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/allGenerics.R, R/accessors.R 3 | \docType{methods} 4 | \name{getPScreen} 5 | \alias{getPScreen} 6 | \alias{getPScreen} 7 | \alias{getPScreen,stageR} 8 | \alias{getPScreen,stageRTx} 9 | \alias{getPScreen,stageRTx-method} 10 | \title{Return screening hypothesis p-values from a \code{\link{stageRClass}} object.} 11 | \usage{ 12 | getPScreen(object, ...) 
13 | 14 | \S4method{getPScreen}{stageR}(object) 15 | 16 | \S4method{getPScreen}{stageRTx}(object) 17 | } 18 | \arguments{ 19 | \item{object}{an object of the \code{\link{stageRClass}} class.} 20 | 21 | \item{...}{Additional arguments} 22 | } 23 | \value{ 24 | A vector of screening stage (aggregated) p-values. 25 | } 26 | \description{ 27 | Return screening hypothesis p-values from a \code{\link{stageRClass}} object. 28 | } 29 | \section{Methods (by class)}{ 30 | \itemize{ 31 | \item \code{stageRTx}: Return screening hypothesis p-values from a \code{\link{stageRClass}} object. 32 | }} 33 | 34 | \examples{ 35 | pScreen=c(seq(1e-10,1e-2,length.out=100),seq(1e-2,.2,length.out=100),seq(.2,1,length.out=100)) 36 | names(pScreen)=paste0("gene",1:300) 37 | pConfirmation=matrix(runif(900),nrow=300,ncol=3) 38 | dimnames(pConfirmation)=list(paste0("gene",1:300),c("H1","H2","H3")) 39 | stageRObj <- stageR(pScreen=pScreen, pConfirmation=pConfirmation) 40 | getPScreen(stageRObj) 41 | } 42 | -------------------------------------------------------------------------------- /R/data.R: -------------------------------------------------------------------------------- 1 | #' Transcript-level abundance estimates in 14 Chinese prostate cancer patients 2 | #' 3 | #' A dataset containing 14 matched samples of tumoral prostate cancer and normal tissue, both derived from the same Chinese patient. The dataset has been prefiltered to reduce the computational burden of the vignette. 4 | #' 5 | #' @format An \code{ExpressionSet} object. 6 | #' @references 7 | #'Ren, Shancheng, Zhiyu Peng, Jian-Hua Mao, Yongwei Yu, Changjun Yin, Xin Gao, Zilian Cui, et al. 2012. "RNA-seq analysis of prostate cancer in the Chinese population identifies recurrent gene fusions, cancer-associated long noncoding RNAs and aberrant alternative splicings." Cell Research 22 (5): 806-21. doi:10.1038/cr.2012.30. 
8 | #' @source \url{http://pachterlab.github.io/lair/} 9 | "esetProstate" 10 | 11 | #' Hammer dataset 12 | #' 13 | #' A gene expression dataset from an experiment on spinal nerve ligation in rats, comparing this treatment to control samples in two timepoints, i.e. two weeks and two months post treatment. 2 Biological replicates available in every treatment x time combination. 14 | #' 15 | #' @format An \code{ExpressionSet} object. 16 | #' @references 17 | #' Hammer P, Banck MS, Amberg R, et al. mRNA-seq with agnostic splice site discovery for nervous system transcriptomics tested in chronic pain. Genome Research. 2010;20(6):847-860. doi:10.1101/gr.101204.109. 18 | #' @source \url{http://bowtie-bio.sourceforge.net/recount/} 19 | "hammer.eset" -------------------------------------------------------------------------------- /man/getPConfirmation.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/allGenerics.R, R/accessors.R 3 | \docType{methods} 4 | \name{getPConfirmation} 5 | \alias{getPConfirmation} 6 | \alias{getPConfirmation} 7 | \alias{getPConfirmation,stageR} 8 | \alias{getPConfirmation,stageRTx} 9 | \alias{getPConfirmation,stageRTx-method} 10 | \title{Return unadjusted confirmation hypothesis p-values from a \code{\link{stageRClass}} object.} 11 | \usage{ 12 | getPConfirmation(object, ...) 13 | 14 | \S4method{getPConfirmation}{stageR}(object) 15 | 16 | \S4method{getPConfirmation}{stageRTx}(object) 17 | } 18 | \arguments{ 19 | \item{object}{an object of the \code{\link{stageRClass}} class.} 20 | } 21 | \value{ 22 | A matrix of the unadjusted p-values to be used in the confirmation stage. 23 | } 24 | \description{ 25 | Return unadjusted confirmation hypothesis p-values from a \code{\link{stageRClass}} object. 
26 | } 27 | \section{Methods (by class)}{ 28 | \itemize{ 29 | \item \code{stageRTx}: Return unadjusted confirmation hypothesis p-values from a \code{\link{stageRClass}} object. 30 | }} 31 | 32 | \examples{ 33 | pScreen=c(seq(1e-10,1e-2,length.out=100),seq(1e-2,.2,length.out=100),seq(.2,1,length.out=100)) 34 | names(pScreen)=paste0("gene",1:300) 35 | pConfirmation=matrix(runif(900),nrow=300,ncol=3) 36 | dimnames(pConfirmation)=list(paste0("gene",1:300),c("H1","H2","H3")) 37 | stageRObj <- stageR(pScreen=pScreen, pConfirmation=pConfirmation) 38 | getPConfirmation(stageRObj) 39 | } 40 | -------------------------------------------------------------------------------- /man/getTx2gene.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/allGenerics.R, R/accessors.R 3 | \docType{methods} 4 | \name{getTx2gene} 5 | \alias{getTx2gene} 6 | \alias{getTx2gene} 7 | \alias{getTx2gene,stageRTx} 8 | \title{Retrieve the data frame linking genes to transcripts.} 9 | \usage{ 10 | getTx2gene(object, ...) 11 | 12 | \S4method{getTx2gene}{stageRTx}(object, ...) 13 | } 14 | \arguments{ 15 | \item{object}{an object of the \code{stageRTxClass} class.} 16 | 17 | \item{...}{Additional arguments} 18 | } 19 | \value{ 20 | A matrix linking gene to transcript identifiers. 21 | } 22 | \description{ 23 | This functions returns a data frame that links the genes with the transcripts being analysed. 
24 | } 25 | \examples{ 26 | #make identifiers linking transcripts to genes 27 | set.seed(1) 28 | genes=paste0("gene",sample(1:200,1000,replace=TRUE)) 29 | nGenes=length(table(genes)) 30 | transcripts=paste0("tx",1:1000) 31 | tx2gene=data.frame(transcripts,genes) 32 | #gene-wise q-values 33 | pScreen=c(seq(1e-10,1e-2,length.out=nGenes-100),seq(1e-2,.2,length.out=50),seq(50)) 34 | names(pScreen)=names(table(genes)) #discards genes that are not simulated 35 | pConfirmation=matrix(runif(1000),nrow=1000,ncol=1) 36 | rownames(pConfirmation)=transcripts 37 | stageRObj <- stageRTx(pScreen=pScreen, pConfirmation=pConfirmation ,pScreenAdjusted=TRUE, tx2gene=tx2gene) 38 | getTx2gene(stageRObj) 39 | } 40 | \references{ 41 | Van den Berge K., Soneson C., Robinson M.D., Clement L. (2017). stageR: a general stage-wise method for controlling the gene-level false discovery rate in differential expression and differential transcript usage. Genome Biology 18:151. https://doi.org/10.1186/s13059-017-1277-0 42 | } 43 | -------------------------------------------------------------------------------- /man/getAlpha.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/allGenerics.R, R/accessors.R 3 | \docType{methods} 4 | \name{getAlpha} 5 | \alias{getAlpha} 6 | \alias{getAlpha} 7 | \alias{getAlpha,stageR} 8 | \alias{getAlpha,stageRTx} 9 | \alias{getAlpha,stageRTx-method} 10 | \title{Retrieve the significance level for the stage-wise adjustment.} 11 | \usage{ 12 | getAlpha(object, ...) 13 | 14 | \S4method{getAlpha}{stageR}(object, ...) 15 | 16 | \S4method{getAlpha}{stageRTx}(object, ...) 17 | } 18 | \arguments{ 19 | \item{object}{an object of the \code{\link{stageRClass}} or \code{stageRTxClass} class.} 20 | 21 | \item{...}{Additional arguments} 22 | } 23 | \value{ 24 | Returns a scalar with the OFDR alpha level that was specified by the user. 
25 | } 26 | \description{ 27 | This function returns the significance level on which the stage-wise adjustment is based. 28 | } 29 | \section{Methods (by class)}{ 30 | \itemize{ 31 | \item \code{stageRTx}: Retrieve the significance level for the stage-wise adjustment. 32 | }} 33 | 34 | \examples{ 35 | pScreen=c(seq(1e-10,1e-2,length.out=100),seq(1e-2,.2,length.out=100),seq(.2,1,length.out=100)) 36 | names(pScreen)=paste0("gene",1:300) 37 | pConfirmation=matrix(runif(900),nrow=300,ncol=3) 38 | dimnames(pConfirmation)=list(paste0("gene",1:300),c("H1","H2","H3")) 39 | stageRObj <- stageR(pScreen=pScreen, pConfirmation=pConfirmation) 40 | stageRObj <- stageWiseAdjustment(stageRObj, method="holm", alpha=0.05) 41 | getAlpha(stageRObj) 42 | } 43 | \references{ 44 | Van den Berge K., Soneson C., Robinson M.D., Clement L. (2017). stageR: a general stage-wise method for controlling the gene-level false discovery rate in differential expression and differential transcript usage. Genome Biology 18:151. https://doi.org/10.1186/s13059-017-1277-0 45 | } 46 | -------------------------------------------------------------------------------- /man/getMethod.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/allGenerics.R, R/accessors.R 3 | \docType{methods} 4 | \name{getMethod} 5 | \alias{getMethod} 6 | \alias{getMethod} 7 | \alias{getMethod,stageR} 8 | \alias{getMethod,stageRTx} 9 | \alias{getMethod,stageRTx-method} 10 | \title{Retrieve FWER correction method.} 11 | \usage{ 12 | getMethod(object, ...) 13 | 14 | \S4method{getMethod}{stageR}(object, ...) 15 | 16 | \S4method{getMethod}{stageRTx}(object, ...) 
17 | } 18 | \arguments{ 19 | \item{object}{an object of the \code{\link{stageRClass}} or \code{stageRTxClass} class.} 20 | 21 | \item{...}{Additional arguments} 22 | } 23 | \value{ 24 | Returns a character vector of length 1 specifying the FWER correction method that is used in the confirmation stage of the stage-wise analysis. 25 | } 26 | \description{ 27 | This functions retrieves the method used for FWER multiple testing correction in the confirmation stage of a stage-wise analysis. 28 | } 29 | \section{Methods (by class)}{ 30 | \itemize{ 31 | \item \code{stageRTx}: Retrieve FWER correction method. 32 | }} 33 | 34 | \examples{ 35 | pScreen=c(seq(1e-10,1e-2,length.out=100),seq(1e-2,.2,length.out=100),seq(.2,1,length.out=100)) 36 | names(pScreen)=paste0("gene",1:300) 37 | pConfirmation=matrix(runif(900),nrow=300,ncol=3) 38 | dimnames(pConfirmation)=list(paste0("gene",1:300),c("H1","H2","H3")) 39 | stageRObj <- stageR(pScreen=pScreen, pConfirmation=pConfirmation) 40 | stageRObj <- stageWiseAdjustment(stageRObj, method="holm", alpha=0.05) 41 | getMethod(stageRObj) 42 | } 43 | \references{ 44 | Van den Berge K., Soneson C., Robinson M.D., Clement L. (2017). stageR: a general stage-wise method for controlling the gene-level false discovery rate in differential expression and differential transcript usage. Genome Biology 18:151. https://doi.org/10.1186/s13059-017-1277-0 45 | } 46 | -------------------------------------------------------------------------------- /man/getSignificantGenes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/allGenerics.R, R/accessors.R 3 | \docType{methods} 4 | \name{getSignificantGenes} 5 | \alias{getSignificantGenes} 6 | \alias{getSignificantGenes} 7 | \alias{getSignificantGenes,stageRTx} 8 | \title{Return significant genes when performing transcript-level analysis.} 9 | \usage{ 10 | getSignificantGenes(object, ...) 
11 | 12 | \S4method{getSignificantGenes}{stageRTx}(object) 13 | } 14 | \arguments{ 15 | \item{object}{an object of the \code{\link{stageRClass}} class.} 16 | } 17 | \value{ 18 | A matrix with significant genes and their corresponding FDR-adjusted screening stage (aggregated) p-value. 19 | } 20 | \description{ 21 | This functions returns a matrix with significant genes by aggregated testing of its respective transcripts. 22 | } 23 | \examples{ 24 | #make identifiers linking transcripts to genes 25 | set.seed(1) 26 | genes=paste0("gene",sample(1:200,1000,replace=TRUE)) 27 | nGenes=length(table(genes)) 28 | transcripts=paste0("tx",1:1000) 29 | tx2gene=data.frame(transcripts,genes) 30 | #gene-wise q-values 31 | pScreen=c(seq(1e-10,1e-2,length.out=nGenes-100),seq(1e-2,.2,length.out=50),seq(50)) 32 | names(pScreen)=names(table(genes)) #discards genes that are not simulated 33 | pConfirmation=matrix(runif(1000),nrow=1000,ncol=1) 34 | rownames(pConfirmation)=transcripts 35 | stageRObj <- stageRTx(pScreen=pScreen, pConfirmation=pConfirmation ,pScreenAdjusted=TRUE, tx2gene=tx2gene) 36 | stageRObj <- stageWiseAdjustment(stageRObj, method="dte", alpha=0.05) 37 | head(getSignificantGenes(stageRObj)) 38 | } 39 | \references{ 40 | Van den Berge K., Soneson C., Robinson M.D., Clement L. (2017). stageR: a general stage-wise method for controlling the gene-level false discovery rate in differential expression and differential transcript usage. Genome Biology 18:151. 
https://doi.org/10.1186/s13059-017-1277-0 41 | } 42 | -------------------------------------------------------------------------------- /man/isAdjusted.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/allGenerics.R, R/accessors.R 3 | \docType{methods} 4 | \name{isAdjusted} 5 | \alias{isAdjusted} 6 | \alias{isAdjusted} 7 | \alias{isAdjusted,stageR} 8 | \alias{isAdjusted,stageRTx} 9 | \alias{isAdjusted,stageRTx-method} 10 | \title{Has stage-wise adjustment already been performed on the object?} 11 | \usage{ 12 | isAdjusted(object, ...) 13 | 14 | \S4method{isAdjusted}{stageR}(object, ...) 15 | 16 | \S4method{isAdjusted}{stageRTx}(object, ...) 17 | } 18 | \arguments{ 19 | \item{object}{an object of the \code{\link{stageRClass}} or \code{stageRTxClass} class.} 20 | 21 | \item{...}{Additional arguments} 22 | } 23 | \value{ 24 | A logical stating whether the p-values have already been adjusted according to the stage-wise method 25 | } 26 | \description{ 27 | This functions returns a logical stating whether the p-values have already been adjusted according to the stage-wise method. 28 | } 29 | \section{Methods (by class)}{ 30 | \itemize{ 31 | \item \code{stageRTx}: Has stage-wise adjustment already been performed on the object? 32 | }} 33 | 34 | \examples{ 35 | pScreen=c(seq(1e-10,1e-2,length.out=100),seq(1e-2,.2,length.out=100),seq(.2,1,length.out=100)) 36 | names(pScreen)=paste0("gene",1:300) 37 | pConfirmation=matrix(runif(900),nrow=300,ncol=3) 38 | dimnames(pConfirmation)=list(paste0("gene",1:300),c("H1","H2","H3")) 39 | stageRObj <- stageR(pScreen=pScreen, pConfirmation=pConfirmation) 40 | isAdjusted(stageRObj) 41 | stageRObj <- stageWiseAdjustment(stageRObj, method="holm", alpha=0.05) 42 | isAdjusted(stageRObj) 43 | } 44 | \references{ 45 | Van den Berge K., Soneson C., Robinson M.D., Clement L. (2017). 
stageR: a general stage-wise method for controlling the gene-level false discovery rate in differential expression and differential transcript usage. Genome Biology 18:151. https://doi.org/10.1186/s13059-017-1277-0 46 | } 47 | -------------------------------------------------------------------------------- /man/getSignificantTx.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/allGenerics.R, R/accessors.R 3 | \docType{methods} 4 | \name{getSignificantTx} 5 | \alias{getSignificantTx} 6 | \alias{getSignificantTx} 7 | \alias{getSignificantTx,stageR} 8 | \alias{getSignificantTx,stageRTx} 9 | \title{Return significant transcripts when performing transcript-level analysis.} 10 | \usage{ 11 | getSignificantTx(object, ...) 12 | 13 | \S4method{getSignificantTx}{stageRTx}(object) 14 | } 15 | \arguments{ 16 | \item{object}{an object of the \code{\link{stageRClass}} class.} 17 | } 18 | \value{ 19 | A matrix of significant transcripts with their corresponding stage-wise adjusted p-value (i.e. FDR and FWER correction). 20 | } 21 | \description{ 22 | This function returns a matrix with significant transcripts according to a stage-wise analysis. 
23 | } 24 | \examples{ 25 | #make identifiers linking transcripts to genes 26 | set.seed(1) 27 | genes=paste0("gene",sample(1:200,1000,replace=TRUE)) 28 | nGenes=length(table(genes)) 29 | transcripts=paste0("tx",1:1000) 30 | tx2gene=data.frame(transcripts,genes) 31 | #gene-wise q-values 32 | pScreen=c(seq(1e-10,1e-2,length.out=nGenes-100),seq(1e-2,.2,length.out=50),seq(50)) 33 | names(pScreen)=names(table(genes)) #discards genes that are not simulated 34 | pConfirmation=matrix(runif(1000),nrow=1000,ncol=1) 35 | rownames(pConfirmation)=transcripts 36 | stageRObj <- stageRTx(pScreen=pScreen, pConfirmation=pConfirmation ,pScreenAdjusted=TRUE, tx2gene=tx2gene) 37 | stageRObj <- stageWiseAdjustment(stageRObj, method="dte", alpha=0.05) 38 | head(getSignificantTx(stageRObj)) 39 | } 40 | \references{ 41 | Van den Berge K., Soneson C., Robinson M.D., Clement L. (2017). stageR: a general stage-wise method for controlling the gene-level false discovery rate in differential expression and differential transcript usage. Genome Biology 18:151. https://doi.org/10.1186/s13059-017-1277-0 42 | } 43 | -------------------------------------------------------------------------------- /man/isPScreenAdjusted.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/allGenerics.R, R/accessors.R 3 | \docType{methods} 4 | \name{isPScreenAdjusted} 5 | \alias{isPScreenAdjusted} 6 | \alias{isPScreenAdjusted} 7 | \alias{isPScreenAdjusted,stageR} 8 | \alias{isPSCreenAdjusted,stageRTx} 9 | \alias{isPScreenAdjusted,stageRTx-method} 10 | \title{Are the screening p-values adjusted for multiplicity?} 11 | \usage{ 12 | isPScreenAdjusted(object, ...) 13 | 14 | \S4method{isPScreenAdjusted}{stageR}(object, ...) 15 | 16 | \S4method{isPScreenAdjusted}{stageRTx}(object, ...) 
17 | } 18 | \arguments{ 19 | \item{object}{an object of the \code{\link{stageRClass}} or \code{stageRTxClass} class.} 20 | 21 | \item{...}{Additional arguments} 22 | } 23 | \value{ 24 | A logical stating whether the screening hypothesis p-values are already adjusted for multiple testing according to the BH FDR criterion. 25 | } 26 | \description{ 27 | This functions returns a logical stating whether the screening hypothesis p-values are already adjusted for multiple testing according to the BH FDR criterion. 28 | } 29 | \section{Methods (by class)}{ 30 | \itemize{ 31 | \item \code{stageRTx}: Are the screening p-values adjusted for multiplicity? 32 | }} 33 | 34 | \examples{ 35 | pScreen=c(seq(1e-10,1e-2,length.out=100),seq(1e-2,.2,length.out=100),seq(.2,1,length.out=100)) 36 | names(pScreen)=paste0("gene",1:300) 37 | pConfirmation=matrix(runif(900),nrow=300,ncol=3) 38 | dimnames(pConfirmation)=list(paste0("gene",1:300),c("H1","H2","H3")) 39 | stageRObj <- stageR(pScreen=pScreen, pConfirmation=pConfirmation) 40 | isPScreenAdjusted(stageRObj) 41 | } 42 | \references{ 43 | Van den Berge K., Soneson C., Robinson M.D., Clement L. (2017). stageR: a general stage-wise method for controlling the gene-level false discovery rate in differential expression and differential transcript usage. Genome Biology 18:151. https://doi.org/10.1186/s13059-017-1277-0 44 | } 45 | -------------------------------------------------------------------------------- /R/allGenerics.R: -------------------------------------------------------------------------------- 1 | #' @include stageRClasses.R 2 | 3 | #' @rdname stageR 4 | #' @export 5 | setGeneric("stageR", function(pScreen, pConfirmation, ...) standardGeneric("stageR")) 6 | 7 | #' @rdname getPScreen 8 | #' @export 9 | setGeneric("getPScreen", function(object, ...) standardGeneric("getPScreen")) 10 | 11 | #' @rdname getPConfirmation 12 | #' @export 13 | setGeneric("getPConfirmation", function(object, ...) 
standardGeneric("getPConfirmation")) 14 | 15 | #' @rdname stageWiseAdjustment 16 | #' @export 17 | setGeneric("stageWiseAdjustment", function(object, method, alpha, ...) standardGeneric("stageWiseAdjustment")) 18 | 19 | #' @rdname getAdjustedPValues 20 | #' @export 21 | setGeneric("getAdjustedPValues", function(object, onlySignificantGenes, order, ...) standardGeneric("getAdjustedPValues")) 22 | 23 | #' @rdname adjustedAlphaLevel 24 | #' @export 25 | setGeneric("adjustedAlphaLevel", function(object, ...) standardGeneric("adjustedAlphaLevel")) 26 | 27 | #' @rdname getResults 28 | #' @export 29 | setGeneric("getResults", function(object, ...) standardGeneric("getResults")) 30 | 31 | #' @rdname getSignificantGenes 32 | #' @export 33 | setGeneric("getSignificantGenes", function(object, ...) standardGeneric("getSignificantGenes")) 34 | 35 | #' @rdname getSignificantTx 36 | #' @export 37 | setGeneric("getSignificantTx", function(object, ...) standardGeneric("getSignificantTx")) 38 | 39 | #' @rdname getAlpha 40 | #' @export 41 | setGeneric("getAlpha", function(object, ...) standardGeneric("getAlpha")) 42 | 43 | #' @rdname getTx2gene 44 | #' @export 45 | setGeneric("getTx2gene", function(object, ...) standardGeneric("getTx2gene")) 46 | 47 | #' @rdname isPScreenAdjusted 48 | #' @export 49 | setGeneric("isPScreenAdjusted", function(object, ...) standardGeneric("isPScreenAdjusted")) 50 | 51 | #' @rdname isAdjusted 52 | #' @export 53 | setGeneric("isAdjusted", function(object, ...) standardGeneric("isAdjusted")) 54 | 55 | #' @rdname getMethod 56 | #' @export 57 | setGeneric("getMethod", function(object, ...) 
standardGeneric("getMethod")) 58 | -------------------------------------------------------------------------------- /man/getResults.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/allGenerics.R, R/accessors.R 3 | \docType{methods} 4 | \name{getResults} 5 | \alias{getResults} 6 | \alias{getResults} 7 | \alias{getResults,stageR} 8 | \alias{getResults,stageRTx} 9 | \title{Get significance results according to a stage-wise analysis.} 10 | \usage{ 11 | getResults(object, ...) 12 | 13 | \S4method{getResults}{stageR}(object) 14 | } 15 | \arguments{ 16 | \item{object}{an object of the \code{\link{stageRClass}} class.} 17 | } 18 | \value{ 19 | A logical matrix with rows corresponding to genes and columns corresponding to contrasts, where the first column represents the screening stage on the aggregated p-values. A 0 represents a non-significant test, a 1 represents a significant test according to the stage-wise analysis. 20 | } 21 | \description{ 22 | This functions returns a matrix that indicates whether a specific feature is significant for a specific hypothesis of interest according to a stage-wise analysis. The function is not applicable to transcript-level analysis. 23 | } 24 | \details{ 25 | The FDR adjusted screening hypothesis p-values are compared to the alpha level specified. The FWER adjusted confirmation stage p-values are compared to the adjusted significance level from the screening stage. 
26 | } 27 | \examples{ 28 | pScreen=c(seq(1e-10,1e-2,length.out=100),seq(1e-2,.2,length.out=100),seq(.2,1,length.out=100)) 29 | names(pScreen)=paste0("gene",1:300) 30 | pConfirmation=matrix(runif(900),nrow=300,ncol=3) 31 | dimnames(pConfirmation)=list(paste0("gene",1:300),c("H1","H2","H3")) 32 | stageRObj <- stageR(pScreen=pScreen, pConfirmation=pConfirmation) 33 | stageRObj <- stageWiseAdjustment(stageRObj, method="holm", alpha=0.05) 34 | head(getResults(stageRObj)) 35 | } 36 | \references{ 37 | Van den Berge K., Soneson C., Robinson M.D., Clement L. (2017). stageR: a general stage-wise method for controlling the gene-level false discovery rate in differential expression and differential transcript usage. Genome Biology 18:151. https://doi.org/10.1186/s13059-017-1277-0 38 | } 39 | -------------------------------------------------------------------------------- /man/stageR.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/constructors.R 3 | \name{stageR} 4 | \alias{stageR} 5 | \title{Create stageR object} 6 | \usage{ 7 | stageR(pScreen, pConfirmation, pScreenAdjusted = FALSE) 8 | } 9 | \arguments{ 10 | \item{pScreen}{A vector of screening hypothesis p-values.} 11 | 12 | \item{pConfirmation}{A matrix of confirmation hypothesis p-values. When constructing a \code{\link{stageRClass}} object, the number of rows should be equal to the length of \code{pScreen}. For a \code{\link{stageRTxClass}} object, the dimensions can be different.} 13 | 14 | \item{pScreenAdjusted}{logical, indicating whether the supplied p-values for the screening hypothesis have already been adjusted for multiplicity according to the FDR.} 15 | 16 | \item{...}{Additional arguments.} 17 | } 18 | \value{ 19 | An instance of an object of the \code{\link{stageRClass}} 20 | } 21 | \description{ 22 | Constructor function for \code{\link{stageRClass}}. 
A stageR class is a class used for stage-wise analysis in high throughput settings. 23 | In its most basic form, it consists of a vector of p-values for the screening hypothesis and a matrix of p-values for the confirmation hypotheses. 24 | } 25 | \examples{ 26 | # create a \\code{\\link{stageRClass}} object 27 | pScreen <- runif(10) 28 | names(pScreen) <- paste0("gene",1:10) 29 | pConfirmation <- matrix(runif(30),nrow=10,ncol=3) 30 | rownames(pConfirmation) <- paste0("gene",1:10) 31 | stageRObj <- stageR(pScreen=pScreen, pConfirmation=pConfirmation) 32 | pConfirmationTx <- matrix(runif(10),ncol=1) 33 | names(pScreen) <- paste0("gene",rep(1:2,each=5)) 34 | stageRObj <- stageRTx(pScreen=pScreen, pConfirmation=pConfirmationTx, tx2gene=data.frame(transcripts=paste0("transcript",1:10),genes=paste0("gene",rep(1:2,each=5)))) 35 | } 36 | \references{ 37 | Van den Berge K., Soneson C., Robinson M.D., Clement L. (2017). stageR: a general stage-wise method for controlling the gene-level false discovery rate in differential expression and differential transcript usage. Genome Biology 18:151. 
https://doi.org/10.1186/s13059-017-1277-0 38 | } 39 | -------------------------------------------------------------------------------- /R/methods.R: -------------------------------------------------------------------------------- 1 | #' @include stageRClasses.R 2 | 3 | setMethod("show", "stageR", function(object){ # print a summary: number of screening p-values, size of the confirmation matrix and, once adjusted, the OFDR level and FWER method 4 | cat("stageR object, containing: \n") 5 | if (!isAdjusted(object)) { 6 | cat(paste0("- ",length(getPScreen(object)), 7 | " screening hypothesis p-values \n")) 8 | cat(paste0("- ", 9 | ncol(getPConfirmation(object)), 10 | " confirmation hypotheses for ", 11 | nrow(getPConfirmation(object)), 12 | " genes \n" 13 | )) 14 | } 15 | if (isAdjusted(object)) { 16 | cat(paste0("- ",length(getPScreen(object)), 17 | " screening hypothesis p-values \n")) 18 | cat(paste0("- ", 19 | ncol(getPConfirmation(object)), 20 | " confirmation hypotheses for ", 21 | nrow(getPConfirmation(object)), 22 | " genes \n" 23 | )) 24 | cat( 25 | paste0( 26 | "- adjusted p-values on a ", 27 | getAlpha(object) * 100, 28 | "% OFDR level with the following FWER correction method: ", 29 | getMethod(object), 30 | " \n" 31 | ) 32 | ) 33 | 34 | } 35 | }) 36 | 37 | setMethod("show", "stageRTx", function(object){ # same summary as the stageR method, with rows of the confirmation matrix reported as transcripts 38 | cat("stageRTx object, containing: \n") 39 | if (!isAdjusted(object)) { 40 | cat(paste0("- ", length(getPScreen(object)), 41 | " screening hypothesis p-values \n")) 42 | cat(paste0("- ", 43 | ncol(getPConfirmation(object)), 44 | " confirmation hypothesis for ", 45 | nrow(getPConfirmation(object)), 46 | " transcripts \n" 47 | )) 48 | } 49 | if (isAdjusted(object)) { 50 | cat(paste0("- ", length(getPScreen(object)), # a single paste0() string, so cat() adds no separator after the bullet 51 | " screening hypothesis p-values \n")) 52 | cat(paste0("- ", 53 | ncol(getPConfirmation(object)), 54 | " confirmation hypothesis for ", 55 | nrow(getPConfirmation(object)), 56 | " transcripts \n" 57 | )) 58 | cat( 59 | paste0( 60 | "- adjusted p-values on a ", 61 | getAlpha(object) * 100, 62 | "% OFDR level with the following FWER correction method: ", 63 | 
getMethod(object), 64 | " \n" 65 | ) 66 | ) 67 | 68 | } 69 | }) 70 | -------------------------------------------------------------------------------- /man/adjustedAlphaLevel.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/allGenerics.R, R/accessors.R 3 | \docType{methods} 4 | \name{adjustedAlphaLevel} 5 | \alias{adjustedAlphaLevel} 6 | \alias{adjustedAlphaLevel} 7 | \alias{adjustedAlphaLevel,stageR} 8 | \alias{adjustedAlphaLevel,stageRTx} 9 | \alias{adjustedAlphaLevel,stageRTx-method} 10 | \title{Get adjusted significance level from the screening stage.} 11 | \usage{ 12 | adjustedAlphaLevel(object, ...) 13 | 14 | \S4method{adjustedAlphaLevel}{stageR}(object) 15 | 16 | \S4method{adjustedAlphaLevel}{stageRTx}(object) 17 | } 18 | \arguments{ 19 | \item{object}{an object of the \code{\link{stageRClass}} class.} 20 | } 21 | \value{ 22 | Scalar, the adjusted significance level from the screening stage. 23 | } 24 | \description{ 25 | This functions returns the adjusted significance level from the screening stage that should be used to compare confirmation stage FWER adjusted p-values against. 26 | } 27 | \details{ 28 | The adjusted significance level is calculated as the fraction of significant features in the screening stage multiplied the alpha level. 29 | } 30 | \section{Methods (by class)}{ 31 | \itemize{ 32 | \item \code{stageRTx}: Get adjusted significance level from the screening stage. 
33 | }} 34 | 35 | \examples{ 36 | pScreen=c(seq(1e-10,1e-2,length.out=100),seq(1e-2,.2,length.out=100),seq(.2,1,length.out=100)) 37 | names(pScreen)=paste0("gene",1:300) 38 | pConfirmation=matrix(runif(900),nrow=300,ncol=3) 39 | dimnames(pConfirmation)=list(paste0("gene",1:300),c("H1","H2","H3")) 40 | stageRObj <- stageR(pScreen=pScreen, pConfirmation=pConfirmation, pScreenAdjusted=FALSE) 41 | stageRObj <- stageWiseAdjustment(stageRObj, method="holm", alpha=0.05) 42 | adjustedAlphaLevel(stageRObj) 43 | # @method stageR-method 44 | } 45 | \references{ 46 | Van den Berge K., Soneson C., Robinson M.D., Clement L. (2017). stageR: a general stage-wise method for controlling the gene-level false discovery rate in differential expression and differential transcript usage. Genome Biology 18:151. https://doi.org/10.1186/s13059-017-1277-0 47 | 48 | R. Heller, E. Manduchi, G. R. Grant, and W. J. Ewens, "A flexible two-stage procedure for identifying gene sets that are differentially expressed." Bioinformatics (Oxford, England), vol. 25, pp. 1019-25, 2009. 49 | } 50 | \seealso{ 51 | \code{\link{stageR}}, \code{\link{stageRClass}} 52 | } 53 | -------------------------------------------------------------------------------- /man/stageRTx.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/constructors.R 3 | \name{stageRTx} 4 | \alias{stageRTx} 5 | \title{Create stageRTx object.} 6 | \usage{ 7 | stageRTx(pScreen, pConfirmation, pScreenAdjusted = FALSE, tx2gene) 8 | } 9 | \arguments{ 10 | \item{pScreen}{A vector of screening hypothesis p-values.} 11 | 12 | \item{pConfirmation}{A matrix of confirmation hypothesis p-values. 
The number of rows should be equal to the length of \code{pScreen}.} 13 | 14 | \item{pScreenAdjusted}{logical, indicating whether the supplied p-values for the screening hypothesis have already been adjusted for multiplicity according to the FDR.} 15 | 16 | \item{tx2gene}{Only applicable for transcript-level analysis. A \code{\link[base]{data.frame}} with transcript IDs in the first columns and gene IDs in the second column. The rownames from \code{pConfirmation} must be contained in the transcript IDs from \code{tx2gene}, and the names from \code{pScreen} must be contained in the gene IDs.} 17 | 18 | \item{...}{Additional arguments.} 19 | } 20 | \value{ 21 | An instance of an object of the \code{\link{stageRTxClass}} 22 | } 23 | \description{ 24 | Constructor function for \code{\link{stageRTxClass}}. A stageR class is a class used for stage-wise analysis in high throughput settings. 25 | In its most basic form, it consists of a vector of p-values for the screening hypothesis, a matrix of p-values for the confirmation hypotheses and a tx2gene object for linking genes to transcripts. 26 | } 27 | \examples{ 28 | # create a \\code{\\link{stageRClass}} object 29 | pScreen <- runif(10) 30 | names(pScreen) <- paste0("gene",1:10) 31 | pConfirmation <- matrix(runif(30),nrow=10,ncol=3) 32 | rownames(pConfirmation) <- paste0("gene",1:10) 33 | stageRObj <- stageR(pScreen=pScreen, pConfirmation=pConfirmation) 34 | pConfirmationTx <- matrix(runif(10),ncol=1) 35 | names(pScreen) <- paste0("gene",rep(1:2,each=5)) 36 | stageRObj <- stageRTx(pScreen=pScreen, pConfirmation=pConfirmationTx, tx2gene=data.frame(transcripts=paste0("transcript",1:10),genes=paste0("gene",rep(1:2,each=5)))) 37 | } 38 | \references{ 39 | Van den Berge K., Soneson C., Robinson M.D., Clement L. (2017). stageR: a general stage-wise method for controlling the gene-level false discovery rate in differential expression and differential transcript usage. Genome Biology 18:151. 
https://doi.org/10.1186/s13059-017-1277-0 40 | } 41 | -------------------------------------------------------------------------------- /inst/extdata/constructDTUDataset.R: -------------------------------------------------------------------------------- 1 | local=FALSE # guard flag: everything inside if(local) is skipped unless this is set to TRUE 2 | if(local){ 3 | setwd("/Users/koenvandenberge/PhD_Data/dtu/humanCancer/prostateCancer/") 4 | library(tidyr) 5 | dataMessy <- read.csv(file="/Users/koenvandenberge/PhD_Data/dtu/humanCancer/prostateCancer/kallisto_table_unnormalized_unfiltered.csv",header=TRUE) 6 | dataMessy <- dataMessy[,c("target_id","est_counts","sample")] 7 | dataClean <- tidyr::spread(dataMessy,key=sample,value=est_counts) 8 | rm(dataMessy) 9 | rownames(dataClean) <- dataClean[,"target_id"] 10 | data <- dataClean[,-1] 11 | prostateData=data[1:10000,] # keep only the first 10000 rows 12 | library(biomaRt) 13 | mart = biomaRt::useMart("ENSEMBL_MART_ENSEMBL", host='mar2016.archive.ensembl.org', dataset="hsapiens_gene_ensembl") 14 | tx2gene = getBM(attributes=c("ensembl_transcript_id","ensembl_gene_id"), filters="ensembl_transcript_id", values=rownames(prostateData), mart=mart, bmHeader=TRUE, uniqueRows=TRUE) 15 | colnames(tx2gene) <- c("Ensembl.Transcript.ID","Ensembl.Gene.ID") 16 | 17 | ### clean up: round the estimated counts up to integers, then filter rows 18 | prostateData <- ceiling(prostateData) 19 | ## remove tx without gene match 20 | prostateData <- prostateData[!is.na(match(rownames(prostateData),tx2gene$Ensembl.Transcript.ID)),] 21 | ## remove all zero rows 22 | prostateData <- prostateData[!rowSums(prostateData)==0,] 23 | ## remove genes with only one tx 24 | geneTable <- table(as.character(tx2gene$Ensembl.Gene.ID[match(rownames(prostateData),tx2gene$Ensembl.Transcript.ID)])) 25 | genesWithOneTx <- names(geneTable)[geneTable==1] 26 | txFromGenesWithOneTx <- tx2gene$Ensembl.Transcript.ID[tx2gene$Ensembl.Gene.ID%in%genesWithOneTx] 27 | prostateData <- prostateData[!rownames(prostateData)%in%as.character(txFromGenesWithOneTx),] 28 | # transcript/gene table for the rows that are left; it becomes the featureData of the ExpressionSet below 29 | txGeneData = 
as.data.frame(cbind(rownames(prostateData),as.character(tx2gene$Ensembl.Transcript.ID[match(rownames(prostateData),tx2gene$Ensembl.Transcript.ID)]),as.character(tx2gene$Ensembl.Gene.ID[match(rownames(prostateData),tx2gene$Ensembl.Transcript.ID)]))) 30 | txGeneData=txGeneData[,2:3] 31 | colnames(txGeneData)=c("transcript","gene") 32 | rownames(txGeneData)=txGeneData[,"transcript"] 33 | barplot(table(table(txGeneData$gene)), main="Distribution of number of tx per gene") 34 | 35 | #this leaves us with 36 | length(unique(txGeneData$gene)) #nr genes 37 | median(table(as.character(txGeneData$gene))) #median nr of tx/gene 38 | 39 | ## metadata: only every second row of the sample table is used - presumably each sample has two assay rows (suffix _1/_2); TODO confirm. The last character of the sample name is taken as condition, the remaining characters as patient. 40 | metaData <- read.table("sampleDataRelationship.txt",header=TRUE,sep="\t") 41 | assays <- metaData$Assay.Name 42 | runs <- as.character(metaData$Comment.ENA_RUN.)[seq(1,length(assays),2)] 43 | samples=gsub(x=assays,pattern="_[1-2]",replacement="")[seq(1,length(assays),2)] 44 | patient=factor(sapply(samples,function(x) substr(x,1,nchar(x)-1))) 45 | condition=factor(sapply(samples,function(x) substr(x,nchar(x),nchar(x)))) 46 | prostateData <- prostateData[,match(runs,colnames(prostateData))] #same ordering as metadata 47 | sampleData <- data.frame(condition=condition,patient=patient) 48 | rownames(sampleData)=colnames(prostateData) 49 | 50 | 51 | ### build an expressionset. 
52 | library(Biobase) 53 | esetProstate = ExpressionSet(assayData=as.matrix(prostateData), 54 | phenoData=AnnotatedDataFrame(sampleData), 55 | featureData=AnnotatedDataFrame(txGeneData)) 56 | save(esetProstate,file="~/esetProstate.RData") 57 | devtools::use_data(esetProstate) # NOTE(review): use_data() moved to the usethis package in later devtools releases - confirm before re-running 58 | } 59 | -------------------------------------------------------------------------------- /man/stageRClass.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/stageRClasses.R 3 | \docType{class} 4 | \name{stageRClass} 5 | \alias{stageRClass} 6 | \alias{stageRClass} 7 | \alias{stageRTxClass} 8 | \title{The stageR class} 9 | \description{ 10 | This class is used for adjusting p-values with stage-wise testing for high-throughput studies. 11 | } 12 | \section{Slots}{ 13 | 14 | \describe{ 15 | \item{\code{pScreen}}{A vector of p-values for the screening hypothesis.} 16 | 17 | \item{\code{pConfirmation}}{A matrix of p-values for the confirmation hypotheses.} 18 | 19 | \item{\code{adjustedP}}{A matrix of adjusted p-values. This slot should be accessed through \code{\link{getAdjustedPValues,stageR,logical,logical-method}}. Alternatively, significance results can be accessed through \code{\link{getResults,stageR-method}}.} 20 | 21 | \item{\code{method}}{Character string indicating the method used for FWER correction in the confirmation stage of the stage-wise analysis. Can be any of \code{"none"}, \code{"holm"}, \code{"dte"}, \code{"dtu"}, \code{"user"}. \code{"none"} will not adjust the p-values in the confirmation stage. \code{"holm"} is an adapted Holm procedure for a stage-wise analysis, where the method takes into account the fact that genes in the confirmation stage have already passed the screening stage, hence the procedure will be more powerful for the most significant p-value as compared to the standard Holm procedure. 
\code{"dte"} is the adjusted Holm-Shaffer procedure for differential transcript expression analysis. \code{"dtu"} is the adjusted Holm-Shaffer procedure for differential transcript usage. \code{"user"} indicates a user-defined adjustment that should be specified with the \code{adjustment} argument.} 22 | 23 | \item{\code{alpha}}{the OFDR level on which the stage-wise analysis should be controlled.} 24 | 25 | \item{\code{alphaAdjusted}}{the adjusted significance level to compare against FWER-adjusted p-values of the confirmation stage to decide on significance of the hypothesis test.} 26 | 27 | \item{\code{pScreenAdjusted}}{logical, indicating whether the supplied p-values for the screening hypothesis have already been adjusted for multiplicity according to the FDR.} 28 | 29 | \item{\code{tx2gene}}{matrix with transcript IDs in the first column and gene IDs in the second column to be used for DTE and DTU analysis. All rownames from \code{pConfirmation} should match with a transcript ID and all names from \code{pScreen} should match with a gene ID.} 30 | }} 31 | 32 | \references{ 33 | Van den Berge K., Soneson C., Robinson M.D., Clement L. (2017). stageR: a general stage-wise method for controlling the gene-level false discovery rate in differential expression and differential transcript usage. Genome Biology 18:151. https://doi.org/10.1186/s13059-017-1277-0 34 | R. Heller, E. Manduchi, G. R. Grant, and W. J. Ewens, "A flexible two-stage procedure for identifying gene sets that are differentially expressed." Bioinformatics (Oxford, England), vol. 25, pp. 1019-25, 2009. 35 | S. Holm, "A Simple Sequentially Rejective Multiple Test Procedure," Scandinavian Journal of Statistics, vol. 6, no. 2, pp. 65-70, 1979. 36 | J. P. Shaffer, "Modified Sequentially Rejective Multiple Test Procedures," Journal of the American Statistical Association, vol. 81, p. 826, 1986. 
37 | } 38 | -------------------------------------------------------------------------------- /man/getAdjustedPValues.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/allGenerics.R, R/accessors.R 3 | \docType{methods} 4 | \name{getAdjustedPValues} 5 | \alias{getAdjustedPValues} 6 | \alias{getAdjustedPValues} 7 | \alias{getAdjustedPValues,stageR} 8 | \alias{getAdjustedPValues,stageRTx} 9 | \alias{getAdjustedPValues,stageRTx,logical,logical-method} 10 | \title{Retrieve the stage-wise adjusted p-values.} 11 | \usage{ 12 | getAdjustedPValues(object, onlySignificantGenes, order, ...) 13 | 14 | \S4method{getAdjustedPValues}{stageR,logical,logical}(object, 15 | onlySignificantGenes, order, ...) 16 | 17 | \S4method{getAdjustedPValues}{stageRTx,logical,logical}(object, 18 | onlySignificantGenes, order, ...) 19 | } 20 | \arguments{ 21 | \item{object}{an object of the \code{\link{stageRClass}} class.} 22 | 23 | \item{onlySignificantGenes}{logical. If FALSE (default), all genes are returned. If TRUE, only the genes significant for the screening hypothesis are returned.} 24 | 25 | \item{order}{logical. If TRUE (default), the returned matrix of adjusted p-values are ordered based on the screening hypothesis adjusted p-value.} 26 | 27 | \item{...}{Other arguments passed to .getAdjustedP or .getAdjustedPTx} 28 | } 29 | \value{ 30 | For complex DGE experiments (stageR object), a matrix of adjusted p-values where every row corresponds to a gene, and every column corresponds to a contrast. The first column will be the BH FDR adjusted p-value from the screening step. 31 | For transcript-level experiments (stageRTx object), a matrix of adjusted p-values where every row corresponds to a transcript. 32 | } 33 | \description{ 34 | This functions returns the stage-wise adjusted p-values for an object from the \code{\link{stageRClass}} class. 
Note, that the p-values should have been adjusted with the \code{\link{stageWiseAdjustment,stageR,character,numeric-method}} function prior to calling this function. 35 | } 36 | \details{ 37 | The function returns FDR adjusted p-values for the screening hypothesis and stage-wise adjusted p-values for the confirmation hypothesis p-values. For features that were not significant in the screening hypothesis, the confirmation stage adjusted p-values are set to \code{NA}. 38 | The adjusted p-values in the output of \code{getAdjustedPValues} can directly be compared to alpha, the OFDR level specified in \code{stageWiseAdjustment}, to flag significant features. 39 | } 40 | \section{Methods (by class)}{ 41 | \itemize{ 42 | \item \code{object = stageRTx,onlySignificantGenes = logical,order = logical}: Retrieve the stage-wise adjusted p-values. 43 | }} 44 | 45 | \examples{ 46 | pScreen=c(seq(1e-10,1e-2,length.out=100),seq(1e-2,.2,length.out=100),seq(.2,1,length.out=100)) 47 | names(pScreen)=paste0("gene",1:300) 48 | pConfirmation=matrix(runif(900),nrow=300,ncol=3) 49 | dimnames(pConfirmation)=list(paste0("gene",1:300),c("H1","H2","H3")) 50 | stageRObj <- stageR(pScreen=pScreen, pConfirmation=pConfirmation) 51 | stageRObj <- stageWiseAdjustment(stageRObj, method="holm", alpha=0.05) 52 | head(getAdjustedPValues(stageRObj, onlySignificantGenes=TRUE, order=TRUE)) 53 | } 54 | \references{ 55 | Van den Berge K., Soneson C., Robinson M.D., Clement L. (2017). stageR: a general stage-wise method for controlling the gene-level false discovery rate in differential expression and differential transcript usage. Genome Biology 18:151. 
https://doi.org/10.1186/s13059-017-1277-0 56 | } 57 | -------------------------------------------------------------------------------- /man/stageWiseAdjustment.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/allGenerics.R, R/accessors.R 3 | \docType{methods} 4 | \name{stageWiseAdjustment} 5 | \alias{stageWiseAdjustment} 6 | \alias{stageWiseAdjustment} 7 | \alias{stageWiseAdjustment,stageR} 8 | \alias{stageWiseAdjustment,stageRTx} 9 | \alias{stageWiseAdjustment,stageRTx,character,numeric-method} 10 | \title{adjust p-values in a two-stage analysis} 11 | \usage{ 12 | stageWiseAdjustment(object, method, alpha, ...) 13 | 14 | \S4method{stageWiseAdjustment}{stageR,character,numeric}(object, method, alpha, 15 | adjustment = NULL, ...) 16 | 17 | \S4method{stageWiseAdjustment}{stageRTx,character,numeric}(object, method, 18 | alpha, tx2gene, ...) 19 | } 20 | \arguments{ 21 | \item{object}{an object of the \code{\link{stageRClass}} class.} 22 | 23 | \item{method}{Character string indicating the method used for FWER correction in the confirmation stage of the stage-wise analysis. Can be any of \code{"none"}, \code{"holm"}, \code{"dte"}, \code{"dtu"}, \code{"user"}. \code{"none"} will not adjust the p-values in the confirmation stage. \code{"holm"} is an adapted Holm procedure for a stage-wise analysis, where the method takes into account the fact that genes in the confirmation stage have already passed the screening stage, hence the procedure will be more powerful for the most significant p-value as compared to the standard Holm procedure. \code{"dte"} is the adjusted Holm-Shaffer procedure for differential transcript expression analysis. \code{"dtu"} is the adjusted Holm-Shaffer procedure for differential transcript usage. 
\code{"user"} indicates a user-defined adjustment that should be specified with the \code{adjustment} argument.} 24 | 25 | \item{alpha}{the OFDR on which to control the two-stage analysis.} 26 | 27 | \item{...}{Additional arguments passed to \code{.stageWiseTest}} 28 | 29 | \item{adjustment}{a user-defined adjustment of the confirmation stage p-values. Only applicable when \code{method} is \code{"user"} and ignored otherwise.} 30 | 31 | \item{tx2gene}{Only applicable when \code{method} is \code{"dte"} or \code{"dtu"}. A \code{\link[base]{data.frame}} with transcript IDs in the first columns and gene IDs in the second column. The rownames from \code{pConfirmation} must be contained in the transcript IDs from \code{tx2gene}, and the names from \code{pScreen} must be contained in the gene IDs.} 32 | } 33 | \value{ 34 | A stageR/stageRTx object with stage-wise adjusted p-values. 35 | } 36 | \description{ 37 | This function will adjust p-values according to a hierarchical two-stage testing paradigm. 38 | } 39 | \section{Methods (by class)}{ 40 | \itemize{ 41 | \item \code{object = stageRTx,method = character,alpha = numeric}: Adjust p-values in a two-stage analysis 42 | }} 43 | 44 | \examples{ 45 | pScreen=c(seq(1e-10,1e-2,length.out=100),seq(1e-2,.2,length.out=100),seq(.2,1,length.out=100)) 46 | names(pScreen)=paste0("gene",1:300) 47 | pConfirmation=matrix(runif(900),nrow=300,ncol=3) 48 | dimnames(pConfirmation)=list(paste0("gene",1:300),c("H1","H2","H3")) 49 | stageRObj <- stageR(pScreen=pScreen, pConfirmation=pConfirmation) 50 | stageRObj <- stageWiseAdjustment(stageRObj, method="holm", alpha=0.05) 51 | getAdjustedPValues(stageRObj, onlySignificantGenes=TRUE, order=TRUE) 52 | } 53 | \references{ 54 | Van den Berge K., Soneson C., Robinson M.D., Clement L. (2017). stageR: a general stage-wise method for controlling the gene-level false discovery rate in differential expression and differential transcript usage. Genome Biology 18:151. 
https://doi.org/10.1186/s13059-017-1277-0 55 | R. Heller, E. Manduchi, G. R. Grant, and W. J. Ewens, "A flexible two-stage procedure for identifying gene sets that are differentially expressed." Bioinformatics (Oxford, England), vol. 25, pp. 1019-25, 2009. 56 | 57 | S. Holm, "A Simple Sequentially Rejective Multiple Test Procedure," Scandinavian Journal of Statistics, vol. 6, no. 2, pp. 65-70, 1979. 58 | J. P. Shaffer, "Modified Sequentially Rejective Multiple Test Procedures," Journal of the American Statistical Association, vol. 81, p. 826, 1986. 59 | } 60 | -------------------------------------------------------------------------------- /tests/testthat/testConfirmationAdjustment.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(stageR) 3 | 4 | context("Test the confirmation p-value correction methods.") 5 | # Shared fixture: 15 genes x 10 hypotheses of random confirmation p-values; every gene gets the same screening p-value (1e-5). 6 | set.seed(723) 7 | pTest = matrix(runif(150,1e-10,1e-1),nrow=15,ncol=10, dimnames=list(paste0("gene",1:15), paste0("H",1:10))) 8 | pScreen = rep(1e-5,15) ; names(pScreen)=rownames(pTest) 9 | stageRObj = stageR(pScreen=pScreen, pConfirmation=pTest) 10 | 11 | ## Holm: expected values computed by hand per gene. Sorted p-values are multiplied by c(9,9:1) (the smallest one by 9, not 10), capped at 1, made monotone with cummax and put back in the original column order. 12 | pTestHolm = t(apply(pTest,1,function(row){ 13 | o=order(row) 14 | row=cummax(pmin(row[o]*c(9,9:1),1)) 15 | rowBack=vector(length=length(row)) 16 | rowBack[o]=row 17 | rowBack 18 | })) 19 | colnames(pTestHolm)=paste0("H",1:10) 20 | 21 | test_that("Test that Holm correction is correct",{ 22 | expect_equal(suppressWarnings(getAdjustedPValues(stageWiseAdjustment(stageRObj, method="holm",alpha=0.05), FALSE, FALSE)[,2:11]), pTestHolm) 23 | }) 24 | 25 | ## user: same scheme as above, with the user-supplied multipliers 1:10 applied to the sorted p-values 26 | adjustment=1:10 27 | pTestUser = t(apply(pTest,1,function(row){ 28 | o=order(row) 29 | row=cummax(pmin(row[o]*adjustment,1)) 30 | rowBack=vector(length=length(row)) 31 | rowBack[o]=row 32 | rowBack 33 | })) 34 | colnames(pTestUser)=paste0("H",1:10) 35 | 36 | test_that("Test that user correction is correct",{ 37 | 
expect_equal(suppressWarnings(getAdjustedPValues(stageWiseAdjustment(stageRObj, method="user",alpha=0.05, adjustment=1:10), FALSE, FALSE)[,2:11]), pTestUser) 38 | }) 39 | 40 | ## none: all multipliers equal 1, so the expected values are the unadjusted confirmation p-values 41 | adjustment=rep(1,10) 42 | pTestNone = t(apply(pTest,1,function(row){ 43 | o=order(row) 44 | row=cummax(pmin(row[o]*adjustment,1)) 45 | rowBack=vector(length=length(row)) 46 | rowBack[o]=row 47 | rowBack 48 | })) 49 | colnames(pTestNone)=paste0("H",1:10) 50 | 51 | test_that("Test that none correction is correct",{ 52 | expect_equal(suppressWarnings(getAdjustedPValues(stageWiseAdjustment(stageRObj, method="none",alpha=0.05), FALSE, FALSE)[,2:11]), pTestNone) 53 | }) 54 | 55 | ## DTE: 15 transcripts mapped to 9 genes - gene1 and gene2 have four transcripts each, gene3..gene9 one each. pScreen and tx2gene are overwritten from here on. 56 | pScreen=pScreen[1:9] 57 | names(pScreen)=paste0("gene",1:9) 58 | pTx=pTest[,1,drop=FALSE] 59 | rownames(pTx)=paste0("transcript",1:15) 60 | tx2gene = data.frame(transcript=paste0("transcript",1:15), gene=paste0("gene",c(rep(c(1,2),each=4),3:9))) 61 | stageRTxObj = stageRTx(pScreen=pScreen, pConfirmation=pTx, tx2gene=tx2gene) 62 | # adjust manually 63 | #gene1: four transcripts, sorted p-values multiplied by c(3,3,2,1) 64 | gene1P = pTx[1:4,] 65 | o=order(gene1P) 66 | gene1PAdj <- vector(length=length(gene1P)) 67 | gene1PAdj[o] = cummax(pmin(gene1P[o]*c(3,3,2,1),1)) 68 | #gene2: same multipliers as gene1 69 | gene2P = pTx[5:8,] 70 | o=order(gene2P) 71 | gene2PAdj <- vector(length=length(gene2P)) 72 | gene2PAdj[o] = cummax(pmin(gene2P[o]*c(3,3,2,1),1)) 73 | #others: the seven single-transcript genes are expected to get an adjusted p-value of 0 74 | geneOthers=rep(0,7) 75 | allAdjP=unname(c(gene1PAdj, gene2PAdj, geneOthers)) 76 | 77 | test_that("Test that DTE correction is correct",{ 78 | expect_equal(suppressWarnings(getAdjustedPValues(stageWiseAdjustment(stageRTxObj, method="dte",alpha=0.05), FALSE, FALSE))[,"transcript"], allAdjP) 79 | }) 80 | 81 | ## DTU: only the two four-transcript genes are kept; the confirmation p-values are the first eight of column 1 divided by 10 82 | pScreen=pScreen[1:2] 83 | names(pScreen)=paste0("gene",1:2) 84 | pTx=pTest[1:8,1,drop=FALSE]/10 85 | rownames(pTx)=paste0("transcript",1:8) 86 | tx2gene = data.frame(transcript=paste0("transcript",1:8), gene=paste0("gene",rep(c(1,2),each=4))) 87 | stageRTxObj = stageRTx(pScreen=pScreen, pConfirmation=pTx, 
tx2gene=tx2gene) 88 | # adjust manually 89 | #gene1: sorted p-values multiplied by c(2,2,2,1), then restored to the original order 90 | gene1P = pTx[1:4,] 91 | o=order(gene1P) 92 | gene1PAdj = cummax(pmin(gene1P[o]*c(2,2,2,1),1)) 93 | gene1Back=vector(length=length(gene1PAdj)) 94 | gene1Back[o] = gene1PAdj 95 | #gene2: same multipliers as gene1 96 | gene2P = pTx[5:8,] 97 | o=order(gene2P) 98 | gene2PAdj = cummax(pmin(gene2P[o]*c(2,2,2,1),1)) 99 | gene2Back=vector(length=length(gene2PAdj)) 100 | gene2Back[o] = gene2PAdj 101 | 102 | allAdjP=unname(c(gene1Back, gene2Back)) 103 | 104 | test_that("Test that DTU correction is correct",{ 105 | expect_equal(suppressWarnings(getAdjustedPValues(stageWiseAdjustment(stageRTxObj, method="dtu",alpha=0.05), FALSE, FALSE))[,"transcript"], allAdjP) 106 | }) 107 | # drop the objects created by this file 108 | rm(pTest, pScreen, stageRObj, pTestHolm, pTestUser, pTestNone, pTx, tx2gene, stageRTxObj, gene1P, o, gene1PAdj, gene2P, gene2PAdj, geneOthers, allAdjP, gene1Back, gene2Back) -------------------------------------------------------------------------------- /R/constructors.R: -------------------------------------------------------------------------------- 1 | #' @include stageRClasses.R allGenerics.R 2 | 3 | #' @title Create stageR object 4 | #' @description 5 | #' Constructor function for \code{\link{stageRClass}}. A stageR class is a class used for stage-wise analysis in high throughput settings. 6 | #' In its most basic form, it consists of a vector of p-values for the screening hypothesis and a matrix of p-values for the confirmation hypotheses. 7 | #' @param pScreen A vector of screening hypothesis p-values. 8 | #' @param pConfirmation A matrix of confirmation hypothesis p-values. When constructing a \code{\link{stageRClass}} object, the number of rows should be equal to the length of \code{pScreen}. For a \code{\link{stageRTxClass}} object, the dimensions can be different. 9 | #' @param pScreenAdjusted logical, indicating whether the supplied p-values for the screening hypothesis have already been adjusted for multiplicity according to the FDR. 10 | #' @param ... 
Additional arguments. 11 | #' @return An instance of an object of the \code{\link{stageRClass}} 12 | #' @references 13 | #' Van den Berge K., Soneson C., Robinson M.D., Clement L. (2017). stageR: a general stage-wise method for controlling the gene-level false discovery rate in differential expression and differential transcript usage. Genome Biology 18:151. https://doi.org/10.1186/s13059-017-1277-0 14 | #' @examples 15 | #' # create a stageR object, followed by a stageRTx object 16 | #' pScreen <- runif(10) 17 | #' names(pScreen) <- paste0("gene",1:10) 18 | #' pConfirmation <- matrix(runif(30),nrow=10,ncol=3) 19 | #' rownames(pConfirmation) <- paste0("gene",1:10) 20 | #' stageRObj <- stageR(pScreen=pScreen, pConfirmation=pConfirmation) 21 | #' pConfirmationTx <- matrix(runif(10),ncol=1) 22 | #' names(pScreen) <- paste0("gene",rep(1:2,each=5)) 23 | #' stageRObj <- stageRTx(pScreen=pScreen, pConfirmation=pConfirmationTx, tx2gene=data.frame(transcripts=paste0("transcript",1:10),genes=paste0("gene",rep(1:2,each=5)))) 24 | #' @name stageR 25 | #' @rdname stageR 26 | #' @export 27 | #setMethod("stageR", signature=signature(pScreen="numeric", pConfirmation="matrix"), 28 | # definition=function(pScreen, pConfirmation, pScreenAdjusted=FALSE){ 29 | stageR <- function(pScreen, pConfirmation, pScreenAdjusted=FALSE){ 30 | if(is.null(nrow(pConfirmation)) || length(pScreen)!=nrow(pConfirmation)) # nrow() is NULL for input without dimensions; without this guard if() itself fails with "argument is of length zero" 31 | stop("The number of screening hypothesis p-values must be equal to the number of rows in pConfirmation.") 32 | if(!identical(as.character(names(pScreen)),as.character(rownames(pConfirmation)))) 33 | warning("The features (names) in pScreen are not identical to the features (rownames) in pConfirmation.") 34 | stageR <- new("stageR") 35 | stageR@pScreen <- pScreen 36 | stageR@pConfirmation <- pConfirmation 37 | stageR@pScreenAdjusted <- pScreenAdjusted 38 | stageR@adjusted <- FALSE 39 | return(stageR) 40 | } 41 | #) 42 | 43 | #' @title Create stageRTx object. 
44 | #' @description 45 | #' Constructor function for \code{\link{stageRTxClass}}. A stageR class is a class used for stage-wise analysis in high throughput settings. 46 | #' In its most basic form, it consists of a vector of p-values for the screening hypothesis, a matrix of p-values for the confirmation hypotheses and a tx2gene object for linking genes to transcripts. 47 | #' @param pScreen A vector of screening hypothesis p-values. 48 | #' @param pConfirmation A matrix of transcript-level confirmation hypothesis p-values, with transcript IDs as rownames. The number of rows does not have to be equal to the length of \code{pScreen}. 49 | #' @param pScreenAdjusted logical, indicating whether the supplied p-values for the screening hypothesis have already been adjusted for multiplicity according to the FDR. 50 | #' @param tx2gene Only applicable for transcript-level analysis. A \code{\link[base]{data.frame}} with transcript IDs in the first column and gene IDs in the second column. The rownames from \code{pConfirmation} must be contained in the transcript IDs from \code{tx2gene}, and the names from \code{pScreen} must be contained in the gene IDs. 51 | #' @param ... Additional arguments. 52 | #' @return An instance of an object of the \code{\link{stageRTxClass}} 53 | #' @references 54 | #' Van den Berge K., Soneson C., Robinson M.D., Clement L. (2017). stageR: a general stage-wise method for controlling the gene-level false discovery rate in differential expression and differential transcript usage. Genome Biology 18:151. 
https://doi.org/10.1186/s13059-017-1277-0
#' @examples
#' # create a \code{\link{stageRClass}} object
#' pScreen <- runif(10)
#' names(pScreen) <- paste0("gene",1:10)
#' pConfirmation <- matrix(runif(30),nrow=10,ncol=3)
#' rownames(pConfirmation) <- paste0("gene",1:10)
#' stageRObj <- stageR(pScreen=pScreen, pConfirmation=pConfirmation)
#' pConfirmationTx <- matrix(runif(10),ncol=1)
#' names(pScreen) <- paste0("gene",rep(1:2,each=5))
#' stageRObj <- stageRTx(pScreen=pScreen, pConfirmation=pConfirmationTx, tx2gene=data.frame(transcripts=paste0("transcript",1:10),genes=paste0("gene",rep(1:2,each=5))))
#' @name stageRTx
#' @rdname stageRTx
#' @export
stageRTx <- function(pScreen, pConfirmation, pScreenAdjusted=FALSE, tx2gene){
  # Gene-level p-values can only be linked to transcripts through their names.
  if (is.null(names(pScreen))) {
    stop("pScreen does not have names, please set the names of the corresponding genes to the pScreen vector.")
  }
  # Every transcript (rowname) must occur in the first tx2gene column ...
  if (!all(rownames(pConfirmation) %in% tx2gene[, 1])) {
    stop("not all transcript names in pConfirmation match with a transcript ID from the tx2gene object.")
  }
  # ... and every gene (name) in the second one.
  if (!all(names(pScreen) %in% tx2gene[, 2])) {
    stop("not all gene names in pScreen match with a gene ID from the tx2gene object.")
  }

  # Start from an empty object and fill the slots one at a time; the object
  # is flagged as not yet stage-wise adjusted.
  obj <- new("stageRTx")
  obj@pScreen <- pScreen
  obj@pConfirmation <- pConfirmation
  obj@pScreenAdjusted <- pScreenAdjusted
  obj@adjusted <- FALSE
  obj@tx2gene <- tx2gene
  obj
}

# NOTE(review): setValidity() needs the class to exist when it is called, yet
# stageRClasses.R declares `@include constructors.R`, which would collate this
# file first -- confirm the Collate order in DESCRIPTION.

# Validity method for "stageR" objects.
# The checks read the slots of `object` (not free variables) and the method
# returns TRUE or a character vector of problems, which is the contract
# validObject() relies on to reject an object.
setValidity("stageR", function(object) {
  pScreen <- object@pScreen
  pConfirmation <- object@pConfirmation
  problems <- character(0)

  if (length(pScreen) != nrow(pConfirmation)) {
    problems <- c(problems, "The number of screening hypothesis p-values must be equal to the number of rows in pConfirmation.")
  }

  # Differing feature labels only trigger a warning in the stageR()
  # constructor, so they are reported here without invalidating the object.
  if (!identical(as.character(names(pScreen)), as.character(rownames(pConfirmation)))) {
    message("The features (names) in pScreen are not identical to the features (rownames) in pConfirmation.")
  }

  if (anyNA(pConfirmation)) {
    problems <- c(problems, "NA confirmation stage p-values are not allowed.")
  }

  if (length(problems) > 0L) problems else TRUE
})

# Validity method for "stageRTx" objects.
# Mirrors the checks of the stageRTx() constructor on the object's own slots
# and returns TRUE or a character vector of problems.
setValidity("stageRTx", function(object) {
  pScreen <- object@pScreen
  pConfirmation <- object@pConfirmation
  tx2gene <- object@tx2gene
  problems <- character(0)

  if (ncol(tx2gene) < 2L) {
    # An empty prototype object has nothing to match; indexing the missing
    # columns would otherwise fail with an unrelated error.
    if (length(pScreen) > 0L || nrow(pConfirmation) > 0L) {
      problems <- c(problems, "tx2gene must contain transcript IDs in its first column and gene IDs in its second column.")
    }
  } else {
    if (!all(rownames(pConfirmation) %in% tx2gene[, 1])) {
      problems <- c(problems, "not all transcript names in pConfirmation match with a transcript ID from the tx2gene object.")
    }
    if (!all(names(pScreen) %in% tx2gene[, 2])) {
      problems <- c(problems, "not all gene names in pScreen match with a gene ID from the tx2gene object.")
    }
  }

  if (anyNA(pConfirmation)) {
    problems <- c(problems, "NA confirmation stage p-values are not allowed.")
  }

  if (length(problems) > 0L) problems else TRUE
})

-------------------------------------------------------------------------------- /R/stageRClasses.R: -------------------------------------------------------------------------------- 1 | #' @include constructors.R 2 | #' @import SummarizedExperiment 3 | 4 | #' @title The stageR class 5 | #' 6 | #' @description 7 | #' This class is used for adjusting p-values with stage-wise testing for high-throughput studies. 8 | #' 9 | #' @slot pScreen A vector of p-values for the screening hypothesis. 10 | #' @slot pConfirmation A matrix of p-values for the confirmation hypotheses. 11 | #' @slot adjustedP A matrix of adjusted p-values. This slot should be accessed through \code{\link{getAdjustedPValues,stageR,logical,logical-method}}. Alternatively, significance results can be accessed through \code{\link{getResults,stageR-method}}. 12 | #' @slot method Character string indicating the method used for FWER correction in the confirmation stage of the stage-wise analysis. Can be any of \code{"none"}, \code{"holm"}, \code{"dte"}, \code{"dtu"}, \code{"user"}. \code{"none"} will not adjust the p-values in the confirmation stage.
\code{"holm"} is an adapted Holm procedure for a stage-wise analysis, where the method takes into account the fact that genes in the confirmation stage have already passed the screening stage, hence the procedure will be more powerful for the most significant p-value as compared to the standard Holm procedure. \code{"dte"} is the adjusted Holm-Shaffer procedure for differential transcript expression analysis. \code{"dtu"} is the adjusted Holm-Shaffer procedure for differential transcript usage. \code{"user"} indicates a user-defined adjustment that should be specified with the \code{adjustment} argument. 13 | #' @slot alpha the OFDR level on which the stage-wise analysis should be controlled. 14 | #' @slot alphaAdjusted the adjusted significance level to compare against FWER-adjusted p-values of the confirmation stage to decide on significance of the hypothesis test. 15 | #' @slot pScreenAdjusted logical, indicating whether the supplied p-values for the screening hypothesis have already been adjusted for multiplicity according to the FDR. 16 | #' @slot tx2gene matrix with transcript IDs in the first column and gene IDs in the second column to be used for DTE and DTU analysis. All rownames from \code{pConfirmation} should match with a transcript ID and all names from \code{pScreen} should match with a gene ID. 17 | #' @references 18 | #' Van den Berge K., Soneson C., Robinson M.D., Clement L. (2017). stageR: a general stage-wise method for controlling the gene-level false discovery rate in differential expression and differential transcript usage. Genome Biology 18:151. https://doi.org/10.1186/s13059-017-1277-0 19 | #' R. Heller, E. Manduchi, G. R. Grant, and W. J. Ewens, "A flexible two-stage procedure for identifying gene sets that are differentially expressed." Bioinformatics (Oxford, England), vol. 25, pp. 1019-25, 2009. 20 | #' S. Holm, "A Simple Sequentially Rejective Multiple Test Procedure," Scandinavian Journal of Statistics, vol. 6, no. 2, pp. 65-70, 1979. 
#' J. P. Shaffer, "Modified Sequentially Rejective Multiple Test Procedures," Journal of the American Statistical Association, vol. 81, p. 826, 1986.
#' @aliases stageRClass
#' @name stageRClass
#' @rdname stageRClass
#' @exportClass stageR
# Gene-level stage-wise testing container: extends RangedSummarizedExperiment
# and adds the p-value slots plus the bookkeeping of the adjustment.
setClass(
  "stageR",
  contains = "RangedSummarizedExperiment",
  slots = c(
    pScreen = "numeric",
    pConfirmation = "matrix",
    adjustedP = "matrix",
    method = "character",
    alpha = "numeric",
    alphaAdjusted = "numeric",
    adjusted = "logical",
    pScreenAdjusted = "logical"
  )
)

#' @name stageRClass
#' @aliases stageRTxClass
#' @rdname stageRClass
#' @exportClass stageRTx
# Transcript-level variant: same slots as "stageR" plus the tx2gene mapping.
# It extends RangedSummarizedExperiment directly, not "stageR".
setClass(
  "stageRTx",
  contains = "RangedSummarizedExperiment",
  slots = c(
    pScreen = "numeric",
    pConfirmation = "matrix",
    adjustedP = "matrix",
    method = "character",
    alpha = "numeric",
    alphaAdjusted = "numeric",
    adjusted = "logical",
    pScreenAdjusted = "logical",
    tx2gene = "data.frame"
  )
)

63 | #' #' @title The stageR class 64 | #' #' 65 | #' #' @description 66 | #' #' This class is used for adjusting p-values with stage-wise testing for high-throughput studies. 67 | #' #' 68 | #' #' @slot pScreen A vector of p-values for the screening hypothesis. 69 | #' #' @slot pConfirmation A matrix of p-values for the confirmation hypotheses. 70 | #' #' @slot adjustedP A matrix of adjusted p-values. This slot should be accessed through \code{\link{getAdjustedPValues,stageR,logical,logical-method}}. Alternatively, significance results can be accessed through \code{\link{getResults,stageR-method}}. 71 | #' #' @slot method Character string indicating the method used for FWER correction in the confirmation stage of the stage-wise analysis. Can be any of \code{"none"}, \code{"holm"}, \code{"dte"}, \code{"dtu"}, \code{"user"}.
\code{"none"} will not adjust the p-values in the confirmation stage. \code{"holm"} is an adapted Holm procedure for a stage-wise analysis, where the method takes into account the fact that genes in the confirmation stage have already passed the screening stage, hence the procedure will be more powerful for the most significant p-value as compared to the standard Holm procedure. \code{"dte"} is the adjusted Holm-Shaffer procedure for differential transcript expression analysis. \code{"dtu"} is the adjusted Holm-Shaffer procedure for differential transcript usage. \code{"user"} indicates a user-defined adjustment that should be specified with the \code{adjustment} argument. 72 | #' #' @slot alpha the OFDR level on which the stage-wise analysis should be controlled. 73 | #' #' @slot alphaAdjusted the adjusted significance level to compare against FWER-adjusted p-values of the confirmation stage to decide on significance of the hypothesis test. 74 | #' #' @slot pScreenAdjusted logical, indicating whether the supplied p-values for the screening hypothesis have already been adjusted for multiplicity according to the FDR. 75 | #' #' @slot tx2gene matrix with transcript IDs in the first column and gene IDs in the second column to be used for DTE and DTU analysis. All rownames from \code{pConfirmation} should match with a transcript ID and all names from \code{pScreen} should match with a gene ID. 76 | #' #' @references 77 | #' #' Van den Berge K., Soneson C., Robinson M.D., Clement L. "A general and powerful stage-wise testing procedure for differential expression and differential transcript usage." \url{http://biorxiv.org/content/early/2017/02/16/109082} 78 | #' #' R. Heller, E. Manduchi, G. R. Grant, and W. J. Ewens, "A flexible two-stage procedure for identifying gene sets that are differentially expressed." Bioinformatics (Oxford, England), vol. 25, pp. 1019-25, 2009. 79 | #' #' S. 
Holm, "A Simple Sequentially Rejective Multiple Test Procedure," Scandinavian Journal of Statistics, vol. 6, no. 2, pp. 65-70, 1979. 80 | #' #' J. P. Shaffer, "Modified Sequentially Rejective Multiple Test Procedures," Journal of the American Statistical Association, vol. 81, p. 826, 1986. 81 | #' #' @aliases stageRClass 82 | #' #' @name stageRClass 83 | #' #' @rdname stageRClass 84 | #' #' @exportClass stageR 85 | #' setClass("stageR",slots=c(pScreen="numeric", pConfirmation="matrix", adjustedP="matrix", method="character", alpha="numeric", alphaAdjusted="numeric", adjusted="logical", pScreenAdjusted="logical")) 86 | 87 | #' #' @name stageRClass 88 | #' #' @aliases stageRTxClass 89 | #' #' @rdname stageRClass 90 | #' #' @exportClass stageRTx 91 | #' setClass("stageRTx",slots=c(pScreen="numeric", pConfirmation="matrix", adjustedP="matrix", method="character", alpha="numeric", alphaAdjusted="numeric", adjusted="logical", pScreenAdjusted="logical", tx2gene="data.frame")) 92 | 93 | 94 | -------------------------------------------------------------------------------- /vignettes/stageR.bib: -------------------------------------------------------------------------------- 1 | Automatically generated by Mendeley Desktop 1.16.3 2 | Any changes to this file will be lost if it is regenerated by Mendeley. 3 | 4 | BibTeX export options can be customized via Preferences -> BibTeX in Mendeley Desktop 5 | 6 | @article{Heller2009, 7 | abstract = {MOTIVATION Microarray data analysis has expanded from testing individual genes for differential expression to testing gene sets for differential expression. The tests at the gene set level may focus on multivariate expression changes or on the differential expression of at least one gene in the gene set. These tests may be powerful at detecting subtle changes in expression, but findings at the gene set level need to be examined further to understand whether they are informative and if so how. 
RESULTS We propose to first test for differential expression at the gene set level but then proceed to test for differential expression of individual genes within discovered gene sets. We introduce the overall false discovery rate (OFDR) as an appropriate error rate to control when testing multiple gene sets and genes. We illustrate the advantage of this procedure over procedures that only test gene sets or individual genes.}, 8 | author = {Heller, Ruth and Manduchi, Elisabetta and Grant, Gregory R and Ewens, Warren J}, 9 | doi = {10.1093/bioinformatics/btp076}, 10 | file = {:Users/koenvandenberge/Library/Application Support/Mendeley Desktop/Downloaded/Heller et al. - 2009 - A flexible two-stage procedure for identifying gene sets that are differentially expressed.pdf:pdf}, 11 | issn = {1367-4811}, 12 | journal = {Bioinformatics (Oxford, England)}, 13 | month = {apr}, 14 | number = {8}, 15 | pages = {1019--25}, 16 | pmid = {19213738}, 17 | title = {{A flexible two-stage procedure for identifying gene sets that are differentially expressed.}}, 18 | url = {http://www.ncbi.nlm.nih.gov/pubmed/19213738}, 19 | volume = {25}, 20 | year = {2009} 21 | } 22 | @article{Ren2012, 23 | abstract = {There are remarkable disparities among patients of different races with prostate cancer; however, the mechanism underlying this difference remains unclear. Here, we present a comprehensive landscape of the transcriptome profiles of 14 primary prostate cancers and their paired normal counterparts from the Chinese population using RNA-seq, revealing tremendous diversity across prostate cancer transcriptomes with respect to gene fusions, long noncoding RNAs (long ncRNA), alternative splicing and somatic mutations. Three of the 14 tumors (21.4{\%}) harbored a TMPRSS2-ERG fusion, and the low prevalence of this fusion in Chinese patients was further confirmed in an additional tumor set (10/54=18.5{\%}). 
Notably, two novel gene fusions, CTAGE5-KHDRBS3 (20/54=37{\%}) and USP9Y-TTTY15 (19/54=35.2{\%}), occurred frequently in our patient cohort. Further systematic transcriptional profiling identified numerous long ncRNAs that were differentially expressed in the tumors. An analysis of the correlation between expression of long ncRNA and genes suggested that long ncRNAs may have functions beyond transcriptional regulation. This study yielded new insights into the pathogenesis of prostate cancer in the Chinese population.}, 24 | author = {Ren, Shancheng and Peng, Zhiyu and Mao, Jian-Hua and Yu, Yongwei and Yin, Changjun and Gao, Xin and Cui, Zilian and Zhang, Jibin and Yi, Kang and Xu, Weidong and Chen, Chao and Wang, Fubo and Guo, Xinwu and Lu, Ji and Yang, Jun and Wei, Min and Tian, Zhijian and Guan, Yinghui and Tang, Liang and Xu, Chuanliang and Wang, Linhui and Gao, Xu and Tian, Wei and Wang, Jian and Yang, Huanming and Wang, Jun and Sun, Yinghao}, 25 | doi = {10.1038/cr.2012.30}, 26 | issn = {1748-7838}, 27 | journal = {Cell research}, 28 | month = {may}, 29 | number = {5}, 30 | pages = {806--21}, 31 | pmid = {22349460}, 32 | title = {{RNA-seq analysis of prostate cancer in the Chinese population identifies recurrent gene fusions, cancer-associated long noncoding RNAs and aberrant alternative splicings.}}, 33 | url = {http://www.ncbi.nlm.nih.gov/pubmed/22349460 http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=PMC3343650}, 34 | volume = {22}, 35 | year = {2012} 36 | } 37 | @article{Frazee2011, 38 | abstract = {RNA sequencing is a flexible and powerful new approach for measuring gene, exon, or isoform expression. To maximize the utility of RNA sequencing data, new statistical methods are needed for clustering, differential expression, and other analyses. A major barrier to the development of new statistical methods is the lack of RNA sequencing datasets that can be easily obtained and analyzed in common statistical software packages such as R. 
To speed up the development process, we have created a resource of analysis-ready RNA-sequencing datasets. ReCount is an online resource of RNA-seq gene count tables and auxilliary data. Tables were built from raw RNA sequencing data from 18 different published studies comprising 475 samples and over 8 billion reads. Using the Myrna package, reads were aligned, overlapped with gene models and tabulated into gene-by-sample count tables that are ready for statistical analysis. Count tables and phenotype data were combined into Bioconductor ExpressionSet objects for ease of analysis. ReCount also contains the Myrna manifest files and R source code used to process the samples, allowing statistical and computational scientists to consider alternative parameter values. By combining datasets from many studies and providing data that has already been processed from. fastq format into ready-to-use. RData and. txt files, ReCount facilitates analysis and methods development for RNA-seq count data. We anticipate that ReCount will also be useful for investigators who wish to consider cross-study comparisons and alternative normalization strategies for RNA-seq.}, 39 | author = {Frazee, Alyssa C and Langmead, Ben and Leek, Jeffrey T}, 40 | doi = {10.1186/1471-2105-12-449}, 41 | journal = {BMC Bioinformatics}, 42 | number = {1}, 43 | pages = {449}, 44 | publisher = {BioMed Central}, 45 | title = {{ReCount: A multi-experiment resource of analysis-ready RNA-seq gene count datasets}}, 46 | url = {http://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-12-449}, 47 | volume = {12}, 48 | year = {2011} 49 | } 50 | @article{Hammer2010, 51 | abstract = {mRNA-seq is a paradigm-shifting technology because of its superior sensitivity and dynamic range and its potential to capture transcriptomes in an agnostic fashion, i.e., independently of existing genome annotations. Implementation of the agnostic approach, however, has not yet been fully achieved. 
In particular, agnostic mapping of pre-mRNA splice sites has not been demonstrated. The present study pursued dual goals: (1) to advance mRNA-seq bioinformatics toward unbiased transcriptome capture and (2) to demonstrate its potential for discovery in neuroscience by applying the approach to an in vivo model of neurological disease. We have performed mRNA-seq on the L4 dorsal root ganglion (DRG) of rats with chronic neuropathic pain induced by spinal nerve ligation (SNL) of the neighboring (L5) spinal nerve. We found that 12.4{\%} of known genes were induced and 7{\%} were suppressed in the dysfunctional (but anatomically intact) L4 DRG 2 wk after SNL. These alterations persisted chronically (2 mo). Using a read cluster classifier with strong test characteristics (ROC area 97{\%}), we discovered 10,464 novel exons. A new algorithm for agnostic mapping of pre-mRNA splice junctions (SJs) achieved a precision of 97{\%}. Integration of information from all mRNA-seq read classes including SJs led to genome reannotations specifically relevant for the species used (rat), the anatomical site studied (DRG), and the neurological disease considered (pain); for example, a 64-exon coreceptor for the nociceptive transmitter substance P was identified, and 21.9{\%} of newly discovered exons were shown to be dysregulated. 
Thus, mRNA-seq with agnostic analysis methods appears to provide a highly productive approach for in vivo transcriptomics in the nervous system.}, 52 | author = {Hammer, Paul and Banck, Michaela S and Amberg, Ronny and Wang, Cheng and Petznick, Gabriele and Luo, Shujun and Khrebtukova, Irina and Schroth, Gary P and Beyerlein, Peter and Beutler, Andreas S}, 53 | doi = {10.1101/gr.101204.109}, 54 | issn = {1549-5469}, 55 | journal = {Genome research}, 56 | month = {jun}, 57 | number = {6}, 58 | pages = {847--60}, 59 | pmid = {20452967}, 60 | title = {{mRNA-seq with agnostic splice site discovery for nervous system transcriptomics tested in chronic pain.}}, 61 | url = {http://www.ncbi.nlm.nih.gov/pubmed/20452967 http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=PMC2877581}, 62 | volume = {20}, 63 | year = {2010} 64 | } 65 | @article{Pimentel2016b, 66 | abstract = {Increased emphasis on reproducibility of published research in the last few years has led to the large-scale archiving of sequencing data. While this data can, in theory, be used to reproduce results in papers, it is difficult to use in practice. We introduce a series of tools for processing and analyzing RNA-Seq data in the Sequence Read Archive, that together have allowed us to build an easily extendable resource for analysis of data underlying published papers. Our system makes the exploration of data easily accessible and usable without technical expertise. Our database and associated tools can be accessed at The Lair: http://pachterlab.github.io/lair .}, 67 | author = {Pimentel, Harold and Sturmfels, Pascal and Bray, Nicolas and Melsted, P{\'{a}}ll and Pachter, Lior}, 68 | doi = {10.1186/s12859-016-1357-2}, 69 | file = {:Users/koenvandenberge/Library/Application Support/Mendeley Desktop/Downloaded/Pimentel et al. 
- 2016 - The Lair a resource for exploratory analysis of published RNA-Seq data(2).pdf:pdf}, 70 | issn = {1471-2105}, 71 | journal = {BMC bioinformatics}, 72 | keywords = {Exploratory data analysis,Interactive visualization,Kallisto,RNA-Seq,Reanalysis,Reproducibility,Sequence read archive,Shiny,Sleuth}, 73 | month = {dec}, 74 | number = {1}, 75 | pages = {490}, 76 | pmid = {27905880}, 77 | publisher = {BioMed Central}, 78 | title = {{The Lair: a resource for exploratory analysis of published RNA-Seq data.}}, 79 | url = {http://www.ncbi.nlm.nih.gov/pubmed/27905880 http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=PMC5131447}, 80 | volume = {17}, 81 | year = {2016} 82 | } 83 | @article{Bourgon2010, 84 | abstract = {With high-dimensional data, variable-by-variable statistical testing is often used to select variables whose behavior differs across conditions. Such an approach requires adjustment for multiple testing, which can result in low statistical power. A two-stage approach that first filters variables by a criterion independent of the test statistic, and then only tests variables which pass the filter, can provide higher power. We show that use of some filter/test statistics pairs presented in the literature may, however, lead to loss of type I error control. We describe other pairs which avoid this problem. In an application to microarray data, we found that gene-by-gene filtering by overall variance followed by a t-test increased the number of discoveries by 50{\%}. We also show that this particular statistic pair induces a lower bound on fold-change among the set of discoveries. 
Independent filtering-using filter/test pairs that are independent under the null hypothesis but correlated under the alternative-is a general approach that can substantially increase the efficiency of experiments.}, 85 | author = {Bourgon, Richard and Gentleman, Robert and Huber, Wolfgang}, 86 | doi = {10.1073/pnas.0914005107}, 87 | issn = {1091-6490}, 88 | journal = {Proceedings of the National Academy of Sciences of the United States of America}, 89 | month = {may}, 90 | number = {21}, 91 | pages = {9546--51}, 92 | pmid = {20460310}, 93 | title = {{Independent filtering increases detection power for high-throughput experiments.}}, 94 | url = {http://www.ncbi.nlm.nih.gov/pubmed/20460310 http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=PMC2906865}, 95 | volume = {107}, 96 | year = {2010} 97 | } 98 | @article{Robinson2010, 99 | abstract = {The fine detail provided by sequencing-based transcriptome surveys suggests that RNA-seq is likely to become the platform of choice for interrogating steady state RNA. In order to discover biologically important changes in expression, we show that normalization continues to be an essential step in the analysis. We outline a simple and effective method for performing normalization and show dramatically improved results for inferring differential expression in simulated and publicly available data sets.}, 100 | author = {Robinson, Mark D and Oshlack, Alicia}, 101 | doi = {10.1186/gb-2010-11-3-r25}, 102 | journal = {Genome Biology}, 103 | number = {3}, 104 | pages = {R25}, 105 | publisher = {BioMed Central}, 106 | title = {{A scaling normalization method for differential expression analysis of RNA-seq data}}, 107 | url = {http://genomebiology.biomedcentral.com/articles/10.1186/gb-2010-11-3-r25}, 108 | volume = {11}, 109 | year = {2010} 110 | } 111 | @article{Law2014, 112 | abstract = {New normal linear modeling strategies are presented for analyzing read counts from RNA-seq experiments. 
The voom method estimates the mean-variance relationship of the log-counts, generates a precision weight for each observation and enters these into the limma empirical Bayes analysis pipeline. This opens access for RNA-seq analysts to a large body of methodology developed for microarrays. Simulation studies show that voom performs as well or better than count-based RNA-seq methods even when the data are generated according to the assumptions of the earlier methods. Two case studies illustrate the use of linear modeling and gene set testing methods.}, 113 | author = {Law, Charity W and Chen, Yunshun and Shi, Wei and Smyth, Gordon K}, 114 | doi = {10.1186/gb-2014-15-2-r29}, 115 | file = {:Users/koenvandenberge/Library/Application Support/Mendeley Desktop/Downloaded/Law et al. - 2014 - voom Precision weights unlock linear model analysis tools for RNA-seq read counts.pdf:pdf}, 116 | issn = {1465-6914}, 117 | journal = {Genome biology}, 118 | keywords = {Algorithms,Base Sequence,Bayes Theorem,Computer Simulation,Gene Expression Profiling,High-Throughput Nucleotide Sequencing,High-Throughput Nucleotide Sequencing: methods,Linear Models,RNA,RNA: genetics,Sequence Analysis, RNA}, 119 | month = {jan}, 120 | number = {2}, 121 | pages = {R29}, 122 | pmid = {24485249}, 123 | title = {{voom: Precision weights unlock linear model analysis tools for RNA-seq read counts.}}, 124 | url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=4053721{\&}tool=pmcentrez{\&}rendertype=abstract}, 125 | volume = {15}, 126 | year = {2014} 127 | } 128 | @article{Shaffer1986, 129 | author = {Shaffer, Juliet Popper}, 130 | doi = {10.2307/2289016}, 131 | issn = {01621459}, 132 | journal = {Journal of the American Statistical Association}, 133 | month = {sep}, 134 | number = {395}, 135 | pages = {826}, 136 | title = {{Modified Sequentially Rejective Multiple Test Procedures}}, 137 | url = {http://www.jstor.org/stable/2289016?origin=crossref}, 138 | volume = {81}, 139 | year = {1986} 140 | } 
141 | @article{Bray2016, 142 | author = {Bray, Nicolas L and Pimentel, Harold and Melsted, P{\'{a}}ll and Pachter, Lior}, 143 | doi = {10.1038/nbt.3519}, 144 | issn = {1087-0156}, 145 | journal = {Nature Biotechnology}, 146 | month = {apr}, 147 | number = {5}, 148 | pages = {525--527}, 149 | publisher = {Nature Research}, 150 | title = {{Near-optimal probabilistic RNA-seq quantification}}, 151 | url = {http://www.nature.com/doifinder/10.1038/nbt.3519}, 152 | volume = {34}, 153 | year = {2016} 154 | } 155 | @article{Holm1979, 156 | author = {Holm, Sture}, 157 | journal = {Scandinavian Journal of Statistics}, 158 | number = {2}, 159 | pages = {65--70}, 160 | title = {{A Simple Sequentially Rejective Multiple Test Procedure}}, 161 | volume = {6}, 162 | year = {1979} 163 | } -------------------------------------------------------------------------------- /vignettes/stageRVignette.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "stageR: stage-wise analysis of high-throughput gene expression data in R" 3 | author: "Koen Van den Berge and Lieven Clement" 4 | date: "`r Sys.Date()`" 5 | bibliography: stageR.bib 6 | output: 7 | BiocStyle::html_document: 8 | toc: true 9 | vignette: > 10 | %\VignetteEncoding{UTF-8} 11 | %\VignetteEngine{knitr::rmarkdown} 12 | %\VignetteIndexEntry{stageR: stage-wise analysis of high-throughput gene expression data in R} 13 | --- 14 | 15 | This vignette describes how to use the stageR package that has been developed for stage-wise analysis of high throughput gene expression data in R. A stage-wise analysis was shown to be beneficial in terms of biological interpretation and statistical performance when multiple hypotheses per gene are of interest. 16 | The stage-wise analysis has been adopted from [@Heller2009] and consists of a screening stage and a confirmation stage. 
In the screening stage, genes are screened by calculating p-values that aggregate evidence across the different hypotheses of interest for the gene. The screening p-values are then adjusted for FDR control after which significance of the screening hypothesis is assessed. 17 | In the confirmation stage, only genes passing the screening stage are considered for analysis. For those genes, every hypothesis of interest is assessed separately and multiple testing correction is performed across hypotheses within a gene to control the FWER on the BH-adjusted significance level of the screening stage. 18 | `stageR` provides an automated way to perform stage-wise testing, given p-values for the screening and confirmation stages. A number of FWER control procedures that take into account the logical relations among the hypotheses are implemented. Since the logical relations may be specific to the experiment, the user can also specify an adjustment deemed appropriate. 19 | 20 | The vignette analyses two datasets. The Hammer dataset [@Hammer2010] is a differential gene expression analysis for an experiment with a complex design. This type of analysis is supported by the `stageR` class. The Ren dataset [@Ren2012] analyses differential transcript usage (DTU) in tumoral versus normal tissue in Chinese patients. Transcript-level analyses are supported by the `stageRTx` class. 21 | 22 | # Installing and loading the package 23 | 24 | The release version of the package is hosted on Bioconductor, and can be installed with the following code 25 | 26 | ```{r} 27 | #if (!requireNamespace("BiocManager", quietly=TRUE)) 28 | # install.packages("BiocManager") 29 | #BiocManager::install("stageR") 30 | ``` 31 | 32 | The development version of the package is hosted on GitHub and can be installed with the `devtools` library using `devtools::install_github("statOmics/stageR")`. 33 | 34 | After installing, we will load the package.
35 | 36 | ```{r} 37 | library(stageR) 38 | ``` 39 | 40 | 41 | # Differential gene expression: Hammer dataset 42 | 43 | ```{r,echo=TRUE,warning=FALSE} 44 | library(edgeR) ; library(Biobase) ; library(limma) ; library(utils) ; library(DEXSeq) 45 | ``` 46 | 47 | As a case study for differential gene expression analysis, we analyse the Hammer dataset [@Hammer2010]. The dataset is provided with the stageR package and was originally downloaded from the ReCount project [website](http://bowtie-bio.sourceforge.net/recount) [@Frazee2011]. 48 | 49 | ```{r} 50 | data(hammer.eset, package="stageR") 51 | eset <- hammer.eset ; rm(hammer.eset) 52 | ``` 53 | 54 | The Hammer experiment investigated the effect of a spinal nerve ligation (SNL) versus control samples in rats at two weeks and two months after treatment. For every time $\times$ treatment combination, 2 biological replicates were used. The hypotheses of interest are 55 | 56 | - the treatment effect at the first timepoint, 57 | - the treatment effect at the second timepoint and 58 | - assessing whether the effect of the treatment is different between the two timepoints (i.e. the treatment-time interaction) 59 | 60 | We use a contrast for the differential expression at the first and second timepoint and a difference in fold change between the two timepoints, respectively. 61 | Therefore we create a design matrix consisting of two timepoints, two treatments and two biological replicates in every treatment $\times$ time combination. Note there has been a typo in the phenoData, so we will correct this first. 62 | 63 | ```{r} 64 | pData(eset)$Time #typo. 
Will do it ourselves 65 | time <- factor(rep(c("mo2","w2"),each=4),levels=c("w2","mo2")) 66 | pData(eset)$protocol 67 | treat <- factor(c("control","control","SNL","SNL","control","control","SNL","SNL"),levels=c("control","SNL")) 68 | design <- model.matrix(~time*treat) 69 | rownames(design) = paste0(time,treat,rep(1:2,4)) 70 | colnames(design)[4] = "timeMo2xTreatSNL" 71 | design 72 | ``` 73 | 74 | We perform independent filtering [@Bourgon2010] of the genes and retain genes that are expressed with at least 2 counts per million in 2 samples. 75 | The data is then normalised with TMM normalisation [@Robinson2010] to correct for differences in sequencing depth and RNA population between the samples. 76 | 77 | ```{r} 78 | cpmOffset <- 2 79 | keep <- rowSums(cpm(exprs(eset))>cpmOffset)>=2 #2cpm in 2 samples 80 | dge <- DGEList(exprs(eset)[keep,]) 81 | colnames(dge) = rownames(design) 82 | dge <- calcNormFactors(dge) 83 | ``` 84 | 85 | ## Conventional analysis 86 | 87 | We will first analyse the data with limma-voom [@Law2014] in a standard way: the three contrasts are assessed separately on an FDR level of $5\%$. 88 | 89 | ```{r} 90 | ## regular analysis 91 | voomObj <- voom(dge,design,plot=TRUE) 92 | fit <- lmFit(voomObj,design) 93 | contrast.matrix <- makeContrasts(treatSNL, treatSNL+timeMo2xTreatSNL, timeMo2xTreatSNL, levels=design) 94 | fit2 <- contrasts.fit(fit, contrast.matrix) 95 | fit2 <- eBayes(fit2) 96 | res <- decideTests(fit2) 97 | summary.TestResults(res) #nr of significant up-/downregulated genes 98 | colSums(summary.TestResults(res)[c(1,3),]) #total nr of significant genes 99 | ``` 100 | 101 | The conventional analysis does not find any genes that have a different effect of the treatment between the two timepoints (i.e. the interaction effect test), while many genes are differentially expressed between treatment and control within every timepoint.
102 | 103 | To get a global picture of the effect of SNL on the transcriptome, we can check how many genes are significantly altered following SNL. 104 | 105 | ```{r} 106 | uniqueGenesRegular <- which(res[,1]!=0 | res[,2]!=0 | res[,3]!=0) 107 | length(uniqueGenesRegular) #total nr of significant genes 108 | ``` 109 | 110 | In total, `r length(uniqueGenesRegular)` genes are found to be differentially expressed following a spinal nerve ligation. However, FDR was only controlled at the contrast level and not at the gene level so we cannot state a target FDR level together with this number. 111 | 112 | ## Stage-wise analysis 113 | 114 | The stage-wise analysis first considers an omnibus test that tests whether any of the three contrasts are significant, i.e. it tests whether there has been any effect of the treatment whatsoever. 115 | For the screening hypothesis, we use the `topTableF` function from the `limma` package to perform an F-test across the three contrasts. The screening hypothesis p-values are then stored in the vector `pScreen`. 116 | 117 | ```{r} 118 | alpha <- 0.05 119 | nGenes <- nrow(dge) 120 | tableF <- topTableF(fit2, number=nGenes, sort.by="none") #screening hypothesis 121 | pScreen <- tableF$P.Value 122 | names(pScreen) = rownames(tableF) 123 | ``` 124 | 125 | In the confirmation stage, every contrast is assessed separately. The confirmation stage p-values are adjusted to control the FWER across the hypotheses within a gene and are subsequently corrected to the BH-adjusted significance level of the screening stage. This allows a direct comparison of the adjusted p-values to the provided significance level `alpha` for both screening and confirmation stage adjusted p-values. 
The function `stageR` constructs an object from the `stageR` class and requires a (preferably named) vector of p-values for the screening hypothesis `pScreen` and a (preferably named) matrix of p-values for the confirmation stage `pConfirmation` with columns corresponding to the different contrasts of interest. Note that the rows in `pConfirmation` correspond to features (genes) and the features should be identically sorted in `pScreen` and `pConfirmation`. The constructor function will check whether the length of `pScreen` is identical to the number of rows in `pConfirmation` and return an error if this is not the case. Finally, the `pScreenAdjusted` argument specifies whether the screening p-values have already been adjusted according to FDR control. 126 | 127 | ```{r} 128 | pConfirmation <- sapply(1:3,function(i) topTable(fit2, coef=i, number=nGenes, sort.by="none")$P.Value) 129 | dimnames(pConfirmation) <- list(rownames(fit2),c("t1","t2","t1t2")) 130 | stageRObj <- stageR(pScreen=pScreen, pConfirmation=pConfirmation, pScreenAdjusted=FALSE) 131 | ``` 132 | 133 | The function `stageWiseAdjustment` then adjusts the p-values according to a stage-wise analysis. The `method` argument specifies the FWER correction procedure to be used in the confirmation stage. More details on the different methods can be found in the help file for `stageWiseAdjustment`. The `alpha` argument specifies the target OFDR level that is used for controlling the fraction of false positive genes across all rejected genes over the entire stage-wise testing procedure. The adjusted p-values for genes that did not pass the screening stage are by default set to `NA`. 
134 | 135 | Note that when a gene has passed the screening hypothesis in the Hammer experiment, at most one null hypothesis can still be true: there has to be DE at timepoint 1 or timepoint 2; if the DE occurs at only one timepoint, there also exists an interaction; if DE occurs at both timepoints, the $H_0$ of no interaction can still be true. Thus, according to Shaffer's MSRB procedure [@Shaffer1986], no correction is required in the confirmation stage for this experiment to control the FWER. This can be specified with the `method="none"` argument. 136 | 137 | 138 | ```{r} 139 | stageRObj <- stageWiseAdjustment(object=stageRObj, method="none", alpha=0.05) 140 | ``` 141 | 142 | We can explore the results of the stage-wise analysis by querying the object returned by `stageWiseAdjustment`. **Note that the confirmation stage adjusted p-values returned by the function are only valid for the OFDR level provided. If a different OFDR level is of interest, the stage-wise testing adjustment of p-values should be re-run entirely with the other OFDR level specified in `stageWiseAdjustment`.** The adjusted p-values from the confirmation stage can be accessed with the `getAdjustedPValues` function 143 | 144 | ```{r} 145 | head(getAdjustedPValues(stageRObj, onlySignificantGenes=FALSE, order=FALSE)) 146 | head(getAdjustedPValues(stageRObj, onlySignificantGenes=TRUE, order=TRUE)) 147 | ``` 148 | 149 | and may either return all p-values or only those from the significant genes, as specified by the `onlySignificantGenes` argument which can then be ordered or not as specified by the `order` argument. 150 | 151 | Finally, the `getResults` function returns a binary matrix where rows correspond to features and columns correspond to hypotheses, including the screening hypothesis. For every feature $\times$ hypothesis combination, it indicates whether the test is significant (1) or not (0) according to the stage-wise testing procedure. 
152 | 153 | ```{r} 154 | res <- getResults(stageRObj) 155 | head(res) 156 | colSums(res) #stage-wise analysis results 157 | ``` 158 | 159 | The `adjustment` argument from the `stageWiseAdjustment` function allows the user to specify the FWER adjustment correction. It requires a numeric vector of the same length as the number of columns in `pConfirmation`. The first element of the vector is the adjustment for the most significant p-value of the gene, the second element for the second most significant p-value etc. Since the Hammer dataset did not require any adjustment, identical results are obtained when manually specifying the adjustments to equal $1$. 160 | 161 | ```{r} 162 | stageRObj <- stageR(pScreen=pScreen, pConfirmation=pConfirmation, pScreenAdjusted=FALSE) 163 | adjustedPSW <- stageWiseAdjustment(object=stageRObj, method="user", alpha=0.05, adjustment=c(1,1,1)) 164 | res <- getResults(adjustedPSW) 165 | colSums(res) 166 | ``` 167 | 168 | # Differential transcript expression/usage 169 | 170 | Multiple hypotheses of interest per gene also arise in transcript-level studies, where the different hypotheses correspond to the different isoforms from a gene. 171 | We analyse differential transcript usage for a case study that investigated expression in prostate cancer tumoral tissue versus normal tissue in 14 Chinese patients [@Ren2012]. 172 | The raw sequences have been preprocessed with kallisto [@Bray2016] and transcript-level abundance estimates can be downloaded from The Lair project [@Pimentel2016b] [website](http://pachterlab.github.io/lair/). We used the unnormalized, unfiltered abundances for the analysis. 173 | A subset of the dataset comes with the `stageR` package and can be accessed with `data(esetProstate)` after loading `stageR`. The `ExpressionSet` contains the metadata for the samples in `pData(esetProstate)` and corresponding gene identifiers for the transcripts are stored in `fData(esetProstate)`. 
The dataset contains 945 transcripts from 456 genes. 174 | 175 | ```{r} 176 | data("esetProstate", package="stageR") #from stageR package 177 | head(pData(esetProstate)) 178 | head(fData(esetProstate)) 179 | ``` 180 | 181 | We will perform some basic data exploration on the transcripts in the dataset. Since the dataset was preprocessed for the purposes of this vignette, every gene has at least two transcripts, and all transcripts are expressed in at least 1 sample. 182 | 183 | 184 | ```{r} 185 | tx2gene <- fData(esetProstate) 186 | colnames(tx2gene) <- c("transcript","gene") 187 | barplot(table(table(tx2gene$gene)), main="Distribution of number of tx per gene") 188 | 189 | #the dataset contains 190 | length(unique(tx2gene$gene)) #nr genes 191 | median(table(as.character(tx2gene$gene))) #median nr of tx/gene 192 | ``` 193 | 194 | ## Conventional analysis 195 | 196 | We will show how to use the `stageR` package to analyse DTU with a stage-wise approach. We start with a regular DEXseq analysis to obtain p-values for every transcript and q-values for every gene. Since both control and tumoral tissue are derived from the same patient for all 14 patients, we add a block effect for the patient to account for the correlation between samples within every patient. 
197 | 198 | ```{r} 199 | ### regular DEXSeq analysis 200 | sampleData <- pData(esetProstate) 201 | geneForEachTx <- tx2gene[match(rownames(exprs(esetProstate)),tx2gene[,1]),2] 202 | dxd <- DEXSeqDataSet(countData = exprs(esetProstate), 203 | sampleData = sampleData, 204 | design = ~ sample + exon + patient + condition:exon, 205 | featureID = rownames(esetProstate), 206 | groupID = as.character(geneForEachTx)) 207 | dxd <- estimateSizeFactors(dxd) 208 | dxd <- estimateDispersions(dxd) 209 | dxd <- testForDEU(dxd, reducedModel=~ sample + exon + patient) 210 | dxr <- DEXSeqResults(dxd) 211 | qvalDxr <- perGeneQValue(dxr) 212 | ``` 213 | 214 | ## Stage-wise analysis 215 | 216 | The code above is a conventional `DEXSeq` analysis for analysing differential transcript usage. It would proceed by either assessing the significant genes according to the gene-wise q-values or by assessing the significant transcripts according to the transcript-level p-values, after adjustment for multiple testing. Performing and interpreting both analyses does not provide appropriate FDR control and thus should be avoided. However, interpretation on the gene level combined with transcript-level results can provide useful biological insights and this can be achieved through stage-wise testing. In the following code, we show how to automatically perform a stage-wise analysis using `stageR`. We start by constructing 217 | 218 | - a named vector of gene-wise q-values `pScreen` 219 | - a named matrix with transcript-level p-values `pConfirmation` 220 | - a `data.frame` with transcript identifiers and corresponding gene identifiers `tx2gene` 221 | 222 | These three objects provide everything we need to construct an instance from the `stageRTx` class for the stage-wise analysis. Note that a different class and thus a different constructor function is used for transcript-level analyses in comparison to DE analysis for complex designs. 
223 | 224 | ```{r} 225 | pConfirmation <- matrix(dxr$pvalue,ncol=1) 226 | dimnames(pConfirmation) <- list(c(dxr$featureID),c("transcript")) 227 | pScreen <- qvalDxr 228 | tx2gene <- fData(esetProstate) 229 | ``` 230 | 231 | Next we build an object from the `stageRTx` class and indicate that the screening hypothesis p-values were already adjusted by setting `pScreenAdjusted=TRUE`. As in the DGE example, we pass this object to the `stageWiseAdjustment` function for correcting the p-values. We control the analysis on a $5\%$ target OFDR (`alpha=0.05`). `method="dtu"` indicates the adapted Holm-Shaffer FWER correction that was specifically tailored for DTU analysis as described in the manuscript. In brief, the Holm procedure [@Holm1979] is used from the third transcript onwards and the two most significant p-values are tested on a $\alpha_I/(n_g-2)$ significance level, with $\alpha_I$ the BH adjusted significance level from the screening stage and $n_g$ the number of transcripts for gene $g$. Note that `method="dtu"` requires every gene to have at least two transcripts: the adjustment stops with an error if a gene passing the screening stage has only one transcript, so such transcripts should be removed beforehand. 232 | 233 | ```{r} 234 | stageRObj <- stageRTx(pScreen=pScreen, pConfirmation=pConfirmation, pScreenAdjusted=TRUE, tx2gene=tx2gene) 235 | stageRObj <- stageWiseAdjustment(object=stageRObj, method="dtu", alpha=0.05) 236 | ``` 237 | 238 | We can then explore the results using a range of accessor functions. The significant genes can be returned with the `getSignificantGenes` function. 239 | 240 | ```{r} 241 | head(getSignificantGenes(stageRObj)) 242 | ``` 243 | 244 | Similarly, the significant transcripts can be returned with `getSignificantTx`. 245 | 246 | ```{r} 247 | head(getSignificantTx(stageRObj)) 248 | ``` 249 | 250 | 251 | The stage-wise adjusted p-values are returned using the `getAdjustedPValues` function. 
The screening (gene) hypothesis p-values were adjusted according to the BH FDR criterion, and the confirmation (transcript) hypothesis p-values were adjusted to control for the full stage-wise analysis, by adopting the correction method specified in `stageWiseAdjustment`. Hence, the confirmation adjusted p-values returned from this function can be directly compared to the significance level `alpha` as provided in the `stageWiseAdjustment` function. `getAdjustedPValues` returns a matrix where the different rows correspond to transcripts and the respective gene and transcript identifiers are given in the first two columns. Transcript-level adjusted p-values for genes not passing the screening stage are set to `NA` by default. Note, that the stage-wise adjusted p-values are only valid for the provided significance level and must not be compared to a different significance level. If this would be of interest, the entire stage-wise testing adjustment should be re-run with the other significance level provided in `alpha`. 252 | 253 | ```{r} 254 | padj <- getAdjustedPValues(stageRObj, order=TRUE, onlySignificantGenes=FALSE) 255 | head(padj) 256 | ``` 257 | 258 | The output indeed shows that 2 genes and three transcripts are significant because their adjusted p-values are below the specified `alpha` level of $0.05$. The third gene in the list is not significant and thus the p-value of the transcript is set to `NA`. 259 | 260 | ### Note on a stage-wise DEXSeq analysis. 261 | 262 | By default, DEXSeq performs an independent filtering step. This may result in a number of genes that have been filtered and thus no q-value for these genes is given in the output of `perGeneQValue`. This can cause an error in the stage-wise analysis, since we have confirmation stage p-values for transcripts but no q-value for their respective genes. In order to avoid this, one should filter these transcripts in the `pConfirmation` and `tx2gene` objects. 
263 | 264 | ```{r} 265 | rowsNotFiltered <- tx2gene[,2]%in%names(qvalDxr) 266 | pConfirmation <- matrix(pConfirmation[rowsNotFiltered,],ncol=1,dimnames=list(dxr$featureID[rowsNotFiltered],"transcript")) 267 | tx2gene <- tx2gene[rowsNotFiltered,] 268 | ``` 269 | 270 | After which the stage-wise analysis may proceed. 271 | 272 | # References 273 | 274 | 275 | -------------------------------------------------------------------------------- /R/accessors.R: -------------------------------------------------------------------------------- 1 | #' @include stageRClasses.R allGenerics.R constructors.R 2 | 3 | .stageWiseTest <- function(pScreen, pConfirmation, alpha, 4 | method=c("none","holm","dte","dtu","user"), 5 | adjustment=NULL, tx2gene=NULL, pScreenAdjusted, 6 | allowNA=FALSE){ 7 | 8 | if(allowNA){ 9 | if(any(is.na(pScreen))){ 10 | if(is.null(tx2gene)){ #DGE analysis 11 | naFeatures <- which(is.na(pScreen)) 12 | message(paste0("Removing ",length(naFeatures), 13 | " features with NA screening hypothesis p-values. \n")) 14 | pScreen <- pScreen[-naFeatures] 15 | pConfirmation <- pConfirmation[-naFeatures,] 16 | } else { #tx-level analysis 17 | naFeatures <- which(is.na(pScreen)) 18 | message(paste0("Removing ",length(naFeatures), 19 | " genes with NA screening hypothesis p-values. \n")) 20 | naGenes <- names(pScreen)[naFeatures] 21 | naTx <- as.character(tx2gene[tx2gene[,2] %in% naGenes,1]) 22 | pScreen <- pScreen[!names(pScreen) %in% naGenes] 23 | pConfirmation <- pConfirmation[!rownames(pConfirmation) %in% naTx,,drop=FALSE] 24 | } 25 | } 26 | } 27 | 28 | ## check for NA values 29 | if(!allowNA){ 30 | if(any(is.na(pScreen)) | any(is.na(pConfirmation))){ 31 | stop("NA p-values found in either the screening or confirmation tests. 
32 | If you want to allow for NA p-values, set allowNA=TRUE.") 33 | } 34 | } 35 | method <- match.arg(method,c("none","holm","dte","dtu","user")) 36 | 37 | #screening stage 38 | if(!pScreenAdjusted) 39 | padjScreen <- p.adjust(pScreen,"BH") else 40 | padjScreen <- pScreen 41 | significanceOrdering <- order(padjScreen) 42 | genesStageI <- padjScreen<=alpha 43 | 44 | if(method=="none"){ 45 | 46 | pAdjConfirmation <- matrix(nrow=nrow(pConfirmation), 47 | ncol=ncol(pConfirmation), 48 | dimnames=list(c(rownames(pConfirmation)), 49 | colnames(pConfirmation))) 50 | pAdjConfirmation[genesStageI,] <- pConfirmation[genesStageI,] 51 | padjScreenReturn <- padjScreen 52 | 53 | } else if(method=="holm"){ 54 | 55 | padjScreenReturn <- padjScreen 56 | ## only do correction for genes that passed the screening stage 57 | pAdjConfirmation <- matrix(nrow=nrow(pConfirmation), 58 | ncol=ncol(pConfirmation), 59 | dimnames=list(c(rownames(pConfirmation)), 60 | colnames(pConfirmation))) 61 | 62 | for(k in seq_len(sum(genesStageI))){ 63 | row <- pConfirmation[which(genesStageI)[k],] 64 | # Holm correction conditional on passing the screening stage. 65 | o <- order(row) 66 | if(all(!is.na(row))){ #if no NA's, standard Holm with screening correct. 
67 | n <- length(row) 68 | } else { #if NA's present, only correct for non NA p-values 69 | n <- length(row[!is.na(row)]) 70 | } 71 | # Holm adjustment: passing screening stage implies 1 false hypothesis 72 | adjustment <- c(n-1,(n-1):1) 73 | if(length(adjustment)!=length(row)){ 74 | adjustment <- c(adjustment, rep(1,length(row)-length(adjustment))) 75 | } 76 | rowAdjusted <- row[o]*adjustment 77 | rowAdjusted <- pmin(rowAdjusted,1) 78 | rowAdjusted <- cummax(rowAdjusted) 79 | rowBack <- vector(length=length(row)) 80 | rowBack[o] <- rowAdjusted 81 | pAdjConfirmation[which(genesStageI)[k],] <- rowBack 82 | } 83 | 84 | } else if(method=="user"){ 85 | if(length(adjustment)!=ncol(pConfirmation)){ 86 | stop("the length of the adjustment vector is not equal to the number of 87 | confirmation hypotheses as defined by the number of 88 | columns in pConfirmation.") 89 | } 90 | padjScreenReturn=padjScreen 91 | pAdjConfirmation <- matrix(nrow=nrow(pConfirmation), 92 | ncol=ncol(pConfirmation), 93 | dimnames=list(c(rownames(pConfirmation)), 94 | colnames(pConfirmation))) 95 | for(k in seq_len(sum(genesStageI))){ 96 | row <- pConfirmation[which(genesStageI)[k],] 97 | o <- order(row) 98 | rowAdjusted <- row[o]*adjustment 99 | rowAdjusted <- pmin(rowAdjusted,1) 100 | # check monotone increase of adjusted p-values 101 | rowAdjusted <- cummax(rowAdjusted) 102 | rowBack <- vector(length=length(row)) 103 | rowBack[o] <- rowAdjusted 104 | rowBack 105 | pAdjConfirmation[which(genesStageI)[k],] <- rowBack 106 | } 107 | 108 | } else if(method=="dte"){ 109 | 110 | if(any(is.na(match(rownames(pConfirmation),tx2gene[,1])))){ 111 | stop("not all transcript names in pConfirmation match with 112 | a transcript ID from the tx2gene object.") 113 | } 114 | if(any(is.na(match(names(pScreen),tx2gene[,2])))){ 115 | stop("not all gene names in pScreen match with 116 | a gene ID from the tx2gene object.") 117 | } 118 | significantGenes <- names(padjScreen)[genesStageI] 119 | geneForEachTx <- 
tx2gene[match(rownames(pConfirmation),tx2gene[,1]),2] 120 | txLevelAdjustments <- sapply(significantGenes,function(gene){ 121 | id <- which(geneForEachTx %in% gene) 122 | row <- pConfirmation[id,] 123 | #make sure names are passed along if only one tx 124 | if(length(id)==1) names(row)=rownames(pConfirmation)[id] 125 | o <- order(row) 126 | n <- length(row) 127 | # DTE adjustment: passing screening stage implies 1 false hypothesis 128 | if(n==1) adjustment=0 else adjustment=c(n-1,(n-1):1) 129 | rowAdjusted <- row[o]*adjustment 130 | rowAdjusted <- pmin(rowAdjusted,1) 131 | rowAdjusted <- cummax(rowAdjusted) 132 | rowBack <- vector(length=length(row)) 133 | rowBack[o] <- rowAdjusted 134 | names(rowBack) <- names(row) 135 | rowBack 136 | }, simplify=FALSE) 137 | pAdjConfirmation <- matrix(nrow=nrow(pConfirmation),ncol=1) 138 | rownames(pAdjConfirmation) <- paste0(geneForEachTx,":",rownames(pConfirmation)) 139 | # adjusted p-values for screening hypothesis 140 | padjScreenReturn <- padjScreen[geneForEachTx] 141 | # adjusted p-values for confirmation hypothesis 142 | #idCon <- names(unlist(txLevelAdjustments)) 143 | # replace '.' 
by ':' in names to avoid confusion with ENSEMBL version names 144 | #idCon <- gsub(x=idCon,pattern=".",replacement=":",fixed=TRUE) 145 | namesList=names(txLevelAdjustments) 146 | namesListElements=lapply(txLevelAdjustments,names) 147 | idCon <- unlist(sapply(seq_len(length(namesList)), function(ii){ 148 | gsub(x=paste(namesList[ii],namesListElements[[ii]]), 149 | pattern=" ",replace=":") 150 | })) 151 | pAdjConfirmation[idCon,1] <- unlist(txLevelAdjustments) 152 | 153 | } else if(method=="dtu"){ 154 | 155 | if(any(is.na(match(rownames(pConfirmation),tx2gene[,1])))){ 156 | stop("not all transcript names in pConfirmation match with 157 | a transcript ID from the tx2gene object.") 158 | } 159 | if(any(is.na(match(names(pScreen),tx2gene[,2])))){ 160 | stop("not all gene names in pScreen match with 161 | a gene ID from the tx2gene object.") 162 | } 163 | # adjust screening 164 | significantGenes <- names(padjScreen)[genesStageI] 165 | geneForEachTx <- as.character(tx2gene[match(rownames(pConfirmation), 166 | tx2gene[,1]),2]) 167 | txLevelAdjustments <- sapply(significantGenes,function(gene){ 168 | id <- which(geneForEachTx %in% gene) 169 | row <- pConfirmation[id,] 170 | o <- order(row) 171 | n <- length(row) 172 | # DTU adjustment: passing screening stage implies 2 false hypotheses 173 | if(n==1) stop("Some genes have only one transcript; this is incompatible with DTU correction. 
Remove these transcripts.") 174 | if(n==2) adjustment=c(0,0) else adjustment=c(n-2,n-2,(n-2):1) 175 | rowAdjusted <- row[o]*adjustment 176 | rowAdjusted <- pmin(rowAdjusted,1) 177 | rowAdjusted <- cummax(rowAdjusted) 178 | rowBack <- vector(length=length(row)) 179 | rowBack[o] <- rowAdjusted 180 | names(rowBack) <- names(row) 181 | rowBack 182 | }, simplify=FALSE) 183 | pAdjConfirmation <- matrix(nrow=nrow(pConfirmation),ncol=1) 184 | rownames(pAdjConfirmation) <- paste0(geneForEachTx,":", 185 | rownames(pConfirmation)) 186 | # adjusted p-values for screening hypothesis 187 | padjScreenReturn <- padjScreen[as.character(geneForEachTx)] 188 | # adjusted p-values for confirmation hypothesis 189 | #idCon <- names(unlist(txLevelAdjustments)) 190 | # replace '.' by ':' in names to avoid confusion with ENSEMBL version names 191 | #idCon <- gsub(x=idCon,pattern=".",replacement=":",fixed=TRUE) 192 | namesList=names(txLevelAdjustments) 193 | namesListElements=lapply(txLevelAdjustments,names) 194 | idCon <- unlist(sapply(seq_len(length(namesList)), function(ii){ 195 | gsub(x=paste(namesList[ii],namesListElements[[ii]]), 196 | pattern=" ",replace=":") 197 | })) 198 | pAdjConfirmation[idCon,1] <- unlist(txLevelAdjustments) 199 | 200 | } else stop("method must be either one of 'holm' or ... ") 201 | 202 | #BH-adjusted s.l. 203 | alphaAdjusted <- sum(padjScreen<=alpha)/length(padjScreen)*alpha 204 | #Correct FWER-adjusted p-values acc. to BH-adjusted s.l. 
205 | naPAdj <- is.na(pAdjConfirmation) 206 | pAdjBH <- pAdjConfirmation[!naPAdj]*length(padjScreen)/sum(padjScreen<=alpha) 207 | pAdjConfirmation[!naPAdj] <- pmin(pAdjBH,1) 208 | if(!(method %in% c("dte","dtu"))){ 209 | pAdjStage <- cbind(padjScreenReturn,pAdjConfirmation) 210 | colnames(pAdjStage)[1] <- "padjScreen" 211 | } 212 | if(method %in% c("dte","dtu")){ 213 | pAdjStage <- cbind(pAdjConfirmation,padjScreenReturn)[,2:1] 214 | colnames(pAdjStage) <- c("gene","transcript") 215 | } 216 | return(list(pAdjStage=pAdjStage, alphaAdjusted=alphaAdjusted, 217 | pScreen=pScreen, pConfirmation=pConfirmation)) 218 | } 219 | 220 | .getAdjustedP <- function(object, onlySignificantGenes=FALSE, order=TRUE){ 221 | ## this function is used in getAdjustedPValues 222 | ## to return the adjusted p-values for a stageR class. 223 | message(paste0("The returned adjusted p-values are based on a ", 224 | "stage-wise testing approach and are only valid for ", 225 | "the provided target OFDR level of ", 226 | getAlpha(object)*100, 227 | "%. If a different target OFDR level is of interest,", 228 | "the entire adjustment should be re-run. 
\n")) 229 | if(onlySignificantGenes){ #significant genes 230 | genesStageI <- object@adjustedP[,"padjScreen"]<=getAlpha(object) 231 | if(sum(genesStageI)==0){ 232 | message(paste0("No genes were found to be significant on a ", 233 | getAlpha(object)*100,"% OFDR level.")) 234 | } else { 235 | if(order){ 236 | sigGenes <- object@adjustedP[genesStageI,] 237 | o <- order(sigGenes[,"padjScreen"]) 238 | return(sigGenes[o,]) 239 | } else { 240 | sigGenes <- object@adjustedP[genesStageI,] 241 | return(sigGenes) 242 | } 243 | } 244 | } else { #all genes 245 | if(order){ 246 | o <- order(object@adjustedP[,"padjScreen"]) 247 | return(object@adjustedP[o,]) 248 | } else { 249 | return(object@adjustedP) 250 | } 251 | } 252 | } 253 | 254 | .getAdjustedPTx <- function(object, onlySignificantGenes=FALSE, order=TRUE){ 255 | ## this function is used in getAdjustedPValues 256 | ## to return the adjusted p-values for a stageRTx class. 257 | message(paste0("The returned adjusted p-values are based on a ", 258 | "stage-wise testing approach and are only valid for ", 259 | "the provided target OFDR level of ", 260 | getAlpha(object)*100, 261 | "%. If a different target OFDR level is of interest,", 262 | "the entire adjustment should be re-run. 
\n")) 263 | tx2gene <- getTx2gene(object) 264 | pConfirmation <- getPConfirmation(object) 265 | geneForEachTx <- tx2gene[match(rownames(pConfirmation),tx2gene[,1]),2] 266 | 267 | if(onlySignificantGenes){ #significant genes 268 | genesStageI <- which(object@adjustedP[,"gene"]<=getAlpha(object)) 269 | if(sum(genesStageI)==0){ 270 | message(paste0("No genes were found to be significant on a ", 271 | getAlpha(object)*100,"% OFDR level.")) 272 | } else { 273 | if(order){ #sort 274 | ordGenes <- order(object@adjustedP[genesStageI,1]) 275 | sigGeneIDs <- unlist(lapply(strsplit(names(genesStageI),split=":", 276 | fixed=TRUE), function(x) x[1] )) 277 | #order acc to gene significance 278 | idList <- sapply(unique(sigGeneIDs[ordGenes]), function(gene){ 279 | which(geneForEachTx%in%gene) 280 | }) 281 | #order tx within gene 282 | idListOrdTx <- lapply(idList, function(x) x[order(pConfirmation[x,])]) 283 | outData <- object@adjustedP[unlist(idListOrdTx),] 284 | outData <- data.frame("geneID"=sigGeneIDs[ordGenes], 285 | "txID"=unlist(lapply(strsplit(rownames(outData), 286 | split=":",fixed=TRUE), function(x) x[2] )), 287 | outData, row.names=NULL) 288 | return(outData) 289 | } else { #dont sort 290 | outData <- object@adjustedP[genesStageI,] 291 | outData <- data.frame("geneID"=unlist(lapply(strsplit(rownames(outData), 292 | split=":",fixed=TRUE), function(x) x[1] )), 293 | "txID"=unlist(lapply(strsplit(rownames(outData), 294 | split=":",fixed=TRUE), function(x) x[2] )), 295 | outData, row.names=NULL) 296 | return(outData) 297 | } 298 | } 299 | } else { #all genes 300 | if(order){ #sort 301 | ordGenes <- order(object@adjustedP[,"gene"]) 302 | sigGeneIDs <- unlist(lapply(strsplit(rownames(object@adjustedP), 303 | split=":",fixed=TRUE), function(x) x[1] )) 304 | #order acc to gene significance 305 | idList <- sapply(unique(sigGeneIDs[ordGenes]), 306 | function(gene) which(geneForEachTx%in%gene)) 307 | #order tx within gene 308 | idListOrdTx <- lapply(idList, function(x) 
x[order(pConfirmation[x,])]) 309 | outData <- object@adjustedP[unlist(idListOrdTx),] 310 | outData <- data.frame("geneID"=sigGeneIDs[ordGenes], 311 | "txID"=unlist(lapply(strsplit(rownames(outData), 312 | split=":",fixed=TRUE), function(x) x[2] )), 313 | outData, row.names=NULL) 314 | return(outData) 315 | } else { #dont sort 316 | outData <- object@adjustedP 317 | outData <- data.frame("geneID"=unlist(lapply(strsplit(rownames(outData), 318 | split=":",fixed=TRUE), function(x) x[1] )), 319 | "txID"=unlist(lapply(strsplit(rownames(outData), 320 | split=":",fixed=TRUE), function(x) x[2] )), 321 | outData, row.names=NULL) 322 | return(outData) 323 | } 324 | } 325 | } 326 | 327 | .getResults <- function(object){ 328 | adjustedPValues <- getAdjustedPValues(object, onlySignificantGenes=FALSE, 329 | order=FALSE) 330 | results <- matrix(0,nrow=nrow(adjustedPValues),ncol=ncol(adjustedPValues), 331 | dimnames=dimnames(adjustedPValues)) 332 | results[adjustedPValues<=getAlpha(object)] = 1 333 | return(results) 334 | } 335 | 336 | 337 | #' adjust p-values in a two-stage analysis 338 | #' 339 | #' This function will adjust p-values according to a hierarchical two-stage testing paradigm. 340 | #' 341 | #' @param object an object of the \code{\link{stageRClass}} class. 342 | #' @param method Character string indicating the method used for FWER correction in the confirmation stage of the stage-wise analysis. Can be any of \code{"none"}, \code{"holm"}, \code{"dte"}, \code{"dtu"}, \code{"user"}. \code{"none"} will not adjust the p-values in the confirmation stage. \code{"holm"} is an adapted Holm procedure for a stage-wise analysis, where the method takes into account the fact that genes in the confirmation stage have already passed the screening stage, hence the procedure will be more powerful for the most significant p-value as compared to the standard Holm procedure. \code{"dte"} is the adjusted Holm-Shaffer procedure for differential transcript expression analysis. 
\code{"dtu"} is the adjusted Holm-Shaffer procedure for differential transcript usage. \code{"user"} indicates a user-defined adjustment that should be specified with the \code{adjustment} argument. 343 | #' @param alpha the OFDR on which to control the two-stage analysis. 344 | #' @param tx2gene Only applicable when \code{method} is \code{"dte"} or \code{"dtu"}. A \code{\link[base]{data.frame}} with transcript IDs in the first columns and gene IDs in the second column. The rownames from \code{pConfirmation} must be contained in the transcript IDs from \code{tx2gene}, and the names from \code{pScreen} must be contained in the gene IDs. 345 | #' @param adjustment a user-defined adjustment of the confirmation stage p-values. Only applicable when \code{method} is \code{"user"} and ignored otherwise. 346 | #' @param ... Additional arguments passed to \code{.stageWiseTest} 347 | #' @return 348 | #' A stageR/stageRTx object with stage-wise adjusted p-values. 349 | #' @references 350 | #' Van den Berge K., Soneson C., Robinson M.D., Clement L. (2017). stageR: a general stage-wise method for controlling the gene-level false discovery rate in differential expression and differential transcript usage. Genome Biology 18:151. https://doi.org/10.1186/s13059-017-1277-0 351 | #' R. Heller, E. Manduchi, G. R. Grant, and W. J. Ewens, "A flexible two-stage procedure for identifying gene sets that are differentially expressed." Bioinformatics (Oxford, England), vol. 25, pp. 1019-25, 2009. 352 | #' 353 | #' S. Holm, "A Simple Sequentially Rejective Multiple Test Procedure," Scandinavian Journal of Statistics, vol. 6, no. 2, pp. 65-70, 1979. 354 | #' J. P. Shaffer, "Modified Sequentially Rejective Multiple Test Procedures," Journal of the American Statistical Association, vol. 81, p. 826, 1986. 
#' @examples
#' pScreen=c(seq(1e-10,1e-2,length.out=100),seq(1e-2,.2,length.out=100),seq(.2,1,length.out=100))
#' names(pScreen)=paste0("gene",1:300)
#' pConfirmation=matrix(runif(900),nrow=300,ncol=3)
#' dimnames(pConfirmation)=list(paste0("gene",1:300),c("H1","H2","H3"))
#' stageRObj <- stageR(pScreen=pScreen, pConfirmation=pConfirmation)
#' stageRObj <- stageWiseAdjustment(stageRObj, method="holm", alpha=0.05)
#' getAdjustedPValues(stageRObj, onlySignificantGenes=TRUE, order=TRUE)
#' @name stageWiseAdjustment
#' @rdname stageWiseAdjustment
#' @aliases stageWiseAdjustment stageWiseAdjustment,stageR stageWiseAdjustment,stageRTx
#' @export
setMethod(
  "stageWiseAdjustment",
  signature = signature(
    object = "stageR",
    method = "character",
    alpha = "numeric"
  ),
  definition = function(object, method, alpha, adjustment = NULL, ...) {
    # Run the stage-wise test on the p-values currently held by the object.
    testOutput <- .stageWiseTest(
      pScreen = getPScreen(object),
      pConfirmation = getPConfirmation(object),
      alpha = alpha,
      method = method,
      pScreenAdjusted = isPScreenAdjusted(object),
      adjustment = adjustment,
      ...
    )
    # if allowNA=TRUE, pScreen and pConfirmation may have been filtered,
    # so both are written back from the test output.
    object@pScreen <- testOutput[["pScreen"]]
    object@pConfirmation <- testOutput[["pConfirmation"]]
    object@adjustedP <- testOutput[["pAdjStage"]]
    object@alphaAdjusted <- testOutput[["alphaAdjusted"]]
    # Record how the adjustment was done and flag the object as adjusted.
    object@method <- method
    object@alpha <- alpha
    object@adjusted <- TRUE
    object
  }
)
#' @describeIn stageWiseAdjustment Adjust p-values in a two-stage analysis
setMethod(
  "stageWiseAdjustment",
  signature = signature(
    object = "stageRTx",
    method = "character",
    alpha = "numeric"
  ),
  definition = function(object, method, alpha, tx2gene, ...) {
    # The transcript-to-gene map is always taken from the object itself; the
    # `tx2gene` argument of this method is never read.
    testOutput <- .stageWiseTest(
      pScreen = getPScreen(object),
      pConfirmation = getPConfirmation(object),
      alpha = alpha,
      method = method,
      pScreenAdjusted = isPScreenAdjusted(object),
      tx2gene = getTx2gene(object),
      ...
    )
    # if allowNA=TRUE, pScreen and pConfirmation may have been filtered,
    # so both are written back from the test output.
    object@pScreen <- testOutput[["pScreen"]]
    object@pConfirmation <- testOutput[["pConfirmation"]]
    object@adjustedP <- testOutput[["pAdjStage"]]
    object@alphaAdjusted <- testOutput[["alphaAdjusted"]]
    # Record how the adjustment was done and flag the object as adjusted.
    object@method <- method
    object@alpha <- alpha
    object@adjusted <- TRUE
    object
  }
)

#' Return screening hypothesis p-values from a \code{\link{stageRClass}} object.
#'
#' @param object an object of the \code{\link{stageRClass}} class.
#' @param ... Additional arguments
#' @return
#' A vector of screening stage (aggregated) p-values.
#' @examples
#' pScreen=c(seq(1e-10,1e-2,length.out=100),seq(1e-2,.2,length.out=100),seq(.2,1,length.out=100))
#' names(pScreen)=paste0("gene",1:300)
#' pConfirmation=matrix(runif(900),nrow=300,ncol=3)
#' dimnames(pConfirmation)=list(paste0("gene",1:300),c("H1","H2","H3"))
#' stageRObj <- stageR(pScreen=pScreen, pConfirmation=pConfirmation)
#' getPScreen(stageRObj)
#' @name getPScreen
#' @rdname getPScreen
#' @aliases getPScreen getPScreen,stageR getPScreen,stageRTx
#' @export
setMethod(
  "getPScreen",
  signature = signature(object = "stageR"),
  definition = function(object) {
    object@pScreen
  }
)
#' @describeIn getPScreen Return screening hypothesis p-values from a \code{\link{stageRClass}} object.
setMethod(
  "getPScreen",
  signature = signature(object = "stageRTx"),
  definition = function(object) {
    object@pScreen
  }
)

#' Return unadjusted confirmation hypothesis p-values from a \code{\link{stageRClass}} object.
#'
#' @param object an object of the \code{\link{stageRClass}} class.
#' @return
#' A matrix of the unadjusted p-values to be used in the confirmation stage.
#' @examples
#' pScreen=c(seq(1e-10,1e-2,length.out=100),seq(1e-2,.2,length.out=100),seq(.2,1,length.out=100))
#' names(pScreen)=paste0("gene",1:300)
#' pConfirmation=matrix(runif(900),nrow=300,ncol=3)
#' dimnames(pConfirmation)=list(paste0("gene",1:300),c("H1","H2","H3"))
#' stageRObj <- stageR(pScreen=pScreen, pConfirmation=pConfirmation)
#' getPConfirmation(stageRObj)
#' @aliases getPConfirmation getPConfirmation,stageR getPConfirmation,stageRTx
#' @name getPConfirmation
#' @rdname getPConfirmation
#' @export
setMethod(
  "getPConfirmation",
  signature = signature(object = "stageR"),
  definition = function(object) {
    object@pConfirmation
  }
)
#' @describeIn getPConfirmation Return unadjusted confirmation hypothesis p-values from a \code{\link{stageRClass}} object.
setMethod(
  "getPConfirmation",
  signature = signature(object = "stageRTx"),
  definition = function(object) {
    object@pConfirmation
  }
)


#' Retrieve the stage-wise adjusted p-values.
#'
#' This function returns the stage-wise adjusted p-values for an object from
#' the \code{\link{stageRClass}} class. Note that the p-values should have been
#' adjusted with the
#' \code{\link{stageWiseAdjustment,stageR,character,numeric-method}} function
#' prior to calling this function.
#'
#' @param object an object of the \code{\link{stageRClass}} class.
#' @param onlySignificantGenes logical. If FALSE (default), all genes are returned.
If TRUE, only the genes significant for the screening hypothesis are returned. 477 | #' @param order logical. If TRUE (default), the returned matrix of adjusted p-values is ordered based on the screening hypothesis adjusted p-value. 478 | #' @param ... Other arguments passed to .getAdjustedP or .getAdjustedPTx 479 | #' @return 480 | #' For complex DGE experiments (stageR object), a matrix of adjusted p-values where every row corresponds to a gene, and every column corresponds to a contrast. The first column will be the BH FDR adjusted p-value from the screening step. 481 | #' For transcript-level experiments (stageRTx object), a matrix of adjusted p-values where every row corresponds to a transcript. 482 | #' @details 483 | #' The function returns FDR adjusted p-values for the screening hypothesis and stage-wise adjusted p-values for the confirmation hypothesis p-values. For features that were not significant in the screening hypothesis, the confirmation stage adjusted p-values are set to \code{NA}. 484 | #' The adjusted p-values in the output of \code{getAdjustedPValues} can directly be compared to alpha, the OFDR level specified in \code{stageWiseAdjustment}, to flag significant features. 485 | #' @examples 486 | #' pScreen=c(seq(1e-10,1e-2,length.out=100),seq(1e-2,.2,length.out=100),seq(.2,1,length.out=100)) 487 | #' names(pScreen)=paste0("gene",1:300) 488 | #' pConfirmation=matrix(runif(900),nrow=300,ncol=3) 489 | #' dimnames(pConfirmation)=list(paste0("gene",1:300),c("H1","H2","H3")) 490 | #' stageRObj <- stageR(pScreen=pScreen, pConfirmation=pConfirmation) 491 | #' stageRObj <- stageWiseAdjustment(stageRObj, method="holm", alpha=0.05) 492 | #' head(getAdjustedPValues(stageRObj, onlySignificantGenes=TRUE, order=TRUE)) 493 | #' @references 494 | #' Van den Berge K., Soneson C., Robinson M.D., Clement L. (2017). stageR: a general stage-wise method for controlling the gene-level false discovery rate in differential expression and differential transcript usage.
#' Genome Biology 18:151. https://doi.org/10.1186/s13059-017-1277-0
#'
#' @name getAdjustedPValues
#' @rdname getAdjustedPValues
#' @aliases getAdjustedPValues getAdjustedPValues,stageR getAdjustedPValues,stageRTx
#' @export
setMethod(
  "getAdjustedPValues",
  signature = signature(
    object = "stageR",
    onlySignificantGenes = "logical",
    order = "logical"
  ),
  definition = function(object, onlySignificantGenes, order, ...) {
    # Adjusted p-values only exist once stageWiseAdjustment has been run.
    if (!isAdjusted(object)) {
      stop("adjust p-values first using stageWiseAdjustment")
    }
    .getAdjustedP(object = object, onlySignificantGenes, order, ...)
  }
)
#' @describeIn getAdjustedPValues Retrieve the stage-wise adjusted p-values.
setMethod(
  "getAdjustedPValues",
  signature = signature(
    object = "stageRTx",
    onlySignificantGenes = "logical",
    order = "logical"
  ),
  definition = function(object, onlySignificantGenes, order, ...) {
    # Adjusted p-values only exist once stageWiseAdjustment has been run.
    if (!isAdjusted(object)) {
      stop("adjust p-values first using stageWiseAdjustment")
    }
    .getAdjustedPTx(object = object, onlySignificantGenes, order, ...)
  }
)

#' Get adjusted significance level from the screening stage.
#'
#' This function returns the adjusted significance level from the screening
#' stage that should be used to compare confirmation stage FWER adjusted
#' p-values against.
#'
#' @param object an object of the \code{\link{stageRClass}} class.
#' @details
#' The adjusted significance level is calculated as the fraction of significant
#' features in the screening stage multiplied by the alpha level.
#' @return
#' Scalar, the adjusted significance level from the screening stage.
#' @examples
#' pScreen=c(seq(1e-10,1e-2,length.out=100),seq(1e-2,.2,length.out=100),seq(.2,1,length.out=100))
#' names(pScreen)=paste0("gene",1:300)
#' pConfirmation=matrix(runif(900),nrow=300,ncol=3)
#' dimnames(pConfirmation)=list(paste0("gene",1:300),c("H1","H2","H3"))
#' stageRObj <- stageR(pScreen=pScreen, pConfirmation=pConfirmation, pScreenAdjusted=FALSE)
#' stageRObj <- stageWiseAdjustment(stageRObj, method="holm", alpha=0.05)
#' adjustedAlphaLevel(stageRObj)
#'# @method stageR-method
#' @references
#' Van den Berge K., Soneson C., Robinson M.D., Clement L. (2017). stageR: a
#' general stage-wise method for controlling the gene-level false discovery
#' rate in differential expression and differential transcript usage.
#' Genome Biology 18:151. https://doi.org/10.1186/s13059-017-1277-0
#'
#' R. Heller, E. Manduchi, G. R. Grant, and W. J. Ewens, "A flexible two-stage
#' procedure for identifying gene sets that are differentially expressed."
#' Bioinformatics (Oxford, England), vol. 25, pp. 1019-25, 2009.
#'
#' @seealso \code{\link{stageR}}, \code{\link{stageRClass}}
#' @name adjustedAlphaLevel
#' @rdname adjustedAlphaLevel
#' @aliases adjustedAlphaLevel adjustedAlphaLevel,stageR adjustedAlphaLevel,stageRTx
#' @export
setMethod(
  "adjustedAlphaLevel",
  signature = signature(object = "stageR"),
  definition = function(object) {
    # The adjusted level is only filled in by stageWiseAdjustment.
    if (!isAdjusted(object)) {
      stop("adjust p-values first using stageWiseAdjustment")
    }
    object@alphaAdjusted
  }
)
#' @describeIn adjustedAlphaLevel Get adjusted significance level from the screening stage.
setMethod(
  "adjustedAlphaLevel",
  signature = signature(object = "stageRTx"),
  definition = function(object) {
    # The adjusted level is only filled in by stageWiseAdjustment.
    if (!isAdjusted(object)) {
      stop("adjust p-values first using stageWiseAdjustment")
    }
    object@alphaAdjusted
  }
)

#' Get significance results according to a stage-wise analysis.
#'
#' This function returns a matrix that indicates whether a specific feature is
#' significant for a specific hypothesis of interest according to a stage-wise
#' analysis. The function is not applicable to transcript-level analysis.
#'
#' @param object an object of the \code{\link{stageRClass}} class.
#' @details
#' The FDR adjusted screening hypothesis p-values are compared to the alpha
#' level specified. The FWER adjusted confirmation stage p-values are compared
#' to the adjusted significance level from the screening stage.
#' @return
#' A logical matrix with rows corresponding to genes and columns corresponding
#' to contrasts, where the first column represents the screening stage on the
#' aggregated p-values. A 0 represents a non-significant test, a 1 represents a
#' significant test according to the stage-wise analysis.
#' @examples
#' pScreen=c(seq(1e-10,1e-2,length.out=100),seq(1e-2,.2,length.out=100),seq(.2,1,length.out=100))
#' names(pScreen)=paste0("gene",1:300)
#' pConfirmation=matrix(runif(900),nrow=300,ncol=3)
#' dimnames(pConfirmation)=list(paste0("gene",1:300),c("H1","H2","H3"))
#' stageRObj <- stageR(pScreen=pScreen, pConfirmation=pConfirmation)
#' stageRObj <- stageWiseAdjustment(stageRObj, method="holm", alpha=0.05)
#' head(getResults(stageRObj))
#' @references
#' Van den Berge K., Soneson C., Robinson M.D., Clement L. (2017). stageR: a
#' general stage-wise method for controlling the gene-level false discovery
#' rate in differential expression and differential transcript usage.
#' Genome Biology 18:151.
#' https://doi.org/10.1186/s13059-017-1277-0
#'
#' @name getResults
#' @rdname getResults
#' @aliases getResults getResults,stageR getResults,stageRTx
#' @export
setMethod(
  "getResults",
  signature = signature(object = "stageR"),
  definition = function(object) {
    # Results can only be derived after stageWiseAdjustment has been run.
    if (!isAdjusted(object)) {
      stop("adjust p-values first using stageWiseAdjustment")
    }
    .getResults(object)
  }
)

#' Return significant genes when performing transcript-level analysis.
#'
#' This function returns a matrix with significant genes by aggregated testing
#' of its respective transcripts.
#'
#' @param object an object of the \code{\link{stageRClass}} class.
#' @return
#' A matrix with significant genes and their corresponding FDR-adjusted screening stage (aggregated) p-value.
#' @examples
#' #make identifiers linking transcripts to genes
#' set.seed(1)
#' genes=paste0("gene",sample(1:200,1000,replace=TRUE))
#' nGenes=length(table(genes))
#' transcripts=paste0("tx",1:1000)
#' tx2gene=data.frame(transcripts,genes)
#' #gene-wise q-values
#' pScreen=c(seq(1e-10,1e-2,length.out=nGenes-100),seq(1e-2,.2,length.out=50),seq(50))
#' names(pScreen)=names(table(genes)) #discards genes that are not simulated
#' pConfirmation=matrix(runif(1000),nrow=1000,ncol=1)
#' rownames(pConfirmation)=transcripts
#' stageRObj <- stageRTx(pScreen=pScreen, pConfirmation=pConfirmation ,pScreenAdjusted=TRUE, tx2gene=tx2gene)
#' stageRObj <- stageWiseAdjustment(stageRObj, method="dte", alpha=0.05)
#' head(getSignificantGenes(stageRObj))
#' @references
#' Van den Berge K., Soneson C., Robinson M.D., Clement L. (2017). stageR: a
#' general stage-wise method for controlling the gene-level false discovery
#' rate in differential expression and differential transcript usage.
#' Genome Biology 18:151.
#' https://doi.org/10.1186/s13059-017-1277-0
#'
#' @name getSignificantGenes
#' @rdname getSignificantGenes
#' @aliases getSignificantGenes getSignificantGenes,stageRTx
#' @export
setMethod(
  "getSignificantGenes",
  signature = signature(object = "stageRTx"),
  definition = function(object) {
    if (!isAdjusted(object)) {
      stop("adjust p-values first using stageWiseAdjustment")
    }
    # is() respects S4 inheritance; comparing class(object) to a string would
    # reject objects of a class that extends stageRTx.
    if (!methods::is(object, "stageRTx")) {
      stop("this function only works on an object of class stageRTx")
    }
    # Unfiltered, unordered table so that row positions line up with geneID.
    adjustedPValues <- getAdjustedPValues(
      object,
      onlySignificantGenes = FALSE,
      order = FALSE
    )
    geneIDs <- adjustedPValues$geneID
    pScreenAdjusted <- adjustedPValues[, "gene"]
    # which() drops NA comparisons, so rows without a gene-level value are
    # never reported as significant.
    significantGeneIDs <- which(pScreenAdjusted <= getAlpha(object))
    significantGeneNames <- geneIDs[significantGeneIDs]
    geneAdjustedPValues <- adjustedPValues[significantGeneIDs, "gene"]
    # A gene ID can occur on several rows; keep its first occurrence only.
    dups <- duplicated(significantGeneNames)
    matrix(
      geneAdjustedPValues[!dups],
      ncol = 1,
      dimnames = list(
        significantGeneNames[!dups],
        "FDR adjusted p-value"
      )
    )
  }
)


#' Return significant transcripts when performing transcript-level analysis.
#'
#' This function returns a matrix with significant transcripts according to a
#' stage-wise analysis.
#'
#' @param object an object of the \code{\link{stageRClass}} class.
#' @return
#' A matrix of significant transcripts with their corresponding stage-wise adjusted p-value (i.e. FDR and FWER correction).
#' @references
#' Van den Berge K., Soneson C., Robinson M.D., Clement L. (2017). stageR: a
#' general stage-wise method for controlling the gene-level false discovery
#' rate in differential expression and differential transcript usage.
#' Genome Biology 18:151.
#' https://doi.org/10.1186/s13059-017-1277-0
#' @examples
#' #make identifiers linking transcripts to genes
#' set.seed(1)
#' genes=paste0("gene",sample(1:200,1000,replace=TRUE))
#' nGenes=length(table(genes))
#' transcripts=paste0("tx",1:1000)
#' tx2gene=data.frame(transcripts,genes)
#' #gene-wise q-values
#' pScreen=c(seq(1e-10,1e-2,length.out=nGenes-100),seq(1e-2,.2,length.out=50),seq(50))
#' names(pScreen)=names(table(genes)) #discards genes that are not simulated
#' pConfirmation=matrix(runif(1000),nrow=1000,ncol=1)
#' rownames(pConfirmation)=transcripts
#' stageRObj <- stageRTx(pScreen=pScreen, pConfirmation=pConfirmation ,pScreenAdjusted=TRUE, tx2gene=tx2gene)
#' stageRObj <- stageWiseAdjustment(stageRObj, method="dte", alpha=0.05)
#' head(getSignificantTx(stageRObj))
#' @name getSignificantTx
#' @rdname getSignificantTx
#' @aliases getSignificantTx getSignificantTx,stageR getSignificantTx,stageRTx
#' @export
setMethod(
  "getSignificantTx",
  signature = signature(object = "stageRTx"),
  definition = function(object) {
    if (!isAdjusted(object)) {
      stop("adjust p-values first using stageWiseAdjustment")
    }
    # is() respects S4 inheritance; comparing class(object) to a string would
    # reject objects of a class that extends stageRTx.
    if (!methods::is(object, "stageRTx")) {
      stop("this function only works on an object of class stageRTx")
    }
    # Unfiltered, unordered table so that row positions line up with txID.
    adjustedPValues <- getAdjustedPValues(
      object,
      onlySignificantGenes = FALSE,
      order = FALSE
    )
    txIDs <- adjustedPValues$txID
    # which() drops NA comparisons, so rows without a transcript-level value
    # are never reported as significant.
    significantTxIDs <- which(
      adjustedPValues[, "transcript"] <= getAlpha(object)
    )
    significantTxNames <- txIDs[significantTxIDs]
    matrix(
      adjustedPValues[significantTxIDs, "transcript"],
      ncol = 1,
      dimnames = list(
        significantTxNames,
        "stage-wise adjusted p-value"
      )
    )
  }
)

#' Retrieve the significance level for the stage-wise adjustment.
#'
#' This function returns the significance level on which the stage-wise
#' adjustment is based.
#'
#' @param object an object of the \code{\link{stageRClass}} or \code{stageRTxClass} class.
#' @param ... Additional arguments
#' @return
#' Returns a scalar vector with the OFDR alpha level that was specified by the user.
#' @examples
#' pScreen=c(seq(1e-10,1e-2,length.out=100),seq(1e-2,.2,length.out=100),seq(.2,1,length.out=100))
#' names(pScreen)=paste0("gene",1:300)
#' pConfirmation=matrix(runif(900),nrow=300,ncol=3)
#' dimnames(pConfirmation)=list(paste0("gene",1:300),c("H1","H2","H3"))
#' stageRObj <- stageR(pScreen=pScreen, pConfirmation=pConfirmation)
#' stageRObj <- stageWiseAdjustment(stageRObj, method="holm", alpha=0.05)
#' getAlpha(stageRObj)
#' @references
#' Van den Berge K., Soneson C., Robinson M.D., Clement L. (2017). stageR: a
#' general stage-wise method for controlling the gene-level false discovery
#' rate in differential expression and differential transcript usage.
#' Genome Biology 18:151. https://doi.org/10.1186/s13059-017-1277-0
#'
#' @name getAlpha
#' @rdname getAlpha
#' @aliases getAlpha getAlpha,stageR getAlpha,stageRTx
#' @export
setMethod(
  "getAlpha",
  signature = signature(object = "stageR"),
  definition = function(object, ...) {
    # NOTE(review): this guard only fires when the slot is NULL; whether an
    # unadjusted object can actually hold NULL here depends on the class
    # definition -- confirm against stageRClasses.R.
    if (is.null(object@alpha)) {
      # Built from two pieces so the message carries no embedded line break
      # or source indentation.
      stop(
        "No significance level was specified yet. ",
        "Maybe you need to run stageWiseAdjustment first."
      )
    }
    object@alpha
  }
)
#' @describeIn getAlpha Retrieve the significance level for the stage-wise adjustment.
setMethod(
  "getAlpha",
  signature = signature(object = "stageRTx"),
  definition = function(object, ...) {
    # NOTE(review): this guard only fires when the slot is NULL; whether an
    # unadjusted object can actually hold NULL here depends on the class
    # definition -- confirm against stageRClasses.R.
    if (is.null(object@alpha)) {
      # Built from two pieces so the message carries no embedded line break
      # or source indentation.
      stop(
        "No significance level was specified yet. ",
        "Maybe you need to run stageWiseAdjustment first."
      )
    }
    object@alpha
  }
)

#' Retrieve the data frame linking genes to transcripts.
#'
#' This function returns a data frame that links the genes with the
#' transcripts being analysed.
#'
#' @param object an object of the \code{stageRTxClass} class.
#' @param ... Additional arguments
#' @return
#' A matrix linking gene to transcript identifiers.
#' @examples
#' #make identifiers linking transcripts to genes
#' set.seed(1)
#' genes=paste0("gene",sample(1:200,1000,replace=TRUE))
#' nGenes=length(table(genes))
#' transcripts=paste0("tx",1:1000)
#' tx2gene=data.frame(transcripts,genes)
#' #gene-wise q-values
#' pScreen=c(seq(1e-10,1e-2,length.out=nGenes-100),seq(1e-2,.2,length.out=50),seq(50))
#' names(pScreen)=names(table(genes)) #discards genes that are not simulated
#' pConfirmation=matrix(runif(1000),nrow=1000,ncol=1)
#' rownames(pConfirmation)=transcripts
#' stageRObj <- stageRTx(pScreen=pScreen, pConfirmation=pConfirmation ,pScreenAdjusted=TRUE, tx2gene=tx2gene)
#' getTx2gene(stageRObj)
#' @references
#' Van den Berge K., Soneson C., Robinson M.D., Clement L. (2017). stageR: a
#' general stage-wise method for controlling the gene-level false discovery
#' rate in differential expression and differential transcript usage.
#' Genome Biology 18:151. https://doi.org/10.1186/s13059-017-1277-0
#'
#' @name getTx2gene
#' @rdname getTx2gene
#' @aliases getTx2gene getTx2gene,stageRTx
#' @export
setMethod(
  "getTx2gene",
  signature = signature(object = "stageRTx"),
  definition = function(object, ...) {
    object@tx2gene
  }
)

#' Are the screening p-values adjusted for multiplicity?
#'
#' This function returns a logical stating whether the screening hypothesis
#' p-values are already adjusted for multiple testing according to the BH FDR
#' criterion.
#'
#' @param object an object of the \code{\link{stageRClass}} or \code{stageRTxClass} class.
#' @param ... Additional arguments
#' @return
#' A logical stating whether the screening hypothesis p-values are already adjusted for multiple testing according to the BH FDR criterion.
#' @examples
#' pScreen=c(seq(1e-10,1e-2,length.out=100),seq(1e-2,.2,length.out=100),seq(.2,1,length.out=100))
#' names(pScreen)=paste0("gene",1:300)
#' pConfirmation=matrix(runif(900),nrow=300,ncol=3)
#' dimnames(pConfirmation)=list(paste0("gene",1:300),c("H1","H2","H3"))
#' stageRObj <- stageR(pScreen=pScreen, pConfirmation=pConfirmation)
#' isPScreenAdjusted(stageRObj)
#' @references
#' Van den Berge K., Soneson C., Robinson M.D., Clement L. (2017). stageR: a
#' general stage-wise method for controlling the gene-level false discovery
#' rate in differential expression and differential transcript usage.
#' Genome Biology 18:151. https://doi.org/10.1186/s13059-017-1277-0
#'
#' @name isPScreenAdjusted
#' @rdname isPScreenAdjusted
#' @aliases isPScreenAdjusted isPScreenAdjusted,stageR isPScreenAdjusted,stageRTx
#' @export
setMethod(
  "isPScreenAdjusted",
  signature = signature(object = "stageR"),
  definition = function(object, ...) {
    object@pScreenAdjusted
  }
)
#' @describeIn isPScreenAdjusted Are the screening p-values adjusted for multiplicity?
setMethod(
  "isPScreenAdjusted",
  signature = signature(object = "stageRTx"),
  definition = function(object, ...) {
    object@pScreenAdjusted
  }
)

#' Has stage-wise adjustment already been performed on the object?
#'
#' This function returns a logical stating whether the p-values have already
#' been adjusted according to the stage-wise method.
#'
#' @param object an object of the \code{\link{stageRClass}} or \code{stageRTxClass} class.
#' @param ... Additional arguments
#' @return
#' A logical stating whether the p-values have already been adjusted according to the stage-wise method
#' @examples
#' pScreen=c(seq(1e-10,1e-2,length.out=100),seq(1e-2,.2,length.out=100),seq(.2,1,length.out=100))
#' names(pScreen)=paste0("gene",1:300)
#' pConfirmation=matrix(runif(900),nrow=300,ncol=3)
#' dimnames(pConfirmation)=list(paste0("gene",1:300),c("H1","H2","H3"))
#' stageRObj <- stageR(pScreen=pScreen, pConfirmation=pConfirmation)
#' isAdjusted(stageRObj)
#' stageRObj <- stageWiseAdjustment(stageRObj, method="holm", alpha=0.05)
#' isAdjusted(stageRObj)
#' @references
#' Van den Berge K., Soneson C., Robinson M.D., Clement L. (2017). stageR: a
#' general stage-wise method for controlling the gene-level false discovery
#' rate in differential expression and differential transcript usage.
#' Genome Biology 18:151. https://doi.org/10.1186/s13059-017-1277-0
#'
#' @name isAdjusted
#' @rdname isAdjusted
#' @aliases isAdjusted isAdjusted,stageR isAdjusted,stageRTx
#' @export
setMethod(
  "isAdjusted",
  signature = signature(object = "stageR"),
  definition = function(object, ...) {
    object@adjusted
  }
)
#' @describeIn isAdjusted Has stage-wise adjustment already been performed on the object?
setMethod(
  "isAdjusted",
  signature = signature(object = "stageRTx"),
  definition = function(object, ...) {
    object@adjusted
  }
)

#' Retrieve FWER correction method.
#'
#' This function retrieves the method used for FWER multiple testing correction
#' in the confirmation stage of a stage-wise analysis.
#'
#' @param object an object of the \code{\link{stageRClass}} or \code{stageRTxClass} class.
#' @param ... Additional arguments
#' @return
#' Returns a character vector of length 1 specifying the FWER correction method that is used in the confirmation stage of the stage-wise analysis.
#' @examples
#' pScreen=c(seq(1e-10,1e-2,length.out=100),seq(1e-2,.2,length.out=100),seq(.2,1,length.out=100))
#' names(pScreen)=paste0("gene",1:300)
#' pConfirmation=matrix(runif(900),nrow=300,ncol=3)
#' dimnames(pConfirmation)=list(paste0("gene",1:300),c("H1","H2","H3"))
#' stageRObj <- stageR(pScreen=pScreen, pConfirmation=pConfirmation)
#' stageRObj <- stageWiseAdjustment(stageRObj, method="holm", alpha=0.05)
#' getMethod(stageRObj)
#' @references
#' Van den Berge K., Soneson C., Robinson M.D., Clement L. (2017). stageR: a
#' general stage-wise method for controlling the gene-level false discovery
#' rate in differential expression and differential transcript usage.
#' Genome Biology 18:151. https://doi.org/10.1186/s13059-017-1277-0
#'
#' @name getMethod
#' @rdname getMethod
#' @aliases getMethod getMethod,stageR getMethod,stageRTx
#' @export
setMethod(
  "getMethod",
  signature = signature(object = "stageR"),
  definition = function(object, ...) {
    object@method
  }
)
#' @describeIn getMethod Retrieve FWER correction method.
setMethod(
  "getMethod",
  signature = signature(object = "stageRTx"),
  definition = function(object, ...) {
    object@method
  }
)
--------------------------------------------------------------------------------