├── .Rbuildignore
├── .gitignore
├── DESCRIPTION
├── NAMESPACE
├── R
    ├── Classes.R
    ├── DBanalysis.R
    ├── DBresults.R
    ├── GenericFunctions.R
    ├── countReads.R
    ├── data.R
    ├── peakreference.R
    ├── plots.R
    ├── timeclust.R
    └── timecourseTable.R
├── README.md
├── TCseq.Rproj
├── data
    ├── countsTable.rda
    ├── experiment.rda
    ├── experiment_BAMfile.rda
    ├── genomicIntervals.rda
    └── tca_ATAC.rda
├── man
    ├── DBanalysis.Rd
    ├── DBresult.Rd
    ├── TCA.Rd
    ├── TCA.accessors.Rd
    ├── clust-class.Rd
    ├── clust.accessors.Rd
    ├── countReads.Rd
    ├── counts.Rd
    ├── countsTable.Rd
    ├── experiment.Rd
    ├── experiment_BAMfile.Rd
    ├── genomicIntervals.Rd
    ├── peakreference.Rd
    ├── tca_ATAC.Rd
    ├── timeclust.Rd
    ├── timeclustplot.Rd
    └── timecourseTable.Rd
├── tests
    ├── testthat.R
    └── testthat
    │   └── test_TCseq.R
└── vignettes
    ├── TCseq.Rnw
    ├── clusterRes.png
    └── subcluster.png


/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^.*\.Rproj$
2 | ^\.Rproj\.user$
3 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | .Ruserdata
5 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: TCseq
 2 | Type: Package
 3 | Title: Time course sequencing data analysis
 4 | Version: 1.23.2
 5 | Author: Mengjun Wu <minervajunjun@gmail.com>, Lei Gu <leigu@broadinstitute.org>
 6 | Maintainer: Mengjun Wu <minervajunjun@gmail.com>
 7 | Description: Quantitative and differential analysis of epigenomic and
 8 |     transcriptomic time course sequencing data, clustering analysis
 9 |     and visualization of temporal patterns of time course data.
10 | Depends: R (>= 3.4)
11 | License: GPL (>= 2)
12 | LazyData: TRUE
13 | Imports: edgeR, BiocGenerics, reshape2, GenomicRanges, IRanges,
14 |     SummarizedExperiment, GenomicAlignments, Rsamtools, e1071,
15 |     cluster, ggplot2, grid, grDevices, stats, utils, methods, locfit
16 | Suggests: testthat
17 | biocViews: Epigenetics, TimeCourse, Sequencing, ChIPSeq, RNASeq,
18 |     DifferentialExpression, Clustering, Visualization
19 | RoxygenNote: 7.2.3
20 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | export(DBanalysis)
 4 | export(DBresult)
 5 | export(DBresult.cluster)
 6 | export(TCA)
 7 | export(TCAFromSummarizedExperiment)
 8 | export(clustCenters)
 9 | export(clustCluster)
10 | export(clustData)
11 | export(clustMembership)
12 | export(clustResults)
13 | export(countReads)
14 | export(genomicFeature)
15 | export(peakreference)
16 | export(tcTable)
17 | export(timeclust)
18 | export(timeclustplot)
19 | export(timecourseTable)
20 | exportClasses(TCA)
21 | exportClasses(clust)
22 | exportMethods('counts<-')
23 | exportMethods(clustCenters)
24 | exportMethods(clustCluster)
25 | exportMethods(clustData)
26 | exportMethods(clustMembership)
27 | exportMethods(clustResults)
28 | exportMethods(counts)
29 | exportMethods(design)
30 | exportMethods(tcTable)
31 | import(GenomicRanges)
32 | import(SummarizedExperiment)
33 | import(cluster)
34 | import(e1071)
35 | import(edgeR)
36 | import(ggplot2)
37 | import(grid)
38 | import(locfit)
39 | import(reshape2)
40 | importFrom(BiocGenerics,"counts<-")
41 | importFrom(BiocGenerics,counts)
42 | importFrom(BiocGenerics,design)
43 | importFrom(GenomicAlignments,summarizeOverlaps)
44 | importFrom(IRanges,IRanges)
45 | importFrom(Rsamtools,BamFile)
46 | importFrom(Rsamtools,BamFileList)
47 | importFrom(grDevices,rainbow)
48 | importFrom(methods,new)
49 | importFrom(methods,validObject)
50 | importFrom(stats,as.dist)
51 | importFrom(stats,complete.cases)
52 | importFrom(stats,cor)
53 | importFrom(stats,cutree)
54 | importFrom(stats,hclust)
55 | importFrom(stats,kmeans)
56 | importFrom(stats,model.matrix)
57 | importFrom(stats,sd)
58 | importFrom(stats,time)
59 | importFrom(utils,capture.output)
60 | importFrom(utils,read.table)
61 | 


--------------------------------------------------------------------------------
/R/Classes.R:
--------------------------------------------------------------------------------
  1 | #' clust class
  2 | #'
  3 | #'\code{clust} is a S4 class for storing results of the clustering
  4 | #'analysis of time course data.
  5 | #'
  6 | #'@section Slots:
  7 | #'Object of \code{clust} class contains the following slots:
  8 | #'\describe{
  9 | #'  \item{\code{method}}{clustering method used}
 10 | #'  \item{\code{dist}}{distance metric used}
 11 | #'  \item{\code{data}}{a matrix of original or standardized data used
 12 | #'  in the analysis}
 13 | #'  \item{\code{centers}}{a matrix of cluster centers}
 14 | #'  \item{\code{cluster}}{an integer vector of length \eqn{n} (the
 15 | #'  integers are the indices of clusters the data points belong to).
 16 | #'  For the fuzzy cmeans clustering method, a data point is assigned
 17 | #'  to the cluster for which the data point has the highest
 18 | #'  membership value.}
 19 | #'  \item{\code{membership}}{a matrix of membership values of the
 20 | #'  data points to each clusters}
 21 | #'}
 22 | #'@details
 23 | #'The clust objects are returned from \code{\link{timeclust}} and have
 24 | #'a show method printing a compact summary of their contents
 25 | #'
 26 | #'@author Mengjun Wu
 27 | #'
 28 | #'@seealso \code{\link{timeclust}}, \code{\link{@}}
 29 | #'@exportClass clust
 30 | 
 31 | clust <- setClass(
 32 |   "clust",
 33 |   # one slot per component of a clustering result
 34 |   slots = c(
 35 |     method = "character",
 36 |     dist = "character",
 37 |     data = "matrix",
 38 |     centers = "matrix",
 39 |     cluster = "integer",
 40 |     membership = "matrix"
 41 |   )
 42 | )
 37 | 
 38 | #'@rdname TCA
 39 | #'@export
 40 | setClass(
 41 |   "TCA",
 42 |   slots = c(
 43 |     design = "data.frame",
 44 |     counts = "matrix",
 45 |     genomicFeature = "data.frame",
 46 |     DBfit = "DGEGLM",
 47 |     contrasts = "matrix",
 48 |     tcTable = "matrix",
 49 |     clusterRes = "clust"
 50 |   ),
 51 |   # defaults: an empty 0 x 0 integer count matrix and an empty design table
 52 |   prototype = list(
 53 |     counts = matrix(0L, 0L, 0L),
 54 |     design = data.frame()
 55 |   )
 56 | )
 46 | 
 47 | # Validity method for "TCA": each failed check is raised through stop(),
 48 | # and TRUE is returned once every check has passed.
 49 | setValidity("TCA", function(object) {
 50 |   cnt <- object@counts
 51 |   dsn <- object@design
 52 |   feat <- object@genomicFeature
 53 | 
 54 |   # value checks on the count matrix
 55 |   if (!is.numeric(cnt)) {
 56 |     stop("All counts must be numeric.")
 57 |   }
 58 |   if (any(is.na(cnt))) {
 59 |     stop("NA values are not allowed in counts. ")
 60 |   }
 61 |   if (any(cnt < 0)) {
 62 |     stop("counts contain negative number(s), all counts must be positive")
 63 |   }
 64 |   if (!is.integer(cnt)) {
 65 |     if (any(round(cnt) != cnt)) {
 66 |       stop("All counts must be intergers.")
 67 |     }
 68 |     # converts the local copy only; the slot of 'object' is left untouched
 69 |     mode(cnt) <- "integer"
 70 |     warning("All counts are coerced to integers.")
 71 |   }
 72 | 
 73 |   # dimension checks are skipped for the empty 0 x 0 integer matrix
 74 |   has.counts <- !identical(matrix(0L, 0L, 0L), cnt)
 75 |   if (has.counts && ncol(cnt) != nrow(dsn)) {
 76 |     stop("Number of columns in 'counts' must equal to number of rows in 'design'.")
 77 |   }
 78 |   if (has.counts && nrow(cnt) != nrow(feat)) {
 79 |     stop("Number of rows in 'counts' must equal to number of rows in 'genomicFeature'")
 80 |   }
 81 | 
 82 |   # required columns, matched case-insensitively
 83 |   design.fields <- c("sampleid", "timepoint", "group")
 84 |   if (!all(design.fields %in% tolower(colnames(dsn)))) {
 85 |     stop("One or more following required fields in 'design' are missing: 'sampleid', 'timepoint', 'group', check if the columns are correctly named or if the corresponding information is provided.")
 86 |   }
 87 |   feature.fields <- c("id", "chr", "start", "end")
 88 |   if (!all(feature.fields %in% tolower(colnames(feat)))) {
 89 |     stop("One or more following required fields in 'genomicFeature' are missing: 'id', 'chr', 'start','end', check if the columns are correctly named or if the corresponding information is provided.")
 90 |   }
 91 |   TRUE
 92 | })
 88 | 
 89 | #'TCA class and constructor
 90 | #'
 91 | #'\code{TCA} is a S4 class for storing input data, results of
 92 | #'differential analysis and clustering analysis. A \code{TCA} object
 93 | #'can be created by the constructor function taking a table of sample
 94 | #'information, a table of the genomic coordinates of features, and read
 95 | #'count table (optional).
 96 | #'
 97 | #'@param design a data frame containing information of
 98 | #'samples/libraries. For time course analysis, design table should 
 99 | #'contain at least three columns (case insensitive): \code{sampleid},
100 | #'\code{timepoint} and \code{group} providing time point and group
101 | #'information of each sample/library. If \code{counts} is not provided
102 | #'when creating \code{TCA} object, an optional column \code{bamfile} can 
103 | #'be used to provide BAM filename of each sample/library and generate 
104 | #'count table using \code{\link{countReads}} function later.
105 | #'
106 | #'@param counts an integer matrix containing read counts. Rows
107 | #'correspond to genomic features and columns to samples/libraries. 
108 | #'The column names should be the same as the time points 
109 | #'in \code{design}. 
110 | #'
111 | #'@param genomicFeature a data frame or a GRanges object containing
112 | #'genomic coordinates of features of interest (e.g. genes in RNA-seq,
113 | #'binding regions in ChIP-seq). If genomicFeature is a data frame,
114 | #'four columns are required in \code{genomicFeature}: \code{id},
115 | #'\code{chr}, \code{start}, \code{end}; if genomicFeature is a Granges
116 | #'object, the metadata column "\code{id}" is required. For
117 | #'\code{TCAFromSummarizedExperiment}, genomicFeature must be
118 | #'provided if \code{se} is a SummarizedExperiment object.
119 | #'
120 | #'
121 | #'@param se A SummarizedExperiment or a RangedSummarizedExperiment
122 | #'object. The object might contain multiple assays in the assay list, 
123 | #'only the first one will be taken to construct TCA object. 
124 | #'For SummarizedExperiment object, \code{genomicFeature}
125 | #'must be provided while for RangedSummarizedExperiment object,
126 | #'the genomic features will be extracted directly from the object.
127 | #'
128 | #'@param zero.based Logical. If TRUE, the start positions of the
129 | #'genomic ranges in the returned \code{TCA} object are \emph{0-based},
130 | #'if FALSE, the start positions will be \emph{1-based}.
131 | #'
132 | #'@return A TCA object
133 | #'
134 | #'@details A TCA object can be created without providing read counts,
135 | #'read counts can be provided by \code{\link{counts}} or generated by
136 | #'\code{\link{countReads}}. For the read counts, the number of rows
137 | #'should equal that in \code{genomicFeature} and the number of columns
138 | #'should equal the number of rows in \code{design}; in addition, the
139 | #'column names should be the same as the time points in \code{design}.
140 | #'Input data and analysis results in a TCA object can be accessed by using 
141 | #'corresponding accessors and functions.
142 | #'The TCA objects also have a show method printing a compact summary of
143 | #'their contents; see \code{\link{counts}}, \code{\link{TCA.accessors}},
144 | #'\code{\link{DBresult}}, \code{\link{tcTable}}, \code{\link{timeclust}} and
145 | #'\code{clust}.
146 | #'@author Mengjun Wu
147 | #'@seealso \code{\link{counts}}, \code{\link{TCA.accessors}},
148 | #'\code{\link{DBresult}}, \code{\link{timeclust}}, \code{\link{clust}}
149 | #'
150 | #'@author Mengjun Wu
151 | #'
152 | #'@examples
153 | #'#create data frame of experiment design: 4 time points and 2 replicates for each time point.
154 | #'d <- data.frame(sampleID = 1:8, group = rep(c(1, 2, 3, 4), 2),
155 | #'                timepoint = rep(c('0h', '24h', '48h', '72h'), 2))
156 | #'
157 | #'
158 | #'#create data frame of genomic intervals of interest
159 | #'gf <- data.frame(chr = c(rep('chr1', 3), rep('chr2', 2), rep('chr4', 2)),
160 | #'                 start = seq(100, 2000, by = 300),
161 | #'                 end = seq(100, 2000, by = 300) + 150,
162 | #'                 id = paste0('peak', 1:7))
163 | #'tca <- TCA(design = d, genomicFeature = gf)
164 | #'genomicFeature(tca)
165 | #'
166 | #'#if count table is available
167 | #'c <- matrix(sample(1000, 56), nrow = 7, dimnames = list(paste0('peak', 1:7), 1:8))
168 | #'tca <- TCA(design = d, counts = c, genomicFeature = gf)
169 | #'# replace the count table of a \code{TCA} object
170 | #'c2 <- matrix(sample(500, 56), nrow = 7, dimnames = list(paste0('peak', 1:7), 1:8))
171 | #'counts(tca) <- c2
172 | #'
173 | #'
174 | #'@export
175 | TCA <- function(design, counts = matrix(0L, 0L, 0L), genomicFeature,
176 |                 zero.based = TRUE) {
177 |   # Counts must be NA-free, non-negative whole numbers; whole-number doubles
178 |   # are accepted and stored as integers (with a warning).
179 |   if (!is.numeric(counts)) {
180 |     stop("All counts must be numeric.")
181 |   }
182 |   if (any(is.na(counts))) {
183 |     stop("NA values are not allowed in counts.")
184 |   }
185 |   if (any(counts < 0)) {
186 |     stop("counts contain negative number(s), all counts must be positive")
187 |   }
188 |   if (!is.integer(counts)) {
189 |     if (any(round(counts) != counts)) {
190 |       stop("All counts must be intergers.")
191 |     } else {
192 |       mode(counts) <- "integer"
193 |       warning("All counts are coerced to integers.")
194 |     }
195 |   }
196 |   if (!is.data.frame(design)) {
197 |     stop("design must be 'data.frame'.")
198 |   }
199 |   if (!is.data.frame(genomicFeature) &&
200 |       !is(genomicFeature, "GRanges")) {
201 |     stop("genomicFeature must be a data frame or a GRanges object.")
202 |   }
203 |   if (is.data.frame(genomicFeature)) {
204 |     gf.fields <- c("id", "chr", "start", "end")
205 |     if (!all(gf.fields %in% tolower(colnames(genomicFeature)))) {
206 |       err <- paste0("One or more following required fields in genomicFeature are missing: 'id', 'chr', 'start','end', check if the columns are correctly named or if the corresponding information is provided. ")
207 |       stop(err)
208 |     }
209 |     if (!all(gf.fields %in% colnames(genomicFeature))) {
210 |       colnames(genomicFeature) <- tolower(colnames(genomicFeature))
211 |       warning("colnames of genomicFeature are all forced to lowercase.")
212 |     }
213 |     # data frame starts are only shifted (+1) when 1-based output is requested
214 |     if (!zero.based) {
215 |       genomicFeature$start <- genomicFeature$start + 1
216 |     }
217 |   } else {
218 |     # GRanges input: the feature names must come from an 'id' metadata column
219 |     meta.names <- colnames(elementMetadata(genomicFeature))
220 |     if (!"id" %in% tolower(meta.names)) {
221 |       stop("Required metadata of genomicFeature is mising: 'id'.")
222 |     }
223 |     if (!"id" %in% meta.names) {
224 |       colnames(elementMetadata(genomicFeature)) <- tolower(meta.names)
225 |       warning("Names of metadata of genomicFeature are all forced to lowercase.")
226 |     }
227 |     genomicFeature <- as(genomicFeature, "data.frame")
228 |     # The GRanges -> data.frame coercion names the sequence column 'seqnames',
229 |     # whereas the TCA validity check requires a 'chr' column.
230 |     if (!"chr" %in% colnames(genomicFeature)) {
231 |       seq.col <- colnames(genomicFeature) == "seqnames"
232 |       colnames(genomicFeature)[seq.col] <- "chr"
233 |     }
234 |     # GRanges starts are shifted (-1) when 0-based output is requested
235 |     if (zero.based) {
236 |       genomicFeature$start <- genomicFeature$start - 1
237 |     }
238 |   }
239 |   # dimension checks are skipped when no count table was supplied
240 |   if (!identical(matrix(0L, 0L, 0L), counts)) {
241 |     if (ncol(counts) != nrow(design)) {
242 |       stop("number of columns in 'counts' must equal to number of rows in 'design'.")
243 |     }
244 |     if (nrow(counts) != nrow(genomicFeature)) {
245 |       stop("Number of rows in 'counts' must equal to number of rows in 'genomicFeature'")
246 |     }
247 |   }
248 | 
249 |   if (!all(c("sampleid", "timepoint", "group") %in%
250 |            tolower(colnames(design)))) {
251 |     err <- paste0("One or more following required fields in 'design' are missing: 'sampleid', 'timepoint', 'group', check if the columns are correctly named or if the corresponding information is provided.")
252 |     stop(err)
253 |   }
254 |   colnames(design) <- tolower(colnames(design))
255 |   # is.character() is a scalar test even for multi-class columns
256 |   # (e.g. ordered factors), unlike comparing class() to a string
257 |   if (!is.character(design$timepoint)) {
258 |     design$timepoint <- as.character(design$timepoint)
259 |     warning("time points in 'design' are not characters, converted to characters")
260 |   }
261 |   object <- new("TCA", design = design, counts = counts,
262 |                 genomicFeature = genomicFeature)
263 |   object
264 | }
254 | 
255 | #'@rdname TCA
256 | #'@export
257 | TCAFromSummarizedExperiment <- function(se, genomicFeature = NULL) {
258 |   if (!is(se, "SummarizedExperiment") &&
259 |       !is(se, "RangedSummarizedExperiment")) {
260 |     stop("se must be a SummarizedExperiment or a RangedSummarizedExperiment object.")
261 |   }
262 |   # RangedSummarizedExperiment extends SummarizedExperiment, so the ranged
263 |   # case has to be tested first; its own row ranges are always used and
264 |   # 'genomicFeature' is only required for a plain SummarizedExperiment.
265 |   if (is(se, "RangedSummarizedExperiment")) {
266 |     genomicFeature <- rowRanges(se)
267 |   } else if (is.null(genomicFeature)) {
268 |     stop("genomicFeature must be provided")
269 |   }
270 |   design <- as(colData(se), "data.frame")
271 |   # only the first assay is used as the count table
272 |   counts <- assay(se, 1)
273 |   TCA(design = design, counts = counts, genomicFeature = genomicFeature)
274 | }
278 | 
279 | # Set inheritance: declare "clust" and "TCA" as extending "LargeDataObject".
280 | # The class LargeDataObject from limma has a \code{show} method for objects of the class; presumably both classes rely on it for compact printing (confirm).
281 | 
282 | setIs("clust", "LargeDataObject")
283 | setIs("TCA", "LargeDataObject")
284 | 


--------------------------------------------------------------------------------
/R/DBanalysis.R:
--------------------------------------------------------------------------------
  1 | #' Perform differential expression analysis
  2 | #'
  3 | #' This function is a wrapper for the \code{\link{glmFit}} in edgeR package.
  4 | #'
  5 | #' @param object a \code{TCA} object.
  6 | #'
  7 | #' @param categories character string giving which column in \code{design} 
  8 | #' will be used for differential analysis. For time course analysis, the default
  9 | #' column is "\code{timepoint}".
 10 | #'
 11 | #' @param norm.lib logical indicating whether or not use effective
 12 | #' library size when perform normalization. See \code{\link{counts}} for more 
 13 | #' details.
 14 | #'
 15 | #' @param filter.type character string indicating which type of count
 16 | #' (raw or normalized) is used when performing filtering. Options are
 17 | #' "\code{raw}", "\code{cpm}", "\code{rpkm}", "\code{NULL}". No filtering will 
 18 | #' be performed when using "\code{NULL}".
 19 | #'
 20 | #' @param filter.value a numeric value; minimum values of selected
 21 | #' \code{filter.type} ("\code{raw}", "\code{cpm}", "\code{rpkm}"). It is used in 
 22 | #' combination with \code{samplePassfilter}.
 23 | #'
 24 | #' @param samplePassfilter a numeric value indicating the minimum number
 25 | #' of samples/libraries in which a genomic feature has counts value 
 26 | #' (raw or normalized) more than \code{filter.value}. Smaller than this number, 
 27 | #' the genomic feature will be filtered out.
 28 | #'
 29 | #' @param ... additional arguments passed to \code{\link{glmFit}} from
 30 | #' \code{edgeR} package.
 31 | #'
 32 | #' @details The differential event is detected by using the generalized
 33 | #' linear model (GLM) methods (McCarthy et al, 2012). This function
 34 | #' fits the read counts of each gene to a negative binomial glm by
 35 | #' using \code{\link{glmFit}} function from edgeR. To further test the
 36 | #' significance of changes, see \code{DBresult}, \code{TopDBresult}
 37 | #'
 38 | 
 39 | #' @return
 40 | #' A \code{TCA} object
 41 | #'
 42 | #' @author
 43 | #' Mengjun Wu, Lei Gu
 44 | #'
 45 | #' @references McCarthy,D.J.,Chen, Y., & Smyth, G. K.(2012). Differential
 46 | #' expression analysis of multifactor RNA-Seq experiments with respect to
 47 | #' biological variation. Nucleic acids research 40, 4288-4297.
 48 | #'
 49 | #' @seealso \code{DBresult}, \code{TopDBresult}
 50 | #'
 51 | #' @examples
 52 | #' data(tca_ATAC)
 53 | #' tca_ATAC <- DBanalysis(tca_ATAC)
 54 | #'
 55 | #' @export
 56 | DBanalysis <- function(object, categories = "timepoint", norm.lib = TRUE,
 57 |                        filter.type = NULL, filter.value = NULL,
 58 |                        samplePassfilter = 2, ...) {
 59 |   if (!categories %in% colnames(object@design)) {
 60 |     err <- paste0("Can not find ", categories, " in design, please check if the correspoinding field is missing or a different name is used.")
 61 |     stop(err)
 62 |   }
 63 | 
 64 |   # all pairwise contrasts between the levels of the chosen design column
 65 |   object@contrasts <- contrastMatrix(object, categories)
 66 | 
 67 |   group <- object@design[[categories]]
 68 |   y <- DGEList(counts = object@counts, group = group)
 69 |   if (norm.lib) {
 70 |     y <- calcNormFactors(y)
 71 |   }
 72 |   if (!is.null(filter.type)) {
 73 |     if (is.null(filter.value)) {
 74 |       err <- paste0("\"filter.value\" is required to be specified for the chosen filter.type ",
 75 |                     filter.type, ".")
 76 |       stop(err)
 77 |     }
 78 |     # Logical index of the features whose (raw or normalized) counts exceed
 79 |     # filter.value in at least samplePassfilter samples. An unrecognised
 80 |     # filter.type is rejected instead of silently turning 'y' into NULL.
 81 |     keep <- switch(filter.type,
 82 |       raw = rowSums(y$counts > filter.value) >= samplePassfilter,
 83 |       cpm = rowSums(cpm(y, ...) > filter.value) >= samplePassfilter,
 84 |       rpkm = {
 85 |         giwidth <- object@genomicFeature$end - object@genomicFeature$start
 86 |         rowSums(rpkm(y, gene.length = giwidth, ...) > filter.value) >=
 87 |           samplePassfilter
 88 |       },
 89 |       stop("Unknown filter.type '", filter.type,
 90 |            "', options are 'raw', 'cpm' and 'rpkm'.")
 91 |     )
 92 |     y <- y[keep, , keep.lib.sizes = FALSE]
 93 |   }
 94 | 
 95 |   design <- model.matrix(~0 + group, data = y$samples)
 96 |   colnames(design) <- levels(y$samples$group)
 97 |   # Reorder the columns by first appearance of each level so that they line
 98 |   # up with the rows of the contrast matrix. Columns are selected by name
 99 |   # (numeric groups would otherwise be used as positions) and the result
100 |   # stays a matrix even when there is a single level.
101 |   design <- design[, as.character(unique(group)), drop = FALSE]
102 |   y <- estimateDisp(y, design)
103 |   fit <- glmFit(y, design, ...)
104 |   object@DBfit <- fit
105 |   object
106 | }
104 | 
105 | # Build the contrast table holding every ordered pair of levels found in the
106 | # chosen design column: rows are levels, columns are named "<a>vs<b>" and
107 | # carry 1 for level a and -1 for level b.
108 | contrastMatrix <- function(object, categories) {
109 |   lvls <- unique(object@design[[categories]])
110 |   n.lvl <- length(lvls)
111 |   n.col <- 2 * choose(n.lvl, 2)
112 |   contrastM <- matrix(0, n.lvl, n.col)
113 |   labels <- vector(mode = "character", length = n.col)
114 |   # each unordered pair fills two neighbouring columns: 'fwd' and 'fwd + 1'
115 |   fwd <- 1
116 |   for (ref in seq_len(n.lvl - 1)) {
117 |     for (other in (ref + 1):n.lvl) {
118 |       bwd <- fwd + 1
119 |       labels[fwd] <- paste0(lvls[other], "vs", lvls[ref])
120 |       labels[bwd] <- paste0(lvls[ref], "vs", lvls[other])
121 |       contrastM[c(ref, other), fwd] <- c(-1, 1)
122 |       contrastM[c(ref, other), bwd] <- c(1, -1)
123 |       fwd <- fwd + 2
124 |     }
125 |   }
126 |   dimnames(contrastM) <- list(lvls, labels)
127 |   contrastM
128 | }
133 | 
134 | 


--------------------------------------------------------------------------------
/R/DBresults.R:
--------------------------------------------------------------------------------
  1 | #' Test for differential expression
  2 | #'
  3 | #' This function is a wrapper for \code{\link{glmLRT}} in edgeR package. 
  4 | #' It performs likelihood ratio tests for given coefficient contrasts 
  5 | #' after fitting read counts to a negative binomial glm by
  6 | #' \code{\link{DBanalysis}}. \code{DBresult} also extracts the
  7 | #' differential analysis results of given contrasts at a chosen significance level. 
  8 | #' \code{DBresult.cluster} returns similar results but only 
  9 | #' contains genomic features belonging to a given cluster.
 10 | #'
 11 | #' @name DBresult
 12 | #'
 13 | #' @param object a \code{TCA} object, for \code{DBresult},
 14 | #' \code{DBanalysis} should already be called on the object;
 15 | #' for \code{DBresult.cluster}, both \code{DBanalysis} and
 16 | #' \code{timeclust} should be already called.
 17 | #'
 18 | #' @param group1 character string giving the group to be compared with,
 19 | #' i.e., the denominator in the fold changes. group1 can be set NULL and 
 20 | #' will be ignored if the comparisons are passed to \code{contrasts}
 21 | #'
 22 | #' @param group2 a character vector giving the other groups to 
 23 | #' compare with \code{group1}, i.e., the numerator in the fold changes.
 24 | #' group2 can be set NULL and will be ignored if the comparisons are 
 25 | #' passed to \code{contrasts}
 26 | #'
 27 | #' @param contrasts a character vector, each string in
 28 | #' the vector gives a contrast of two groups with the format
 29 | #' "group2vsgroup1", group1 is the denominator level in the fold
 30 | #' changes and group2 is the numerator
 31 | #' level in the fold changes.
 32 | #'
 33 | #' @param p.adjust character string specifying a correction method
 34 | #' for p-values. Options are "\code{holm}", "\code{hochberg}", 
 35 | #' "\code{hommel}", "\code{bonferroni}", "\code{BH}", "\code{BY}", 
 36 | #' "\code{fdr}", and "\code{none}". 
 37 | #'
 38 | #' @param top.sig logical if TRUE, only genomic regions with
 39 | #' given log2-fold changes and significance levels (p-value) 
 40 | #' will be returned. Log2-fold changes are defined by \code{abs.fold}
 41 | #' and \code{direction}; significance levels are defined by \code{pvalue} 
 42 | #' and \code{pvalue.threshold}
 43 | #'  
 44 | #' @param pvalue character string specify the type of p-values
 45 | #' used for defining the significance level(\code{PValue}
 46 | #' or adjusted p-value \code{paj})
 47 | #'
 48 | #' @param pvalue.threshold a numeric value giving threshold of
 49 | #' selected p-value, Significant changes have lower
 50 | #' (adjusted) p-values than the threshold.
 51 | #'
 52 | #' @param abs.fold a numeric value, the minimum absolute log2-fold
 53 | #' changes. The returned genomic regions have changes 
 54 | #' with absolute log2-fold changes exceeding \code{abs.fold}.
 55 | #'
 56 | #' @param direction character string specify the direction of fold
 57 | #' changes. "\code{up}": positive fold changes; "\code{down}":
 58 | #' negative fold changes; "\code{both}": both positive and
 59 | #' negative fold changes.  
 60 | #'
 61 | #' @param cluster an integer giving the number of cluster from which 
 62 | #' genomic features are extracted.
 63 | #'
 64 | #' @param  cmthreshold a numeric value, this argument is applicable
 65 | #' only if the \code{cmeans} clustering method is selected when calling
 66 | #' \code{\link{timeclust}} function. If not NULL, the result table of
 67 | #' genomic features that belong to the defined \code{cluster} and
 68 | #' whose membership values to this cluster exceed \code{cmthreshold}
 69 | #' is extracted.
 70 | #'
 71 | #' @param result.type character string giving the data type of return
 72 | #' value. Options are "GRangesList" and "list".
 73 | #'
 74 | #' @details This function uses \code{\link{glmLRT}} from edgeR which
 75 | #' perform likelihood ratio tests for the significance of changes.
 76 | #' For more details,
 77 | #' see \code{\link{glmLRT}}
 78 | #'
 79 | #' @note If \code{group1}, \code{group2} and \code{contrasts} are all not NULL,
 80 | #' result tables are extracted from the comparisons in \code{contrasts}.
 81 | #'
 82 | #' @return
 83 | #' A list or a GRangesList.
 84 | #' If \code{result.type} is "GRangesList", a GRangesList is returned containing
 85 | #' the differential analysis results for all provided contrasts. Each GRanges 
 86 | #' object of the list is one contrast, the analysis results are contained in 4 
 87 | #' metadata columns:
 88 | #'
 89 | #' @return \code{logFC} log2-fold changes between two groups.
 90 | #'
 91 | #' @return \code{PValue} p-values.
 92 | #'
 93 | #' @return \code{paj} adjusted p-values
 94 | #'
 95 | #' @return \code{id} name of genomic features 
 96 | #'
 97 | #' If \code{result.type} is "list", a list of data frames is returned.
 98 | #' Each data frame contains one contrast with the following columns:
 99 | #'
100 | #' @return \code{logFC} log2-fold changes between two groups.
101 | #'
102 | #' @return \code{PValue} p-values.
103 | #'
104 | #' @return \code{paj} adjusted p-values
105 | #'
106 | #' @return \code{chr}  name of chromosomes 
107 | #'
108 | #' @return \code{start} starting positions of features in the 
109 | #' chromosomes
110 | #'
111 | #' @return \code{end} ending positions of features in the chromosomes
112 | #'
113 | #' @return \code{id} name of genomic features
114 | #'
115 | #' @author
116 | #' Mengjun Wu, Lei Gu
117 | #'
118 | #' @seealso
119 | #'
120 | #' \code{\link{glmLRT}}
121 | #'
122 | #' @examples
123 | #' data(tca_ATAC)
124 | #' tca_ATAC <- DBanalysis(tca_ATAC)
125 | #' ### extract differential analysis of 24h, 72h to 0h
126 | #' # set the contrasts using the 'group1' and 'group2' parameters
127 | #' res1 <- DBresult(tca_ATAC, group1 = '0h', group2 = c('24h', '72h'))
128 | #' # one can get the same result by setting the contrasts using the 'contrasts' parameter
129 | #' res2 <- DBresult(tca_ATAC, contrasts = c('24hvs0h', '72hvs0h'))
130 | #' # extract significant differential events
131 | #' res.sig <- DBresult(tca_ATAC, contrasts = c('24hvs0h', '72hvs0h'),
132 | #'                    top.sig = TRUE)
133 | #'
134 | #' # extract differential analysis of 24h, 72h to 0h of a given cluster
135 | #' tca_ATAC <- timecourseTable(tca_ATAC, filter = TRUE)
136 | #' tca_ATAC <- timeclust(tca_ATAC, algo = 'cm', k = 6)
137 | #' res_cluster1 <- DBresult.cluster(tca_ATAC, group1 = '0h',
138 | #'                                  group2 = c('24h', '72h'),
139 | #'                                  cluster = 1)
140 | #'
141 | #'
142 | #'
143 | #' @export
144 | DBresult <- function(object, group1 = NULL, group2 = NULL,
145 |                      contrasts = NULL, p.adjust = "fdr",
146 |                      top.sig = FALSE, pvalue = "paj",
147 |                      pvalue.threshold = 0.05, abs.fold = 2,
148 |                      direction = "both", result.type = "GRangesList") {
149 |   if (is.null(group1) && is.null(group2) && is.null(contrasts)) {
150 |     stop("Either information of groups to compare or \"contrasts\" should be provided")
151 |   }
152 |   # explicitly supplied contrasts take precedence over group1/group2
153 |   if (is.null(contrasts)) {
154 |     if (sum(group1 %in% group2) > 0) {
155 |       warning("Members in group1 are also found in group2, overlapped members are removed from group2.")
156 |       group2 <- group2[!group2 %in% group1]
157 |     }
158 |     contrasts <- contrastNames(group1, group2)
159 |   }
160 |   if (!p.adjust %in% c("holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none")) {
161 |     stop("Method for adjusting P-values should be one of following methods: 'holm', 'hochberg', 'hommel', 'bonferroni', 'BH', 'BY', 'fdr', 'none'. Character string is case sensitive.")
162 |   }
163 |   fit <- object@DBfit
164 |   contrast.table <- object@contrasts
165 |   # fail early, and by name, on contrasts missing from the contrast table
166 |   unknown <- setdiff(contrasts, colnames(contrast.table))
167 |   if (length(unknown) > 0) {
168 |     stop("Can not find the following contrast(s) in the contrast table: ",
169 |          paste(unknown, collapse = ", "),
170 |          ". Check the group names and that DBanalysis has been called on the object.")
171 |   }
172 |   # coordinates of the features that are present in the fitted model
173 |   gi <- object@genomicFeature[object@genomicFeature$id %in%
174 |                                 row.names(fit$coefficients), ]
175 |   gi <- gi[, c("chr", "start", "end", "id")]
176 |   res <- list()
177 |   for (i in contrasts) {
178 |     tmp <- glmLRT(fit, contrast = contrast.table[, i])
179 |     # columns 1 and 4 of the test table; the latter is used as 'PValue' below
180 |     restmp <- tmp$table[, c(1, 4)]
181 |     # stats:: makes clear that the function, not the 'p.adjust' argument,
182 |     # is being called
183 |     restmp$paj <- stats::p.adjust(restmp$PValue, method = p.adjust)
184 |     restmp <- cbind(restmp, gi, stringsAsFactors = FALSE)
185 |     res[[i]] <- restmp
186 |   }
187 |   if (top.sig) {
188 |     res <- DBresult.filter(x = res, pvalue = pvalue,
189 |                            pvalue.threshold = pvalue.threshold,
190 |                            abs.fold = abs.fold,
191 |                            direction = direction)
192 |   }
193 |   if (tolower(result.type) == "grangeslist") {
194 |     # Number of rows (features) per contrast. lengths() must not be used
195 |     # here: on a list of data frames it returns the number of columns, which
196 |     # yields a wrong, recycled split factor.
197 |     n.rows <- vapply(res, nrow, integer(1))
198 |     gr <- as(do.call(rbind, unname(res)), "GRanges")
199 |     # explicit levels keep the requested contrast order and retain contrasts
200 |     # that have no rows left after filtering
201 |     contrast.of.row <- factor(rep(names(res), n.rows), levels = names(res))
202 |     res <- split(gr, contrast.of.row)
203 |   }
204 | 
205 |   res
206 | }
192 | 
193 | #' @rdname DBresult
194 | #' @export
DBresult.cluster <- function(object, group1 = NULL, group2 = NULL,
                             contrasts = NULL, p.adjust = "fdr",
                             top.sig = FALSE, pvalue = "paj",
                             pvalue.threshold = 0.05, abs.fold = 2,
                             direction = "both",cluster, cmthreshold = NULL,
                             result.type = "GRangesList") {
  # Same as DBresult(), but restricted to the features assigned to one
  # cluster (optionally only those whose membership value for that cluster
  # exceeds 'cmthreshold').
  if (length(object@clusterRes@cluster) == 0) {
    stop("No cluster information provided, clustering analysis must be performed first")
  }
  DBres <- DBresult(object, group1 = group1, group2 = group2,
                    contrasts = contrasts, p.adjust = p.adjust,
                    top.sig = top.sig, pvalue = pvalue,
                    pvalue.threshold = pvalue.threshold,
                    abs.fold = abs.fold,
                    direction = direction, result.type = "list")

  # The cluster members do not depend on the contrast, so work them out once.
  clusters <- object@clusterRes@cluster
  in.cluster <- which(clusters == cluster)
  clusternames <- names(clusters)[in.cluster]
  if (!is.null(cmthreshold)) {
    membership <- object@clusterRes@membership
    # Check for a missing/empty membership matrix BEFORE indexing it;
    # indexing an empty matrix fails with an unrelated subscript error.
    if (is.null(membership) || length(membership) == 0) {
      stop("No membership matrix found. To get membership matrix, please choose 'cmeans' clustering method when calling timeclust")
    }
    clusternames <- clusternames[which(membership[in.cluster, cluster] > cmthreshold)]
  }

  res <- lapply(DBres, function(restmp) {
    # Indexing a data frame by row names that are absent yields all-NA rows,
    # so only ask for cluster members that are present in this table
    # (features may have been dropped by filtering).
    restmp[intersect(clusternames, row.names(restmp)), , drop = FALSE]
  })

  if (tolower(result.type) == "grangeslist") {
    # Repeat each contrast name once per row (lengths() would count columns).
    n.rows <- vapply(res, nrow, integer(1))
    gr <- as(do.call(rbind, unname(res)), "GRanges")
    res <- split(gr, factor(rep(names(res), n.rows), levels = names(res)))
  }
  res
}
238 | 
# Build contrast labels of the form "<group2 member>vs<group1>".
# group1 is a single string; group2 is a string or a character vector.
# Returns one label per element of group2, in the same order.
contrastNames <- function(group1, group2) {
  labels <- character(length(group2))
  for (idx in seq_along(group2)) {
    labels[idx] <- paste0(group2[idx], "vs", group1)
  }
  labels
}
249 | 
# Keep only significant features in each per-contrast result table.
#
# x                 list of data frames, each with a 'logFC' column and the
#                   p-value column named by 'pvalue'.
# pvalue            name of the column compared against 'pvalue.threshold'.
# pvalue.threshold  rows are kept when their p-value is strictly below this.
# abs.fold          non-negative log fold-change cutoff.
# direction         "up" (logFC >= abs.fold), "down" (logFC <= -abs.fold) or
#                   "both".
#
# Returns a list with the same names as 'x'. For "both", rows passing the
# "up" criterion come first, followed by those passing "down".
DBresult.filter <- function(x, pvalue = "paj", pvalue.threshold = 0.05,
                            abs.fold = 2, direction = "both") {
  if (abs.fold < 0) {
    stop("\"abs.fold\" should be a non-negative number.")
  }
  # An unrecognised direction used to skip the fold-change filter silently.
  direction <- match.arg(direction, c("both", "up", "down"))

  lapply(x, function(dt) {
    fc <- dt$logFC
    up <- which(fc >= abs.fold)
    down <- which(fc <= -abs.fold)
    keep <- switch(direction,
                   up = up,
                   down = down,
                   # with abs.fold == 0 a logFC of exactly 0 satisfies both
                   # criteria; setdiff keeps such rows from appearing twice
                   both = c(up, setdiff(down, up)))
    dt <- dt[keep, , drop = FALSE]
    dt[which(dt[[pvalue]] < pvalue.threshold), , drop = FALSE]
  })
}
280 | 


--------------------------------------------------------------------------------
/R/GenericFunctions.R:
--------------------------------------------------------------------------------
  1 | #' @import ggplot2
  2 | #' @import edgeR
  3 | #' @import e1071
  4 | #' @import cluster
  5 | #' @import reshape2
  6 | #' @import grid
  7 | #' @import locfit
  8 | #' @import GenomicRanges
  9 | #' @import SummarizedExperiment
 10 | #' @importFrom BiocGenerics counts counts<- design
 11 | #' @importFrom IRanges IRanges
 12 | #' @importFrom Rsamtools BamFile BamFileList
 13 | #' @importFrom GenomicAlignments summarizeOverlaps
 14 | #' @importFrom grDevices rainbow
 15 | #' @importFrom stats as.dist complete.cases cor cutree hclust kmeans model.matrix sd time
 16 | #' @importFrom utils capture.output read.table
 17 | #' @importFrom methods new validObject
 18 | NULL
 19 | 
 20 | counts.TCA <- function(object, normalization = "none", lib.norm = TRUE,
 21 |                        log = FALSE, ...) {
 22 |   if (!normalization %in% c("none", "rpkm", "cpm")) {
 23 |     stop("'normalization method should one of 'none', 'rpkm', 'cpm'.")
 24 |   }
 25 |   if (normalization == "none") {
 26 |     t <- object@counts
 27 |   }
 28 |   if (normalization != "none") {
 29 |     genomicFeature <- object@genomicFeature
 30 |     group <- object@design$group
 31 |     y <- DGEList(counts = object@counts, group = group)
 32 |     if (lib.norm) {
 33 |       y <- calcNormFactors(y)
 34 |     }
 35 |     c <- switch(normalization, rpkm = {
 36 |       giwidth <- genomicFeature$end - genomicFeature$start
 37 |       t <- rpkm(y, normalized.lib.sizes = lib.norm, gene.length = giwidth,
 38 |                 log = log, ...)
 39 |       t
 40 |     }, cpm = {
 41 |       t <- cpm(y, normalized.lib.sizes = lib.norm, log = log, ...)
 42 |       t
 43 |     })
 44 |   }
 45 |   t
 46 | }
 47 | 
 48 | #' Extracts counts of a TCA object.
 49 | #'
 50 | #' \code{counts} extract raw read counts  stored in a \code{TCA} object 
 51 | #' or compute normalized counts from the raw counts.
 52 | #'
 53 | #' @name counts
 54 | #' @aliases counts counts,TCA-method counts<-,TCA-method
 55 | #' @param object a \code{TCA} object.
 56 | #'
 57 | #' @param normalization character string giving the normalization method.
 58 | #' Options are "\code{none}" (original raw counts), "\code{cpm}" (counts
 59 | #' per million),
 60 | #' "\code{rpkm}" (reads per kilobase per million).
 61 | #'
 62 | #' @param lib.norm logical indicating whether or not use effective library
 63 | #' size (see Details below) when \code{normalization} is "\code{cpm}" or
 64 | #' "\code{rpkm}".
 65 | #'
 66 | #' @param log logical if \code{TRUE}, the returned value will be on a log2
 67 | #' scale.
 68 | #'
 69 | #' @param value an integer matrix.
 70 | #'
 71 | #' @param ... additional arguments passed to \code{\link{cpm}} or
 72 | #' \code{\link{rpkm}} in the edgeR package.
 73 | #'
 74 | #' @details when calculating normalized counts, library size can be rescaled
 75 | #' to minimize the log-fold changes between samples for most genomic features
 76 | #' (e.g. genes, binding sites) by multiplying a scale factor. The rescaled
 77 | #' library size is called effective library size. In this function, the scale
 78 | #' factor is calculated using the weighted trimmed mean of M-values (TMM,
 79 | #' Robinson et al (2010))
 80 | #'
 81 | #' If log2 values are computed, a small count would be added to avoid logarithm 
 82 | #' of zero. The actual added count will be scaled according to the library size,
 83 | #' for details see \code{\link{addPriorCount}} in the edgeR package
 84 | #' when not specified, the prior count is set to 0.25 by default.
 85 | #'
 86 | #' @references
 87 | #' Robinson, M. D., & Oshlack, A. (2010). A scaling normalization method for
 88 | #' differential expression analysis of RNA-seq data. Genome biology, 11(3), 1.
 89 | #'
 90 | #' @return
 91 | #' An integer matrix
 92 | #'
 93 | #' @author
 94 | #' Mengjun Wu
 95 | #'
 96 | #' @examples
 97 | #' data(tca_ATAC)
 98 | #' c <- counts(tca_ATAC)
 99 | #' # normalized counts table
100 | #' c_norm <- counts(tca_ATAC, normalization='rpkm')
101 | #' @export
setMethod(f = "counts", signature = "TCA", definition = counts.TCA)

#' @rdname counts
#' @exportMethod 'counts<-'
setMethod(
  f = "counts<-",
  signature = "TCA",
  definition = function(object, value) {
    # store the new counts, then re-validate the object before returning it
    object@counts <- value
    validObject(object)
    object
  }
)
111 | 
112 | 
113 | #' Accessors to extract slots of a TCA class.
114 | #'
115 | #' Accessors are provided to extract \code{design}, \code{genomicFeature},
116 | #' \code{tcTable}, \code{clustResults} slots of a TCA class. The \code{design}
117 | #' slot stores experimental information of samples/libraries, the
118 | #' \code{genomicFeature} slot stores genomic coordinates of features, the
119 | #' \code{tcTable} slot stores time course data as a matrix, where rows are
120 | #' genomic features and columns time points. The \code{clustResults} slot
121 | #' stores results of clustering analysis as a \code{clust} object.
122 | #'
123 | #' @name TCA.accessors
124 | #' @aliases design design,TCA-method genomicFeature,TCA-method
125 | #' tcTable,TCA-method clustResults,TCA-method
126 | #'
127 | #' @param object a \code{TCA} object
128 | #' @return
129 | #' \code{design} returns a data frame. \code{genomicFeature} returns a data frame.
130 | #' \code{tcTable} returns a numeric matrix. \code{clustResults} returns a
131 | #' \code{clust} object, see \code{\link{clust}} for details.
132 | #'
133 | #' @author
134 | #' Mengjun Wu
135 | #'
136 | #' @seealso
137 | #' \code{\link{clust}}
138 | #'
139 | #' @examples
140 | #' data(tca_ATAC)
141 | #' genomicFeature(tca_ATAC)
142 | #' tcTable(tca_ATAC)
143 | 
144 | #' @rdname TCA.accessors
145 | #' @export
setMethod(
  f = "design",
  signature = "TCA",
  definition = function(object) object@design
)

#' @rdname TCA.accessors
#' @export
setGeneric(
  name = "genomicFeature",
  def = function(object) standardGeneric("genomicFeature")
)
# TCA method: returns the genomicFeature slot.
setMethod(
  f = "genomicFeature",
  signature = "TCA",
  definition = function(object) object@genomicFeature
)

#' @rdname TCA.accessors
#' @export
setGeneric(
  name = "tcTable",
  def = function(object) standardGeneric("tcTable")
)

#' @rdname TCA.accessors
#' @export
setMethod(
  f = "tcTable",
  signature = "TCA",
  definition = function(object) object@tcTable
)

#' @rdname TCA.accessors
#' @export
setGeneric(
  name = "clustResults",
  def = function(object) standardGeneric("clustResults")
)

#' @rdname TCA.accessors
#' @export
setMethod(
  f = "clustResults",
  signature = "TCA",
  # the slot holding the clustering result is named clusterRes
  definition = function(object) object@clusterRes
)
181 | 
182 | #' Accessors to extract slots of a clust class.
183 | #'
184 | #' Accessors are provided to extract \code{data}, \code{centers}, \code{cluster}, 
185 | #' and \code{membership} slots stored in a clust class.
186 | #' @name clust.accessors
187 | #' @aliases clustData clustData,clust-method clustCenters,clust-method
188 | #' clustCluster,clust-method clustMembership,clust-method
189 | #'
190 | #' @param object a \code{clust} object
191 | #' @return
192 | #' \code{clustData} returns a data matrix. \code{clustCenters} returns a matrix of
193 | #' centers. \code{clustCluster} returns an integer vector. \code{clustMembership}
194 | #' returns a matrix of membership, see \code{\link{clust}} for details.
195 | #'
196 | #' @author
197 | #' Mengjun Wu
198 | #'
199 | #' @seealso
200 | #' \code{\link{clust}}
201 | 
202 | #' @rdname clust.accessors
203 | #' @export
setGeneric(
  name = "clustData",
  def = function(object) standardGeneric("clustData")
)

#' @rdname clust.accessors
#' @export
setMethod(
  f = "clustData",
  signature = "clust",
  definition = function(object) object@data
)

#' @rdname clust.accessors
#' @export
setGeneric(
  name = "clustCenters",
  def = function(object) standardGeneric("clustCenters")
)

#' @rdname clust.accessors
#' @export
setMethod(
  f = "clustCenters",
  signature = "clust",
  definition = function(object) object@centers
)

#' @rdname clust.accessors
#' @export
setGeneric(
  name = "clustCluster",
  def = function(object) standardGeneric("clustCluster")
)

#' @rdname clust.accessors
#' @export
setMethod(
  f = "clustCluster",
  signature = "clust",
  definition = function(object) object@cluster
)

#' @rdname clust.accessors
#' @export
setGeneric(
  name = "clustMembership",
  def = function(object) standardGeneric("clustMembership")
)

#' @rdname clust.accessors
#' @export
setMethod(
  f = "clustMembership",
  signature = "clust",
  definition = function(object) object@membership
)
244 | 


--------------------------------------------------------------------------------
/R/countReads.R:
--------------------------------------------------------------------------------
  1 | #' count mapped reads overlap genomic intervals
  2 | #'
  3 | #' This function counts mapped reads from multiple BAM files 
  4 | #' overlapping genomic intervals in \code{genomicFeature} in a 
  5 | #' \code{TCA} object. The resulting count table is stored in the
  6 | #' \code{counts} slot of the \code{TCA} object.
  7 | #'
  8 | #' @param object a \code{TCA} object.
  9 | #'
 10 | #' @param dir character string giving the directory of BAM files.
 11 | #'
 12 | #' @param method character string giving the counting method. Options
 13 | #' are "\code{summarizeOverlaps}" and "\code{featureCounts}". For
 14 | #' Windows system, only "\code{summarizeOverlaps}" can be used, For
 15 | #' Linux system, both methods can be used.
 16 | #'
 17 | #' @param ... additional arguments passed to
 18 | #' \code{\link{summarizeOverlaps}} in GenomicAlignments package 
 19 | #' or \code{\link{featureCounts}} in Rsubread package.
 20 | #'
 21 | #' @param zero.based Logical. If TRUE, the start positions of the
 22 | #' genomic intervals are \emph{0-based}, if FALSE, the start positions
 23 | #' will be \emph{1-based}.
 24 | #'
 25 | #' @details
 26 | #' This function provides two options to count the mapped reads: 
 27 | #' "\code{summarizeOverlaps}" in the GenomicAlignments package and 
 28 | #' "\code{featureCounts}" in the Rsubread package. As the Rsubread package
 29 | #' is only available for Linux systems, Windows users can only choose
 30 | #' "\code{summarizeOverlaps}". The user could further customize the
 31 | #' counting parameters by passing additional arguments (...), otherwise
 32 | #' the default settings of the two methods will be used. For details 
 33 | #' of the counting parameters, see \code{\link{summarizeOverlaps}}, 
 34 | #' \code{\link{featureCounts}}.
 35 | #'
 36 | #'
 37 | #' @return
 38 | #' A TCA object with updated \code{counts} slot.
 39 | #'
 40 | #' @author
 41 | #' Mengjun Wu
 42 | #'
 43 | #' @seealso
 44 | #' \code{\link{summarizeOverlaps}}, \code{\link{featureCounts}} 
 45 | #'
 46 | #'
 47 | #' @export
 48 | countReads <- function(object, dir, method = "summarizeoverlaps",
 49 |                        zero.based = TRUE,...) {
 50 |   name.col.tmp <- colnames(object@design)
 51 |   name.col.tmp <- tolower(name.col.tmp)
 52 |   colnames(object@design) <- name.col.tmp
 53 |   if (!"bamfile" %in% colnames(object@design)) {
 54 |     err <- paste0("Can not find information of bam files in design, please check whether the correspoinding field is missing or the column name is the same as required.")
 55 |     stop(err)
 56 |   }
 57 |   old <- setwd(tempdir())
 58 |   on.exit(setwd(old), add = TRUE)
 59 |   setwd(dir)
 60 |   bamfiles <- as.vector(object@design$bamfile)
 61 |   features <- object@genomicFeature
 62 |   ignore.strand <- NULL
 63 |   if (is.null(features$strand)) {
 64 |     warning("No strand information is provided, strand is ignored in reads counting")
 65 |     ignore.strand <- TRUE
 66 |   }
 67 |   gi <- makeGRangesFromDataFrame(features, keep.extra.columns = TRUE,
 68 |                                  starts.in.df.are.0based = zero.based)
 69 |   method1 <- tolower(method)
 70 |   if (method1 == "featureCounts" && .Platform$OS.type == "windows") {
 71 |     stop(" 'featureCounts' is only available in Linux/Mac OS system.")
 72 |   }
 73 |   count.table <- switch(method1, summarizeoverlaps = {
 74 |     bamfl <- Rsamtools::BamFileList(bamfiles, yieldSize = 1e+06)
 75 |     c <- GenomicAlignments::summarizeOverlaps(gi, bamfl,
 76 |                                               ignore.strand = ignore.strand, ...)
 77 |     count.table <- SummarizedExperiment::assays(c)$counts
 78 |     row.names(count.table) <- features$id
 79 |     count.table
 80 |   }, featureCounts = {
 81 |     warning("To use the featureCounts, you need to load 'Rsubread' package first")
 82 |     gi_rsubread <- createAnnotationFile(gi)
 83 |     stra <- 0
 84 |     if (!ignore.strand) {
 85 |       stra <- 1
 86 |     }
 87 |     for (i in bamfiles) {
 88 |       m <- paste0("counting reads in bamfile ", i)
 89 |       message(m)
 90 |       o <- capture.output(x <- featureCounts(i, annot.ext = gi_rsubread,
 91 |                                              strandSpecific = stra, ...))
 92 |       count.table <- x$counts
 93 |     }
 94 |     rm(o)
 95 |     count.table
 96 |   })
 97 |   count.table <- as.matrix(count.table)
 98 |   counts(object) <- count.table
 99 |   object
100 | }
101 | 


--------------------------------------------------------------------------------
/R/data.R:
--------------------------------------------------------------------------------
 1 | #' An example read Counts table
 2 | #'
 3 | #' A dataset of exemplary read counts
 4 | #'
 5 | #' @docType data
 6 | #' @keywords datasets
 7 | #' @name countsTable
 8 | #' @usage data(countsTable)
 9 | #' @format A data frame containing experiment design information
10 | #' for 12 samples/libraries.
11 | #' @return
12 | #' A data frame
13 | #' @examples
14 | #' data(countsTable)
15 | NULL
16 | 
17 | #' An example experiment design without BAM file information
18 | #'
19 | #' A dataset of exemplary experiment design without BAM file
20 | #' information
21 | #'
22 | #' @docType data
23 | #' @keywords datasets
24 | #' @name experiment
25 | #' @usage data(experiment)
26 | #' @format A data frame containing experiment design information
27 | #' for 12 samples/libraries.
28 | #' @return
29 | #' A data frame
30 | #' @examples
31 | #' data(experiment)
32 | NULL
33 | 
34 | #' An example experiment design with BAM file information
35 | #'
36 | #' A dataset of exemplary experiment design with BAM file
37 | #' information
38 | #'
39 | #' @docType data
40 | #' @keywords datasets
41 | #' @name experiment_BAMfile
42 | #' @usage data(experiment_BAMfile)
43 | #' @format A data frame containing experiment design information
44 | #' for 12 samples/libraries.
45 | #' @return
46 | #' A data frame
47 | #' @examples
48 | #' data(experiment_BAMfile)
49 | NULL
50 | 
51 | #' An example reference genomic regions
52 | #'
53 | #' A dataset of exemplary genomic regions
54 | #'
55 | #' @docType data
56 | #' @keywords datasets
57 | #' @name genomicIntervals
58 | #' @usage data(genomicIntervals)
59 | #' @format A data frame containing 2751 genomic regions.
60 | #' @return
61 | #' A data frame
62 | #' @examples
63 | #' data(genomicIntervals)
64 | NULL
65 | 
66 | #' An example TCA object
67 | #'
68 | #' A TCA object storing exemplary ATAC-seq time course data,
69 | #' including the experiment design, read counts, reference
70 | #' genomic regions.
71 | #'
72 | #' @docType data
73 | #' @keywords datasets
74 | #' @name tca_ATAC
75 | #' @usage data(tca_ATAC)
76 | #' @format A TCA object of exemplary ATAC-seq time course data
77 | #' @return
78 | #' A TCA object
79 | #' @examples
80 | #' data(tca_ATAC)
81 | NULL
82 | 
83 | 


--------------------------------------------------------------------------------
/R/peakreference.R:
--------------------------------------------------------------------------------
  1 | #' combine and merge multiple BED files
  2 | #'
  3 | #' This function merges overlapping genomic regions into a single feature. 
  4 | #' The merged single feature represents the widest genomic interval 
  5 | #' that covers all overlapping regions.
  6 | #'
  7 | #' @param data a data frame containing coordinate information of peaks
  8 | #' to be merged. Columns of the data frame should be consistent with
  9 | #' the BED format where the first column contains chromosome information,
 10 | #' the second column the starting position, and the third column 
 11 | #' the ending position.
 12 | #'
 13 | #' @param dir a character string giving the directory where BED files
 14 | #' are stored. If \code{data} is not given, the function will read
 15 | #' in the BED files under \code{dir}.
 16 | #'
 17 | #' @param pattern an \code{\link{regular expression}}, only files that
 18 | #' have names match the regular expression will be read in.
 19 | #'
 20 | #' @param merge logical indicating whether to merge overlapped regions
 21 | #' or not. If False, regions are simply combined.
 22 | #'
 23 | #' @param overlap a numeric value giving the least number of base(s)
 24 | #' two regions should overlap when merging them.
 25 | #'
 26 | #' @param ratio a numeric value giving the threshold of overlapping
 27 | #' ratio between two regions to merge them. See '\code{Details}' below
 28 | #' for the definition of the overlapping ratio.
 29 | #'
 30 | #' @return a data frame with four columns: \code{chr}, \code{start},
 31 | #' \code{end}, \code{id}
 32 | #'
 33 | #' @details
 34 | #' The overlapping ratio (OR) is defined as:
 35 | #'
 36 | #' \deqn{ OR = \frac{n}{\min(length(a), length(b))}}
 37 | #'
 38 | #' \eqn{a}, \eqn{b} are two genomic regions, \eqn{n} is the number of
 39 | #' overlapping bases between region \eqn{a} and region \eqn{b}.
 40 | #'
 41 | #' @author
 42 | #' Mengjun Wu, Lei Gu
 43 | #'
 44 | #' @examples
 45 | #' peaks <- data.frame(chr = c(rep('chr1',4),rep('chr2', 3), rep('chr3',2)),
 46 | #'                     start = c(100,148,230,300,330,480,1000,700,801),
 47 | #'                     end = c(150,220,500,450,600,900,1050,760,900))
 48 | #'
 49 | #' merged_peaks <- peakreference(data = peaks, merge = TRUE, overlap = 1)
 50 | #'
 51 | #' @export
 52 | 
 53 | peakreference <- function(data = NULL, dir = NULL, pattern = NULL,
 54 |                           merge = TRUE, overlap = 1, ratio = NULL) {
 55 |   if (is.null(data) && is.null(dir)) {
 56 |     stop("Either a data.frame of genomic coordinates or a directory 
 57 |          for the BED files should be given")
 58 |   }
 59 |   if (!is.null(data)) {
 60 |     checkBEDformat(data)
 61 |     data[, 1] <- factor(data[, 1])
 62 |     peakset <- data
 63 |   }
 64 |   if (is.null(data) && !is.null(dir)) {
 65 |     old <- setwd(tempdir())
 66 |     on.exit(setwd(old), add = TRUE)
 67 |     setwd(dir)
 68 |     filenames <- list.files(pattern = pattern)
 69 |     if (length(filenames) == 0) {
 70 |       err <- paste0("Can not find file names containing '",
 71 |                     pattern, "'.")
 72 |       stop(err)
 73 |     }
 74 |     datalist <- lapply(filenames, function(x) {
 75 |       read.table(file = x, header = FALSE)
 76 |     })
 77 |     peakset <- do.call(rbind, datalist)
 78 |     checkBEDformat(peakset)
 79 |   }
 80 |   peakset <- peakset[order(peakset[, 1], peakset[, 2]), ]
 81 |   if (merge) {
 82 |     if (overlap <= 0 || round(overlap) != overlap) {
 83 |       stop("\"overlap\" must be integer and greater than 0.")
 84 |     }
 85 |     peakset.sub <- split(peakset, peakset[, 1],
 86 |                          drop = TRUE)
 87 |     level <- names(peakset.sub)
 88 |     mergedpeak <- c()
 89 |     for (i in seq_len(length(peakset.sub))) {
 90 |       temp <- peakset.sub[[i]]
 91 |       if (is.null(ratio)) {
 92 |         submerge <- intervalmerge(temp[, 2], temp[, 3],
 93 |                                   overlap = overlap)
 94 |       } else {
 95 |         submerge <- intervalmerge(temp[, 2], temp[, 3],
 96 |                                   ratio = ratio)
 97 |       }
 98 | 
 99 |       chr <- rep(level[i], length(submerge[, 1]))
100 |       submerge1 <- data.frame(chr, submerge)
101 |       mergedpeak <- rbind(mergedpeak, submerge1)
102 |     }
103 |     name <- paste0("peak", seq_len(length(mergedpeak[, 1])))
104 |     mergedpeak <- data.frame(mergedpeak, name)
105 |     colnames(mergedpeak) <- c("chr", "start", "end", "id")
106 |     mergedpeak
107 |   } else {
108 |     peakset
109 |   }
110 | 
111 | }
112 | 
113 | 
# Validate that 'data' looks like BED-style coordinates: at least three
# columns, chromosome names in column 1 (character, or a factor that converts
# to character), and whole-number numeric positions without missing values in
# columns 2 and 3. Stops with an error on the first violation; returns NULL
# invisibly when the input is acceptable.
checkBEDformat <- function(data) {
  if (ncol(data) < 3) {
    stop("At least three columns should be provided. The first column contains chromosome name,\n         the second column contains starting position, the third column contains ending position.")
  }
  if (!is.character(as.vector(data[, 1]))) {
    stop("The first column contains chromosome name and must be character.")
  }
  starts <- data[, 2]
  ends <- data[, 3]
  position.msg <- "the second and third column contain starting and ending positions, must be numeric."
  # Test the type first: round() on a non-numeric column would fail with an
  # unrelated error before the intended message could be shown.
  if (!is.numeric(starts) || !is.numeric(ends)) {
    stop(position.msg)
  }
  if (anyNA(starts) || anyNA(ends)) {
    stop("the second and third column must not contain missing values.")
  }
  # EITHER column holding a fractional value is an error.
  if (any(round(starts) != starts) || any(round(ends) != ends)) {
    stop(position.msg)
  }
  invisible(NULL)
}
127 | 
# Merge consecutive intervals that overlap.
#
# a0, b0   start and end positions; the merge walks them in the given order
#          and compares each interval with the most recent merged interval,
#          so the caller is expected to pass them sorted by start.
# overlap  merge when (end of current merged interval - next start) is at
#          least this many bases.
# ratio    merge when that same quantity, divided by the width of the shorter
#          of the two intervals, is at least 'ratio'. When both 'overlap' and
#          'ratio' are supplied, 'ratio' is used.
#
# Returns a two-column matrix (columns "a1", "b1") of merged starts and ends.
intervalmerge <- function(a0, b0, overlap = NULL,
                          ratio = NULL) {
  n <- length(a0)
  if (n <= 1) {
    a1 <- c(a0[1])
    b1 <- c(b0[1])
    return(cbind(a1, b1))
  }
  if (is.null(overlap) && is.null(ratio)) {
    stop("Either \"overlap\" or \"ratio\" must be provided.")
  }
  use.ratio <- !is.null(ratio)

  # Output vectors are pre-sized as copies of the inputs and filled in place;
  # 'k' is the index of the merged interval currently being extended.
  a1 <- a0
  b1 <- b0
  k <- 1L
  for (i in seq_len(n - 1)) {
    next.start <- a0[i + 1]
    next.end <- b0[i + 1]
    shared <- b1[k] - next.start
    if (use.ratio) {
      shorter <- min(b1[k] - a1[k], next.end - next.start)
      separate <- shared / shorter < ratio
    } else {
      separate <- shared < overlap
    }
    if (separate) {
      # not enough overlap: start a new merged interval
      k <- k + 1L
      a1[k] <- next.start
      b1[k] <- next.end
    } else {
      # enough overlap: extend the current merged interval if needed
      b1[k] <- max(b1[k], next.end)
    }
  }
  a1 <- a1[seq_len(k)]
  b1 <- b1[seq_len(k)]
  cbind(a1, b1)
}
164 | 
165 | 
166 | 
167 | 
168 | 
169 | 
170 | 
171 | 
172 | 


--------------------------------------------------------------------------------
/R/plots.R:
--------------------------------------------------------------------------------
  1 | #' Plot clustering results for time course data.
  2 | #'
  3 | #' This function plots the clusters generated from
  4 | #' \code{\link{timeclust}}. For fuzzy cmeans clustering, data points
  5 | #' are color-coded according to membership values, the color palettes
  6 | #' can be customized.
  7 | #'
  8 | #' @param object a \code{TCA} object or a \code{clust} object
  9 | #'
 10 | #' @param categories character string giving the x-axis label
 11 | #'
 12 | #' @param value character string giving the y-axis label
 13 | #'
 14 | #' @param cols integer value specifying number of columns in the final
 15 | #' layout.
 16 | #'
 17 | #' @param cl.color  character string specifying a color for hard
 18 | #' clustering.
 19 | #'
 20 | #' @param membership.color  color palettes, a character vector of
 21 | #' n colors
 22 | #'
 23 | #' @param title.size numeric value specifying the font size of title
 24 | #' of each
 25 | #' plot in the layout
 26 | #'
 27 | #' @param axis.line.size numeric value specifying the size of both
 28 | #' axis lines
 29 | #'
 30 | #' @param axis.title.size numeric value specifying the font size of
 31 | #' titles of both axis
 32 | #'
 33 | #' @param axis.text.size numeric value specifying the font size of
 34 | #' labels of both axis
 35 | #'
 36 | #' @param legend.title.size numeric value specifying the font size
 37 | #' of legend title
 38 | #'
 39 | #' @param legend.text.size numeric value specifying the font size of
 40 | #' legend text
 41 | #'
 42 | #' @return
 43 | #' Plot all clusters in one plot and return a list of ggplot objects,
 44 | #' each object is for one cluster. The ggplot object can be drawn by
 45 | #' calling \code{\link{print.ggplot}}
 46 | #'
 47 | #' @examples
 48 | #' x <- matrix(sample(500, 1600, replace = TRUE), nrow = 200,
 49 | #'             dimnames = list(paste0('peak', 1:200), 1:8))
 50 | #' clust_res <- timeclust(x, algo = 'cm', k = 4, standardize = TRUE)
 51 | #' p <- timeclustplot(clust_res, cols =2)
 52 | #' # to plot a individual cluster
 53 | #' print (p[[2]]) # plot cluster 2
 54 | #' print (p[[3]]) # plot cluster 3
 55 | #'
 56 | #' @author
 57 | #' Mengjun Wu
 58 | #' @export
 59 | 
 60 | timeclustplot <- function(object = NULL, categories = "timepoint",
 61 |                           value = "expression", cols = NULL,
 62 |                           cl.color = "gray50",
 63 |                           membership.color = rainbow(30, s = 3/4, v = 1, start = 1/6),
 64 |                           title.size = 18, axis.line.size = 0.6,
 65 |                           axis.title.size = 18,
 66 |                           axis.text.size = 16, legend.title.size = 14,
 67 |                           legend.text.size = 14) {
 68 | 
 69 |   if (class(object) != "clust" && class(object) != "TCA") {
 70 |     stop("object should be a 'timeclust' object or a 'TCA' object")
 71 |   }
 72 |   if (class(object) == "clust") {
 73 |     data <- object@data
 74 |     cluster <- object@cluster
 75 |     membership <- object@membership
 76 |   }
 77 |   if (class(object) == "TCA") {
 78 |     data <- object@clusterRes@data
 79 |     cluster <- object@clusterRes@cluster
 80 |     membership <- object@clusterRes@membership
 81 |   }
 82 |   ncl <- max(cluster)
 83 |   membercolor <- vector(length = length(cluster))
 84 |   membervalue <- list()
 85 |   counter <- 0
 86 |   if (!sum(dim(membership) == 0) == 2) {
 87 |     color <- membership.color
 88 |     colorseq <- seq(0, 1, length = length(color))
 89 |     for (i in seq_len(ncl)) {
 90 |       mtmp <- membership[cluster == i, i]
 91 |       membervalue[[i]] <- mtmp
 92 |       for (j in seq_len(length(mtmp))) {
 93 |         counter <- counter + 1
 94 |         ind <- which(abs(colorseq - mtmp[j]) == min(abs(colorseq - mtmp[j])))
 95 |         membercolor[counter] <- color[ind]
 96 |       }
 97 |     }
 98 |     membervalue <- unlist(membervalue)
 99 |     names(membercolor) <- membervalue
100 |   }
101 | 
102 |   plotlist <- list()
103 |   for (i in seq_len(ncl)) {
104 |     title <- paste0("Cluster ", i)
105 |     dtmp <- data[cluster == i, ]
106 |     a <- which(cluster == i)
107 |     if (length(a) == 1) {
108 |       dtmp <- data.frame(time = 1:length(dtmp), value = dtmp)
109 |       if (!sum(dim(membership) == 0) == 2) {
110 |         m <- membership[cluster == i, i]
111 |         colorname = toString(m)
112 |         plotlist[[i]] <- ggplot(dtmp, aes(x = time, y = value)) +
113 |           geom_line(colour = membercolor[colorname]) + theme_bw() +
114 |           ggtitle(title) +
115 |           scale_x_continuous(breaks = dtmp$time,
116 |                              labels = row.names(dtmp)) +
117 |           labs(x = categories, y = value) +
118 |           theme(plot.title = element_text(size = title.size),
119 |                 axis.line.x = element_line(color = "black",
120 |                                            size = axis.line.size),
121 |                 axis.line.y = element_line(color = "black",
122 |                                            size = axis.line.size),
123 |                 axis.title = element_text(size = axis.title.size),
124 |                 axis.text = element_text(size = axis.text.size),
125 |                 legend.position = "none", panel.border = element_blank(),
126 |                 panel.grid.major = element_blank(),
127 |                 panel.grid.minor = element_blank())
128 |       } else {
129 |         plotlist[[i]] <- ggplot(dtmp, aes(x = time, y = value)) +
130 |           geom_line(colour = cl.color) + theme_bw() + ggtitle(title) +
131 |           scale_x_continuous(breaks = dtmp$time,
132 |                              labels = row.names(dtmp)) +
133 |           labs(x = categories, y = value) +
134 |           theme(plot.title = element_text(size = title.size),
135 |                 axis.line.x = element_line(color = "black",
136 |                                            size = axis.line.size),
137 |                 axis.line.y = element_line(color = "black",
138 |                                            size = axis.line.size),
139 |                 axis.title = element_text(size = axis.title.size),
140 |                 axis.text = element_text(size = axis.text.size),
141 |                 legend.position = "none", panel.border = element_blank(),
142 |                 panel.grid.major = element_blank(),
143 |                 panel.grid.minor = element_blank())
144 |       }
145 |     } else {
146 |       dtmp_m <- melt(dtmp)
147 |       colnames(dtmp_m) <- c("group", "time", "value")
148 |       if (sum(dim(membership) == 0) == 2) {
149 |         plotlist[[i]] <- ggplot(dtmp_m, aes(x = time, y = value)) +
150 |           geom_line(aes(group = group), colour = cl.color) +
151 |           theme_bw() + ggtitle(title) +
152 |           labs(x = categories, y = value) +
153 |           theme(plot.title = element_text(size = title.size),
154 |                 axis.line.x = element_line(color = "black",
155 |                                            size = axis.line.size),
156 |                 axis.line.y = element_line(color = "black",
157 |                                            size = axis.line.size),
158 |                 axis.title = element_text(size = axis.title.size),
159 |                 axis.text = element_text(size = axis.text.size),
160 |                 legend.position = "none", panel.border = element_blank(),
161 |                 panel.grid.major = element_blank(),
162 |                 panel.grid.minor = element_blank())
163 |       }
164 |       if (!sum(dim(membership) == 0) == 2) {
165 |         mem <- membership[cluster == i, i]
166 |         mem1 <- data.frame(group = names(mem), member = mem)
167 |         dtmp_m1 <- merge(dtmp_m, mem1, by = "group")
168 |         colnames(dtmp_m1) <- c("group", "time", "value", "membership")
169 |         dtmp_m1 <- dtmp_m1[order(dtmp_m1[, 4]), ]
170 |         new.factor <- unique(as.vector(dtmp_m1$group))
171 |         dtmp_m1$group <- factor(dtmp_m1$group, levels = new.factor)
172 | 
173 |         plotlist[[i]] <- ggplot(dtmp_m1, aes(x = time, y = value,
174 |                                              colour = membership)) +
175 |           geom_line(aes(group = group)) +
176 |           scale_colour_gradientn(colours = membership.color) +
177 |           guides(colour = guide_colourbar()) + theme_bw() +
178 |           ggtitle(title) + labs(x = categories, y = value) +
179 |           theme(plot.title = element_text(size = title.size),
180 |                 axis.line.x = element_line(color = "black",
181 |                                            size = axis.line.size),
182 |                 axis.line.y = element_line(color = "black",
183 |                                            size = axis.line.size),
184 |                 axis.title = element_text(size = axis.title.size),
185 |                 axis.text = element_text(size = axis.text.size),
186 |                 legend.title = element_text(size = legend.title.size),
187 |                 legend.text = element_text(size = legend.title.size),
188 |                 panel.border = element_blank(),
189 |                 panel.grid.major = element_blank(),
190 |                 panel.grid.minor = element_blank())
191 | 
192 | 
193 |       }
194 |     }
195 | 
196 |   }
197 |   suppressWarnings(multiplot(plotlist = plotlist, cols = cols))
198 |   plotlist
199 | }
200 | 
# Print several plot objects on one page, arranged in a grid.
#
# ...       plot objects passed individually.
# plotlist  optional list of further plot objects, appended after `...`.
# cols      number of columns of the automatically generated layout.
# layout    optional matrix of plot indices; the cell(s) holding value i give
#           the grid position where plot i is printed. When NULL, a layout
#           with `cols` columns is generated, filled column by column.
#
# Called for its printing side effect. A single plot is printed directly
# without setting up a grid; with no plots at all nothing is drawn.
multiplot <- function(..., plotlist = NULL, cols = 1, layout = NULL) {
  plots <- c(list(...), plotlist)
  numPlots <- length(plots)
  # Nothing to draw: the layout arithmetic and the printing loop below both
  # assume at least one plot (1:0 would otherwise index a non-existent plot).
  if (numPlots == 0) {
    return(invisible(NULL))
  }
  if (is.null(layout)) {
    nrows <- ceiling(numPlots / cols)
    layout <- matrix(seq_len(cols * nrows), ncol = cols, nrow = nrows)
  }
  if (numPlots == 1) {
    print(plots[[1]])
  } else {
    grid.newpage()
    pushViewport(viewport(layout = grid.layout(nrow(layout),
                                               ncol(layout))))
    for (i in seq_len(numPlots)) {
      # Row/column of the layout cell(s) assigned to plot i.
      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))

      print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
                                      layout.pos.col = matchidx$col))
    }
  }
}
223 | 


--------------------------------------------------------------------------------
/R/timeclust.R:
--------------------------------------------------------------------------------
  1 | #' time course data clustering
  2 | #'
  3 | #' This function performs clustering analysis of the time course data.
  4 | #'
  5 | #' @param x a \code{TCA} object returned from
  6 | #' \code{\link{timecourseTable}} or a matrix
  7 | #'
  8 | #' @param algo a character string giving a clustering method. Options
  9 | #' are "\code{km}" (kmeans), "\code{pam}" (partitioning around medoids),
 10 | #' "\code{hc}" (hierarchical clustering), "\code{cm}" (cmeans).
 11 | #'
 12 | #' @param k a numeric value between \eqn{1} and \eqn{n - 1} (\eqn{n}
 13 | #' is the number of data points to be clustered).
 14 | #' 
 15 | #' @param dist a character string specifying either "\code{distance}" or 
 16 | #' "\code{correlation}" will be used to measure the distance between data points.
 17 | #' 
 18 | #' @param dist.method a character string. It can be chosen from one of 
 19 | #' the correlation methods in \code{\link{cor}} function ("\code{pearson}", 
 20 | #' "\code{spearman}", "\code{kendall}") if \code{dist} is "\code{correlation}", 
 21 | #' or one of the distance measure methods in \code{\link{dist}} function 
 22 | #' (for example, "\code{euclidean}", "\code{manhattan}") if \code{dist} is 
 23 | #' "\code{distance}".
 24 | #'
 25 | #' @param centers a numeric matrix giving initial centers for kmeans,
 26 | #' pam or cmeans. If given, number of rows of the matrix must be equal
 27 | #' to k.
 28 | #'
 29 | #' @param standardize logical, if TRUE, z-score transformation will
 30 | #' be performed on the data before clustering. See 'Details' below.
 31 | #'
 32 | #' @param ... additional arguments passing to \code{\link{kmeans}},
 33 | #' \code{\link{pam}}, \code{\link{hclust}}, \code{\link{cmeans}}
 34 | #'
 35 | #' @details
 36 | #' two types of clustering methods are provided: hard clustering
 37 | #' (\code{\link{kmeans}}, \code{\link{pam}}, \code{\link{hclust}})
 38 | #' and soft clustering(\code{\link{cmeans}}). In hard clustering,
 39 | #' a data point can only be allocated to exactly one cluster
 40 | #' (for \code{\link{hclust}}, \code{\link{cutree}} is used to cut
 41 | #' a tree into clusters), while in soft clustering (also known as
 42 | #' fuzzy clustering), a data point can be assigned to multiple
 43 | #' clusters, membership values are used to indicate to what
 44 | #' degree a data point belongs to each cluster.
 45 | #'
 46 | #' To better capture the differences of temporal patterns rather 
 47 | #' than expression levels, z-score transformation can be applied 
 48 | #' to convert the expression values to z-scores by performing
 49 | #' the following formula:
 50 | #'
 51 | #' \deqn{z = \frac{x - \mu}{\sigma}}
 52 | #'
 53 | #' \eqn{x} is the value to be converted (e.g., expression value of a
 54 | #' genomic feature in one condition), \eqn{\mu} is the population
 55 | #' mean (e.g., average expression value of a genomic feature across
 56 | #' different conditions), \eqn{\sigma} is the standard deviation
 57 | #' (e.g., standard deviation of the expression values of a genomic 
 58 | #' feature across different conditions).
 59 | #'
 60 | #'
 61 | #' @return
 62 | #' If x is a \code{TCA} object, a \code{TCA} object will be returned.
 63 | #' If x is a matrix, a \code{clust} object will be returned
 64 | #'
 65 | #' @examples
 66 | #'
 67 | #' example.mat <- matrix(rnorm(1600,sd=0.3), nrow = 200,
 68 | #'             dimnames = list(paste0('peak', 1:200), 1:8))
 69 | #' clust_res <- timeclust(x = example.mat, algo = 'cm', k = 4) 
 70 | #' # return a clust object
 71 | #' 
 72 | #' @author
 73 | #' Mengjun Wu
 74 | #'
 75 | #' @seealso \code{\link{clust}}, \code{\link{kmeans}},
 76 | #' \code{\link{pam}}, \code{\link{hclust}}, \code{\link{cutree}}
 77 | #'
 78 | #' @export
 79 | timeclust <- function(x, algo, k, dist = "distance", dist.method = "euclidean", 
 80 |                       centers = NULL, standardize = TRUE, ...) {
 81 |   if (is.matrix(x)) {
 82 |     data.tmp <- x
 83 |   }else{
 84 |     data.tmp <- x@tcTable
 85 |   }
 86 |   if (standardize) {
 87 |     for (i in seq_len(nrow(data.tmp))) {
 88 |       data.tmp[i, ] <- (data.tmp[i, ] - mean(data.tmp[i, ], na.rm = TRUE))/sd(data.tmp[i, ], na.rm = TRUE)
 89 |     }
 90 |     data.tmp <- data.tmp[complete.cases(data.tmp), ]
 91 |   }
 92 |   object <- new("clust")
 93 |   object@method <- algo
 94 |   object@dist <- dist
 95 |   object@data <- data.tmp
 96 |   
 97 |   res <- .timeclust(data = data.tmp, algo = algo, k = k, 
 98 |                     dist = dist, dist.method = dist.method,
 99 |                     centers = centers, ...)
100 |   
101 |   if (algo == "cm") {
102 |     object@cluster <- res$cluster
103 |     object@membership <- res$membership
104 |     object@centers <- res$centers
105 |   } else {
106 |     object@cluster <- res$cluster
107 |     object@centers <- res$centers
108 |   }
109 |   if (is.matrix(x)) {
110 |     object
111 |   } else {
112 |     x@clusterRes <- object
113 |     x
114 |   }
115 | }
116 | 
# Perform time course clustering (internal worker behind timeclust()).
#
# data         numeric matrix, one row per object to be clustered.
# algo         clustering method: "pam", "km", "hc" or "cm".
# k            number of clusters.
# centers      optional matrix of initial centers with k rows; not used for
#              "hc". For "pam" the rows must be taken from `data`.
# dist         "distance" or "correlation"; selects how the dissimilarity
#              handed to pam()/hclust() is built.
# dist.method  metric name passed to cor() or dist() for "pam"/"hc", and to
#              cmeans() for "cm".
# ...          forwarded to kmeans(), pam(), hclust() or cmeans().
#
# Returns a list with elements `cluster` and `centers` (an empty 0 x 0 matrix
# for "hc"), plus `membership` for "cm".
.timeclust <- function(data, algo, k, centers = NULL,
                       dist = "distance", dist.method = "euclidean", ...) {
  if (!algo %in% c("pam", "km", "hc", "cm")) {
    stop("clustering method should be one of 'pam','km','hc','cm'")
  }
  if (!dist %in% c("distance", "correlation")) {
    stop("Distance can only be one of either 'distance' or 'correlation'")
  }
  if (!dist.method %in% c("pearson", "kendall", "spearman", "euclidean", "maximum", 
                          "manhattan", "canberra", "binary", "minkowski")) {
    stop("Distance metric should either one of correlation measures in cor function or 
         one of the distance measures in dist function")
  }
  if (algo == "km" && dist.method != "euclidean") {
    stop("kmeans only support euclidean metric; for other distance metrices, please see the help page")
  }
  if (algo == "cm" && !dist.method %in% c("euclidean", "manhattan")) {
    stop("cmeans only support euclidean or mahattan distance metrics")
  }

  # pam and hclust work on a precomputed dissimilarity object.
  d <- NULL
  if (algo %in% c("pam", "hc")) {
    if (dist == "correlation") {
      d <- as.dist(1 - cor(t(data), method = dist.method))
    } else {
      # `dist` the argument is a string; R still resolves the call below to
      # the dist() function.
      d <- dist(data, method = dist.method)
    }
  }

  # isTRUE() also rejects inputs without rows (e.g. a plain vector), for
  # which nrow() is NULL and the comparison would be empty.
  if (algo != "hc" && !is.null(centers) && !isTRUE(nrow(centers) == k)) {
    stop("Number of rows of centers must be equal to k")
  }

  switch(algo, km = {
    res <- kmeans(data, centers = if (is.null(centers)) k else centers, ...)
    list(cluster = res$cluster, centers = res$centers)
  }, pam = {
    if (is.null(centers)) {
      res <- pam(d, k = k, ...)
    } else {
      # NOTE(review): initial medoids are located by comparing only the first
      # column of `data` and `centers`; ties in that column make the count
      # differ from k and trigger the error below.
      ind <- which(data[, 1] %in% centers[, 1])
      if (length(ind) != k) {
        stop("For 'pam', centers must be chosen from the data")
      }
      # Seed pam with the requested medoids (previously this fit was
      # overwritten by an unseeded run).
      res <- pam(d, k = k, medoids = ind, ...)
    }
    # drop = FALSE keeps a one-row matrix when k is 1.
    list(cluster = res$clustering,
         centers = data[res$medoids, , drop = FALSE])
  }, hc = {
    tree <- hclust(d, ...)
    # Hierarchical clustering has no centers; an empty matrix is returned.
    list(cluster = cutree(tree, k = k), centers = matrix(0, 0, 0))
  }, cm = {
    # Hand the validated metric to cmeans so "manhattan" is honoured.
    res <- cmeans(data, centers = if (is.null(centers)) k else centers,
                  dist = dist.method, ...)
    list(cluster = res$cluster, centers = res$centers,
         membership = res$membership)
  })
}
201 | 


--------------------------------------------------------------------------------
/R/timecourseTable.R:
--------------------------------------------------------------------------------
  1 | #' constructs time course table for clustering analysis
  2 | #'
  3 | #' This function constructs a time course table of which rows
  4 | #' are genomic features and columns time points.
  5 | #' values can be normalized expression levels or log2-fold
  6 | #' changes compared to a control time point. The time course
  7 | #' table is used for clustering analysis.
  8 | #'
  9 | #' @param object a \code{TCA} object returned by \code{DBanalysis}.
 10 | #'
 11 | #' @param value a character string, either "\code{expression}" or
 12 | #' "\code{FC}". "\code{expression}" is the mean normalized read
 13 | #' counts of replicates, "\code{FC}" is the log2-fold changes
 14 | #' compared to the control time point given by \code{control.group}.
 15 | #' 
 16 | #' @param control.group a character string giving the time point to 
 17 | #' be compared with, i.e., the denominator in the fold changes. It 
 18 | #' should match one of the time points in the \code{design} table 
 19 | #' in the \code{TCA} object. 
 20 | #' 
 21 | #' @param lib.norm logical indicating whether or not use effective
 22 | #' library size (see "Details" in \code{\link{counts}}).
 23 | #'
 24 | #' @param norm.method a character string specifying the normalization
 25 | #' method if \code{value} is "\code{expression}"
 26 | #'
 27 | #' @param subset an optional character vector giving a subset of
 28 | #' genomic features, if not NULL, time course table is generated
 29 | #' for only this subset of genomic features.
 30 | #'
 31 | #' @param filter logical, whether to drop the genomic features
 32 | #' that show no significant changes (defined by \code{pvalue},
 33 | #' \code{pvalue.threshold},\code{abs.fold} and \code{direction})
 34 | #' between any two time points.
 35 | #'
 36 | #' @param pvalue character string specify the type of p-values:
 37 | #' "\code{none}" is unadjusted p-value or one of adjusted p-value 
 38 | #' "\code{holm}", "\code{hochberg}", "\code{hommel}", "\code{bonferroni}", 
 39 | #' "\code{BH}", "\code{BY}", "\code{fdr}". 
 40 | #'
 41 | #' @param pvalue.threshold a numeric value giving threshold of
 42 | #' selected p-value, significant changes have lower
 43 | #' (adjusted) p-values than the threshold.
 44 | #'
 45 | #' @param abs.fold a numeric value, the minimum absolute log2-fold
 46 | #' changes. The returned genomic regions have changes 
 47 | #' with absolute log2-fold changes exceeding \code{abs.fold}.
 48 | #'
 49 | #' @param direction character string specify the direction of fold
 50 | #' changes. "\code{up}": positive fold changes; "\code{down}":
 51 | #' negative fold changes; "\code{both}": both positive and
 52 | #' negative fold changes.
 53 | #'
 54 | #' @param ... additional arguments passing to \code{\link{rpkm}},
 55 | #' \code{\link{cpm}}
 56 | #' @note
 57 | #' If "\code{expression}" in \code{value} is chosen, the average 
 58 | #' normalized expression values of replicates for each group will 
 59 | #' be calculated and returned.
 60 | #'
 61 | #' @return
 62 | #' A \code{TCA} object
 63 | #'
 64 | #' @author
 65 | #' Mengjun Wu
 66 | #'
 67 | #' @examples
 68 | #' data(tca_ATAC)
 69 | #' tca_ATAC <- DBanalysis(tca_ATAC)
 70 | #' tca_ATAC <- timecourseTable(tca_ATAC, value = 'expression',
 71 | #'                             lib.norm = TRUE, norm.method = 'rpkm')
 72 | #'
 73 | #' @export
 74 | #'
 75 | #'
 76 | timecourseTable <- function(object, value = "expression", control.group=NULL,
 77 |                             lib.norm = TRUE, norm.method = "rpkm", 
 78 |                             subset = NULL, filter = FALSE, pvalue = "fdr",
 79 |                             pvalue.threshold = 0.05, abs.fold = 2,
 80 |                             direction = "both", ...) {
 81 |   if (!value %in% c("expression", "FC")) {
 82 |     err <- paste0("The value of time course table should be either 
 83 |                   normalized expression table (value=\"expression\") or 
 84 |                   logarithm of fold changes (value=\"FC\")")
 85 |     stop(err)
 86 |   }
 87 |   group <- unique(object@design$timepoint)
 88 |   genointerval <- object@genomicFeature[object@genomicFeature$id %in%
 89 |                                           row.names(object@DBfit$counts), ]
 90 |   if (value == "expression") {
 91 |     count <- object@DBfit$counts
 92 |     if (lib.norm) {
 93 |       y <- DGEList(counts = count, group = object@design$timepoint)
 94 |       y <- calcNormFactors(y)
 95 |     } else {
 96 |       y <- DGEList(counts = count, group = object@design$timepoint)
 97 |     }
 98 |     if (!norm.method %in% c("rpkm", "cpm")) {
 99 |       err <- paste0("norm.method should be one of \"rpkm\" or \"cpm\".")
100 |       stop(err)
101 |     }
102 |     tc <- switch(norm.method, rpkm = {
103 |       giwidth <- genointerval$end - genointerval$start
104 |       t <- rpkm(y, normalized.lib.size = lib.norm, gene.length = giwidth, ...)
105 |       t
106 |     }, cpm = {
107 |       t <- cpm(y, normalized.lib.size = lib.norm, ...)
108 |       t
109 |     })
110 |     tc <- data.frame(tc, stringsAsFactors = FALSE)
111 |     colnames(tc) <- object@design$timepoint
112 |     tc <- as.data.frame(sapply(unique(names(tc)), function(col) rowMeans(tc[names(tc) == col])))
113 |   }
114 |   if (value == "FC") {
115 |     tc <- NULL
116 |     if(is.null(control.group)){
117 |       err <- paste0("control group needs to be specified.")
118 |       stop(err)
119 |     }
120 |     group1 <- control.group
121 |     tc <- cbind(tc, rep(0, length(genointerval[, 1])))
122 |     group2 <- group[group != group1]
123 |     t <- DBresult(object, group1 = group1, group2 = group2,
124 |                   top.sig = FALSE, result.type = "list")
125 |     t <- as(t, "list")
126 |     for (i in t) {
127 |       tc <- cbind(tc, i$logFC)
128 |     }
129 |     colnames(tc) <- group
130 |     rownames(tc) <- genointerval$id
131 |   }
132 |   tc <- as.matrix(tc)
133 | 
134 |   if (filter) {
135 |     contrasts <- colnames(object@contrasts)
136 |     if (pvalue == "PValue") {
137 |       p <- "none"
138 |       p2 <- "PValue"
139 |     } else {
140 |       p <- pvalue
141 |       p2 <- "paj"
142 |     }
143 |     DBtmpfilter <- DBresult(object, contrasts = contrasts,
144 |                             p.adjust = p, result.type = "list", 
145 |                             pvalue.threshold = pvalue.threshold, 
146 |                             abs.fold = abs.fold,
147 |                             top.sig = TRUE)
148 |     feature.filter <- c()
149 |     for (i in DBtmpfilter) {
150 |       feature.filter <- c(feature.filter, rownames(i))
151 |     }
152 |     tc <- tc[unique(feature.filter), ]
153 |   }
154 | 
155 |   if (!is.null(subset)) {
156 |     tc <- tc[row.names(tc) %in% subset, ]
157 |   }
158 | 
159 |   object@tcTable <- tc
160 |   object
161 | }
162 | 
163 | 
164 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # TCseq
2 | An R package for time course sequencing data analysis
3 | 


--------------------------------------------------------------------------------
/TCseq.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: Default
 4 | SaveWorkspace: Default
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | BuildType: Package
16 | PackageUseDevtools: Yes
17 | PackageInstallArgs: --no-multiarch --with-keep.source
18 | 


--------------------------------------------------------------------------------
/data/countsTable.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MengjunWu/TCseq/f2708be21fed9fa0ea3a5c2c3f72f607cb24e84c/data/countsTable.rda


--------------------------------------------------------------------------------
/data/experiment.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MengjunWu/TCseq/f2708be21fed9fa0ea3a5c2c3f72f607cb24e84c/data/experiment.rda


--------------------------------------------------------------------------------
/data/experiment_BAMfile.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MengjunWu/TCseq/f2708be21fed9fa0ea3a5c2c3f72f607cb24e84c/data/experiment_BAMfile.rda


--------------------------------------------------------------------------------
/data/genomicIntervals.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MengjunWu/TCseq/f2708be21fed9fa0ea3a5c2c3f72f607cb24e84c/data/genomicIntervals.rda


--------------------------------------------------------------------------------
/data/tca_ATAC.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MengjunWu/TCseq/f2708be21fed9fa0ea3a5c2c3f72f607cb24e84c/data/tca_ATAC.rda


--------------------------------------------------------------------------------
/man/DBanalysis.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/DBanalysis.R
 3 | \name{DBanalysis}
 4 | \alias{DBanalysis}
 5 | \title{Perform differential expression analysis}
 6 | \usage{
 7 | DBanalysis(
 8 |   object,
 9 |   categories = "timepoint",
10 |   norm.lib = TRUE,
11 |   filter.type = NULL,
12 |   filter.value = NULL,
13 |   samplePassfilter = 2,
14 |   ...
15 | )
16 | }
17 | \arguments{
18 | \item{object}{a \code{TCA} object.}
19 | 
20 | \item{categories}{character string giving which column in \code{design} 
21 | will be used for differential analysis. For time course analysis, the default
22 | column is "\code{timepoint}".}
23 | 
24 | \item{norm.lib}{logical indicating whether or not use effective
25 | library size when perform normalization. See \code{\link{counts}} for more 
26 | details.}
27 | 
28 | \item{filter.type}{character string indicating which type of count
29 | (raw or normalized) is used when performing filtering. Options are
30 | "\code{raw}", "\code{cpm}", "\code{rpkm}", "\code{NULL}". No filtering will 
31 | be performed when using "\code{NULL}'.}
32 | 
33 | \item{filter.value}{a numberic value; minimum values of selected
34 | \code{filter.type} ("\code{raw}", "\code{cpm}", "\code{rpkm}"). It is used in 
35 | combination with \code{samplePassfilter}.}
36 | 
37 | \item{samplePassfilter}{a numberic value indicating the minimum number
38 | of samples/libraries in which a genomic feature has counts value 
39 | (raw or normalized) more than \code{filter.value}. Smaller than this number, 
40 | the genomic feature will be filtered out.}
41 | 
42 | \item{...}{additional arguments passed to \code{\link{glmFit}} from
43 | \code{edgeR} package.}
44 | }
45 | \value{
46 | A \code{TCA} object
47 | }
48 | \description{
49 | This function is a wrapper for the \code{\link{glmFit}} in edgeR package.
50 | }
51 | \details{
52 | The differetial event is detected by using the generalized
53 | linear model (GLM) methods (McCarthy et al, 2012). This function
54 | fits the read counts of each genes to a negative binomial glm by
55 | using \code{\link{glmFit}} function from edgeR. To further test the
56 | significance of changes, see \code{DBresult}, \code{TopDBresult}
57 | }
58 | \examples{
59 | data(tca_ATAC)
60 | tca_ATAC <- DBanalysis(tca_ATAC)
61 | 
62 | }
63 | \references{
64 | McCarthy,D.J.,Chen, Y., & Smyth, G. K.(2012). Differential
65 | expression analysis of multifactor RNA-Seq experiments with respect to
66 | biological variation. Nucleic acids research 40, 4288-4297.
67 | }
68 | \seealso{
69 | \code{DBresult}, \code{TopDBresult}
70 | }
71 | \author{
72 | Mengjun Wu, Lei Gu
73 | }
74 | 


--------------------------------------------------------------------------------
/man/DBresult.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/DBresults.R
  3 | \name{DBresult}
  4 | \alias{DBresult}
  5 | \alias{DBresult.cluster}
  6 | \title{This function tests for differential expression}
  7 | \usage{
  8 | DBresult(
  9 |   object,
 10 |   group1 = NULL,
 11 |   group2 = NULL,
 12 |   contrasts = NULL,
 13 |   p.adjust = "fdr",
 14 |   top.sig = FALSE,
 15 |   pvalue = "paj",
 16 |   pvalue.threshold = 0.05,
 17 |   abs.fold = 2,
 18 |   direction = "both",
 19 |   result.type = "GRangesList"
 20 | )
 21 | 
 22 | DBresult.cluster(
 23 |   object,
 24 |   group1 = NULL,
 25 |   group2 = NULL,
 26 |   contrasts = NULL,
 27 |   p.adjust = "fdr",
 28 |   top.sig = FALSE,
 29 |   pvalue = "paj",
 30 |   pvalue.threshold = 0.05,
 31 |   abs.fold = 2,
 32 |   direction = "both",
 33 |   cluster,
 34 |   cmthreshold = NULL,
 35 |   result.type = "GRangesList"
 36 | )
 37 | }
 38 | \arguments{
 39 | \item{object}{a \code{TCA} object, for \code{DBresult},
 40 | \code{DBanalysis} should already be called on the object;
 41 | for \code{DBresult.cluster}, both \code{DBanalysis} and
 42 | \code{timeclust} should be already called.}
 43 | 
 44 | \item{group1}{character string giving the group to be compared with,
 45 | i.e., the denominator in the fold changes. group1 can be set NULL and 
 46 | will be ignored if the comparisons are passed to \code{contrasts}}
 47 | 
 48 | \item{group2}{a character vetor giving the other groups to 
 49 | compare with \code{group1}, i.e., the numerator in the fold changes.
 50 | group2 can be set NULL and will be ignored if the comparisons are 
 51 | passed to \code{contrasts}}
 52 | 
 53 | \item{contrasts}{a character vector, each string in
 54 | the vector gives a contrast of two groups with the format
 55 | "group2vsgroup1", group1 is the denominator level in the fold
 56 | changes and group2 is the numerator
 57 | level in the fold changes.}
 58 | 
 59 | \item{p.adjust}{character string specifying a correction method
 60 | for p-values. Options are "\code{holm}", "\code{hochberg}", 
 61 | "\code{hommel}", "\code{bonferroni}", "\code{BH}", "\code{BY}", 
 62 | "\code{fdr}", and "\code{none}".}
 63 | 
 64 | \item{top.sig}{logical if TRUE, only genomic regions with
 65 | given log2-fold changes and significance levels (p-value) 
 66 | will be returned. Log2-fold changes are defined by \code{abs.fold}
 67 | and \code{direction}; significance levels are defined by \code{pvalue} 
 68 | and \code{pvalue.threshold}}
 69 | 
 70 | \item{pvalue}{character string specify the type of p-values
 71 | used for defining the significance level(\code{PValue}
 72 | or adjusted p-value \code{paj})}
 73 | 
 74 | \item{pvalue.threshold}{a numeric value giving threshold of
 75 | selected p-value, Significant changes have lower
 76 | (adjusted) p-values than the threshold.}
 77 | 
 78 | \item{abs.fold}{a numeric value, the minimum absolute log2-fold
 79 | changes. The returned genomic regions have changes 
 80 | with absolute log2-fold changes exceeding \code{abs.fold}.}
 81 | 
 82 | \item{direction}{character string specify the direction of fold
 83 | changes. "\code{up}": positive fold changes; "\code{down}":
 84 | negative fold changes; "\code{both}": both positive and
 85 | negative fold changes.}
 86 | 
 87 | \item{result.type}{character string giving the data type of return
 88 | value. Options are "GRangesList" and "list".}
 89 | 
 90 | \item{cluster}{an integer giving the number of cluster from which 
 91 | genomic features are extracted.}
 92 | 
 93 | \item{cmthreshold}{a numeric value, this argument is applicable
 94 | only if \code{cmeans}' clustering method is selected when calling
 95 | \code{\link{timeclust}} function. if not NULL, the result table of
 96 | genomic features that belong to the defined \code{cluster} and
 97 | the membership values to this cluster exceed \code{cmthreshold}
 98 | are extracted.}
 99 | }
100 | \value{
101 | A list or a GRangesList.
102 | If \code{result.type} is "GRangesList", a GRangesList is returned containing
103 | the differential analysis results for all provided contrasts. Each GRanges 
104 | object of the list is one contrast, the analysis results are contained in 4 
105 | metadata columns:
106 | 
107 | \code{logFC} log2-fold changes between two groups.
108 | 
109 | \code{PValue} p-values.
110 | 
111 | \code{paj} adjusted p-values
112 | 
113 | \code{id} name of genomic features 
114 | 
115 | If \code{result.type} is "list", a list of data frames is returned.
116 | Each data frame contains one contrast with the following columns:
117 | 
118 | \code{logFC} log2-fold changes between two groups.
119 | 
120 | \code{PValue} p-values.
121 | 
122 | \code{paj} adjusted p-values
123 | 
124 | \code{chr}  name of chromosomes
125 | 
126 | \code{start} starting positions of features in the 
127 | chromosomes
128 | 
129 | \code{end} ending postitions of features in the chromosomes
130 | 
131 | \code{id} name of genomic features
132 | }
133 | \description{
134 | This function is a wrapper for \code{\link{glmLRT}} in edgeR package. 
135 | It performs likelihood ratio tests for given coefficinets contrasts 
136 | after fitting read counts to a negative binomial glm by
137 | \code{\link{DBanalysis}}. \code{DBresult} also extracts the
138 | diffential analysis results of given contrasts at a chosen significance level. 
139 | \code{DBresult.cluster} returns similar results but only 
140 | contain genomic features belong to a given cluster.
141 | }
142 | \details{
143 | This function uses \code{\link{glmLRT}} from edgeR, which
144 | performs likelihood ratio tests for the significance of changes.
145 | For more details,
146 | see \code{\link{glmLRT}}.
147 | }
148 | \note{
149 | If \code{group1}, \code{group2} and \code{contrasts} are all not NULL,
150 | result tables are extracted from comparisons in \code{contrasts}.
151 | }
152 | \examples{
153 | data(tca_ATAC)
154 | tca_ATAC <- DBanalysis(tca_ATAC)
155 | ### extract differential analysis of 24h, 72h to 0h
156 | # set the contrasts using the 'group1' and 'group2' parameters
157 | res1 <- DBresult(tca_ATAC, group1 = '0h', group2 = c('24h', '72h'))
158 | # one can get the same result by setting the contrasts using the 'contrasts' parameter
159 | res2 <- DBresult(tca_ATAC, contrasts = c('24hvs0h', '72hvs0h'))
160 | # extract significant differential events
161 | res.sig <- DBresult(tca_ATAC, contrasts = c('24hvs0h', '72hvs0h'),
162 |                    top.sig = TRUE)
163 | 
164 | # extract differential analysis of 24h, 72h to 0h of a given cluster
165 | tca_ATAC <- timecourseTable(tca_ATAC, filter = TRUE)
166 | tca_ATAC <- timeclust(tca_ATAC, algo = 'cm', k = 6)
167 | res_cluster1 <- DBresult.cluster(tca_ATAC, group1 = '0h',
168 |                                  group2 = c('24h', '72h'),
169 |                                  cluster = 1)
170 | 
171 | 
172 | 
173 | }
174 | \seealso{
175 | \code{\link{glmLRT}}
176 | }
177 | \author{
178 | Mengjun Wu, Lei Gu
179 | }
180 | 


--------------------------------------------------------------------------------
/man/TCA.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/Classes.R
  3 | \docType{class}
  4 | \name{TCA-class}
  5 | \alias{TCA-class}
  6 | \alias{TCA}
  7 | \alias{TCAFromSummarizedExperiment}
  8 | \title{TCA class and constructor}
  9 | \usage{
 10 | TCA(design, counts = matrix(0L, 0L, 0L), genomicFeature, zero.based = TRUE)
 11 | 
 12 | TCAFromSummarizedExperiment(se, genomicFeature = NULL)
 13 | }
 14 | \arguments{
 15 | \item{design}{a data frame containing information of
 16 | samples/libraries. For time course analysis, design table should 
 17 | contain at least three columns (case insensitive): \code{sampleid},
 18 | \code{timepoint} and \code{group} providing time point and group
 19 | information of each sample/library. If \code{counts} is not provided
 20 | when creating \code{TCA} object, an optional column \code{bamfile} can 
 21 | be used to provide BAM filename of each sample/library and generate 
 22 | count table using \code{\link{countReads}} function later.}
 23 | 
 24 | \item{counts}{an integer matrix containing read counts. Rows
 25 | correspond to genomic features and columns to samples/libraries. 
 26 | The names of the columns should be the same as the time points
 27 | in \code{design}.}
 28 | 
 29 | \item{genomicFeature}{a data frame or a GRanges object containing
 30 | genomic coordinates of features of interest (e.g. genes in RNA-seq,
 31 | binding regions in ChIP-seq). If genomicFeature is a data frame,
 32 | four columns are required in \code{genomicFeature}: \code{id},
 33 | \code{chr}, \code{start}, \code{end}; if genomicFeature is a GRanges
 34 | object, the metadata column "\code{id}" is required. For
 35 | \code{TCAFromSummarizedExperiment}, genomicFeature must be
 36 | provided if \code{se} is a SummarizedExperiment object.}
 37 | 
 38 | \item{zero.based}{Logical. If TRUE, the start positions of the
 39 | genomic ranges in the returned \code{TCA} object are \emph{0-based},
 40 | if FALSE, the start positions will be \emph{1-based}.}
 41 | 
 42 | \item{se}{A SummarizedExperiment or a RangedSummarizedExperiment
 43 | object. The object might contain multiple assays in the assay list, 
 44 | only the first one will be taken to construct TCA object. 
 45 | For SummarizedExperiment object, \code{genomicFeature}
 46 | must be provided while for RangedSummarizedExperiment object,
 47 | the genomic features will be extracted directly from the object.}
 48 | }
 49 | \value{
 50 | A TCA object
 51 | }
 52 | \description{
 53 | \code{TCA} is a S4 class for storing input data, results of
 54 | differential analysis and clustering analysis. A \code{TCA} object
 55 | can be created by the constructor function taking a table of sample
 56 | information, a table of the genomic coordinates of features, and read
 57 | count table (optional).
 58 | }
 59 | \details{
 60 | A TCA object can be created without providing read counts,
 61 | read counts can be provided by \code{\link{counts}} or generated by
 62 | \code{\link{countReads}}. For the read counts, the number of rows
 63 | should equal that in \code{genomicFeature} and the number of columns
 64 | should equal the number of rows in \code{design}; in addition, the
 65 | column names should be the same as the time points in \code{design}.
 66 | Input data and analysis results in a TCA object can be accessed by using 
 67 | corresponding accessors and functions.
 68 | The TCA objects also have a show method printing a compact summary of
 69 | their contents. See \code{\link{counts}}, \code{\link{TCA.accessors}},
 70 | \code{\link{DBresult}}, \code{\link{tcTable}}, \code{\link{timeclust}}.
 71 | \code{clust}
 72 | }
 73 | \examples{
 74 | #create data frame of experiment design: 4 time points and 2 replicates for each time point.
 75 | d <- data.frame(sampleID = 1:8, group = rep(c(1, 2, 3, 4), 2),
 76 |                timepoint = rep(c('0h', '24h', '48h', '72h'), 2))
 77 | 
 78 | 
 79 | #create data frame of genomic intervals of interest
 80 | gf <- data.frame(chr = c(rep('chr1', 3), rep('chr2', 2), rep('chr4', 2)),
 81 |                 start = seq(100, 2000, by = 300),
 82 |                 end = seq(100, 2000, by = 300) + 150,
 83 |                 id = paste0('peak', 1:7))
 84 | tca <- TCA(design = d, genomicFeature = gf)
 85 | genomicFeature(tca)
 86 | 
 87 | #if count table is available
 88 | c <- matrix(sample(1000, 56), nrow = 7, dimnames = list(paste0('peak', 1:7), 1:8))
 89 | tca <- TCA(design = d, counts = c, genomicFeature = gf)
 90 | # replace the count table of a TCA object
 91 | c2 <- matrix(sample(500, 56), nrow = 7, dimnames = list(paste0('peak', 1:7), 1:8))
 92 | counts(tca) <- c2
 93 | 
 94 | 
 95 | }
 96 | \seealso{
 97 | \code{\link{counts}}, \code{\link{TCA.accessors}},
 98 | \code{\link{DBresult}}, \code{\link{timeclust}}, \code{\link{clust}}
 99 | }
100 | \author{
101 | Mengjun Wu
102 | }
103 | 


--------------------------------------------------------------------------------
/man/TCA.accessors.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/GenericFunctions.R
 3 | \name{TCA.accessors}
 4 | \alias{TCA.accessors}
 5 | \alias{design}
 6 | \alias{design,TCA-method}
 7 | \alias{genomicFeature,TCA-method}
 8 | \alias{tcTable,TCA-method}
 9 | \alias{clustResults,TCA-method}
10 | \alias{genomicFeature}
11 | \alias{tcTable}
12 | \alias{clustResults}
13 | \title{Accessors to extract slots of a TCA class.}
14 | \usage{
15 | \S4method{design}{TCA}(object)
16 | 
17 | genomicFeature(object)
18 | 
19 | tcTable(object)
20 | 
21 | \S4method{tcTable}{TCA}(object)
22 | 
23 | clustResults(object)
24 | 
25 | \S4method{clustResults}{TCA}(object)
26 | }
27 | \arguments{
28 | \item{object}{a \code{TCA} object}
29 | }
30 | \value{
31 | \code{design} returns a data frame. \code{genomicFeature} returns a data frame.
32 | \code{tcTable} returns a numeric matrix. \code{clustResults} returns a
33 | \code{clust} object, see \code{\link{clust}} for details.
34 | }
35 | \description{
36 | Accessors are provided to extract \code{design}, \code{genomicFeature},
37 | \code{tcTable}, \code{clustResults} slots of a TCA class. The \code{design}
38 | slot stores experimental information of samples/libraries, the
39 | \code{genomicFeature} slot stores genomic coordinates of features, the
40 | \code{tcTable} slot stores time course data as a matrix, where rows are
41 | genomic features and columns are time points. The \code{clustResults} slot
42 | stores results of clustering analysis as a \code{clust} object.
43 | }
44 | \examples{
45 | data(tca_ATAC)
46 | genomicFeature(tca_ATAC)
47 | tcTable(tca_ATAC)
48 | }
49 | \seealso{
50 | \code{\link{clust}}
51 | }
52 | \author{
53 | Mengjun Wu
54 | }
55 | 


--------------------------------------------------------------------------------
/man/clust-class.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/Classes.R
 3 | \docType{class}
 4 | \name{clust-class}
 5 | \alias{clust-class}
 6 | \alias{clust}
 7 | \title{clust class}
 8 | \description{
 9 | \code{clust} is a S4 class for storing results of the clustering
10 | analysis of time course data.
11 | }
12 | \details{
13 | The clust objects are returned from \code{\link{timeclust}} and have
14 | a show method printing a compact summary of their contents
15 | }
16 | \section{Slots}{
17 | 
18 | Object of \code{clust} class contains the following slots:
19 | \describe{
20 |  \item{\code{method}}{clustering method used}
21 |  \item{\code{dist}}{distance metric used}
22 |  \item{\code{data}}{a matrix of original or standardized data used
23 |  in the analysis}
24 |  \item{\code{centers}}{a matrix of cluster centers}
25 |  \item{\code{cluster}}{an integer vector of length \eqn{n} (the
26 |  integers are the indices of the clusters the data points belong to).
27 |  For the fuzzy cmeans clustering method, a data point is assigned
28 |  to the cluster for which it has the highest
29 |  membership value.}
30 |  \item{\code{membership}}{a matrix of membership values of the
31 |  data points to each cluster}
32 | }
33 | }
34 | 
35 | \seealso{
36 | \code{\link{timeclust}}, \code{\link{@}}
37 | }
38 | \author{
39 | Mengjun Wu
40 | }
41 | 


--------------------------------------------------------------------------------
/man/clust.accessors.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/GenericFunctions.R
 3 | \name{clust.accessors}
 4 | \alias{clust.accessors}
 5 | \alias{clustData}
 6 | \alias{clustData,clust-method}
 7 | \alias{clustCenters,clust-method}
 8 | \alias{clustCluster,clust-method}
 9 | \alias{clustMembership,clust-method}
10 | \alias{clustCenters}
11 | \alias{clustCluster}
12 | \alias{clustMembership}
13 | \title{Accessors to extract slots of a clust class.}
14 | \usage{
15 | clustData(object)
16 | 
17 | \S4method{clustData}{clust}(object)
18 | 
19 | clustCenters(object)
20 | 
21 | \S4method{clustCenters}{clust}(object)
22 | 
23 | clustCluster(object)
24 | 
25 | \S4method{clustCluster}{clust}(object)
26 | 
27 | clustMembership(object)
28 | 
29 | \S4method{clustMembership}{clust}(object)
30 | }
31 | \arguments{
32 | \item{object}{a \code{clust} object}
33 | }
34 | \value{
35 | \code{clustData} returns a data matrix. \code{clustCenters} returns a matrix of
36 | centers. \code{clustCluster} returns an integer vector. \code{clustMembership}
37 | returns a matrix of membership, see \code{\link{clust}} for details.
38 | }
39 | \description{
40 | Accessors are provided to extract \code{data}, \code{centers}, \code{cluster}, 
41 | and \code{membership} slots stored in a clust class.
42 | }
43 | \seealso{
44 | \code{\link{clust}}
45 | }
46 | \author{
47 | Mengjun Wu
48 | }
49 | 


--------------------------------------------------------------------------------
/man/countReads.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/countReads.R
 3 | \name{countReads}
 4 | \alias{countReads}
 5 | \title{count mapped reads overlapping genomic intervals}
 6 | \usage{
 7 | countReads(object, dir, method = "summarizeoverlaps", zero.based = TRUE, ...)
 8 | }
 9 | \arguments{
10 | \item{object}{a \code{TCA} object.}
11 | 
12 | \item{dir}{character string giving the directory of BAM files.}
13 | 
14 | \item{method}{character string giving the counting method. Options
15 | are "\code{summarizeOverlaps}" and "\code{featureCounts}". For
16 | Windows systems, only "\code{summarizeOverlaps}" can be used; for
17 | Linux systems, both methods can be used.}
18 | 
19 | \item{zero.based}{Logical. If TRUE, the start positions of the
20 | genomic intervals are \emph{0-based}, if FALSE, the start positions
21 | will be \emph{1-based}.}
22 | 
23 | \item{...}{additional arguments passed to
24 | \code{\link{summarizeOverlaps}} in GenomicAlignments package 
25 | or \code{\link{featureCounts}} in Rsubread package.}
26 | }
27 | \value{
28 | A TCA object with updated \code{count} slot.
29 | }
30 | \description{
31 | This function counts mapped reads from multiple BAM files 
32 | overlapping genomic intervals in \code{genomicFeature} in a 
33 | \code{TCA} object. The resulting count table is stored in
34 | \code{count} slot of the \code{TCA} object.
35 | }
36 | \details{
37 | This function provides two options to count the mapped reads: 
38 | "\code{summarizeOverlaps}" in the GenomicAlignments package and 
39 | "\code{featureCounts}" in the Rsubread package. As the Rsubread package
40 | is only available for Linux systems, Windows users can only choose
41 | "\code{summarizeOverlaps}". The user can further customize the
42 | counting parameters by passing additional arguments (...); otherwise
43 | the default settings of the two methods will be used. For details 
44 | of the counting parameters, see \code{\link{summarizeOverlaps}}, 
45 | \code{\link{featureCounts}}.
46 | }
47 | \seealso{
48 | \code{\link{summarizeOverlaps}}, \code{\link{featureCounts}}
49 | }
50 | \author{
51 | Mengjun Wu
52 | }
53 | 


--------------------------------------------------------------------------------
/man/counts.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/GenericFunctions.R
 3 | \name{counts}
 4 | \alias{counts}
 5 | \alias{counts,TCA-method}
 6 | \alias{counts<-,TCA-method}
 7 | \title{Extracts counts of a TCA object.}
 8 | \usage{
 9 | \S4method{counts}{TCA}(object, normalization = "none", lib.norm = TRUE, log = FALSE, ...)
10 | 
11 | \S4method{counts}{TCA}(object) <- value
12 | }
13 | \arguments{
14 | \item{object}{a \code{TCA} object.}
15 | 
16 | \item{normalization}{character string giving the normalization method.
17 | Options are "\code{none}" (original raw counts), "\code{cpm}" (counts
18 | per million),
19 | "\code{rpkm}" (reads per kilobase per million).}
20 | 
21 | \item{lib.norm}{logical indicating whether or not to use effective library
22 | size (see Details below) when \code{normalization} is "\code{cpm}" or
23 | "\code{rpkm}".}
24 | 
25 | \item{log}{logical; if \code{TRUE}, the returned value will be on a log2
26 | scale.}
27 | 
28 | \item{...}{additional arguments passed to \code{\link{cpm}} or
29 | \code{\link{rpkm}} in the edgeR package.}
30 | 
31 | \item{value}{an integer matrix.}
32 | }
33 | \value{
34 | An integer matrix
35 | }
36 | \description{
37 | \code{counts} extracts raw read counts stored in a \code{TCA} object
38 | or compute normalized counts from the raw counts.
39 | }
40 | \details{
41 | When calculating normalized counts, library size can be rescaled
42 | to minimize the log-fold changes between samples for most genomic features
43 | (e.g. genes, binding sites) by multiplying by a scale factor. The rescaled
44 | library size is called effective library size. In this function, the scale
45 | factor is calculated using the weighted trimmed mean of M-values (TMM,
46 | Robinson et al (2010)).
47 | 
48 | If log2 values are computed, a small count is added to avoid taking the
49 | logarithm of zero. The actual added count is scaled according to the library
50 | size; for details see \code{\link{addPriorCount}} in the edgeR package.
51 | When not specified, the prior count is set to 0.25 by default.
52 | }
53 | \examples{
54 | data(tca_ATAC)
55 | c <- counts(tca_ATAC)
56 | # normalized counts table
57 | c_norm <- counts(tca_ATAC, normalization='rpkm')
58 | }
59 | \references{
60 | Robinson, M. D., & Oshlack, A. (2010). A scaling normalization method for
61 | differential expression analysis of RNA-seq data. Genome biology, 11(3), 1.
62 | }
63 | \author{
64 | Mengjun Wu
65 | }
66 | 


--------------------------------------------------------------------------------
/man/countsTable.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{countsTable}
 5 | \alias{countsTable}
 6 | \title{An example read Counts table}
 7 | \format{
 8 | A data frame containing experiment design information
 9 | for 12 samples/libraries.
10 | }
11 | \usage{
12 | data(countsTable)
13 | }
14 | \value{
15 | A data frame
16 | }
17 | \description{
18 | A dataset of exemplary read counts
19 | }
20 | \examples{
21 | data(countsTable)
22 | }
23 | \keyword{datasets}
24 | 


--------------------------------------------------------------------------------
/man/experiment.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{experiment}
 5 | \alias{experiment}
 6 | \title{An example experiment design without BAM file information}
 7 | \format{
 8 | A data frame containing experiment design information
 9 | for 12 samples/libraries.
10 | }
11 | \usage{
12 | data(experiment)
13 | }
14 | \value{
15 | A data frame
16 | }
17 | \description{
18 | A dataset of exemplary experiment design without BAM file
19 | information
20 | }
21 | \examples{
22 | data(experiment)
23 | }
24 | \keyword{datasets}
25 | 


--------------------------------------------------------------------------------
/man/experiment_BAMfile.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{experiment_BAMfile}
 5 | \alias{experiment_BAMfile}
 6 | \title{An example experiment design with BAM file information}
 7 | \format{
 8 | A data frame containing experiment design information
 9 | for 12 samples/libraries.
10 | }
11 | \usage{
12 | data(experiment_BAMfile)
13 | }
14 | \value{
15 | A data frame
16 | }
17 | \description{
18 | A dataset of exemplary experiment design with BAM file
19 | information
20 | }
21 | \examples{
22 | data(experiment_BAMfile)
23 | }
24 | \keyword{datasets}
25 | 


--------------------------------------------------------------------------------
/man/genomicIntervals.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{genomicIntervals}
 5 | \alias{genomicIntervals}
 6 | \title{An example reference genomic regions}
 7 | \format{
 8 | A data frame containing 2751 genomic regions.
 9 | }
10 | \usage{
11 | data(genomicIntervals)
12 | }
13 | \value{
14 | A data frame
15 | }
16 | \description{
17 | A dataset of exemplary genomic regions
18 | }
19 | \examples{
20 | data(genomicIntervals)
21 | }
22 | \keyword{datasets}
23 | 


--------------------------------------------------------------------------------
/man/peakreference.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/peakreference.R
 3 | \name{peakreference}
 4 | \alias{peakreference}
 5 | \title{combine and merge multiple BED files}
 6 | \usage{
 7 | peakreference(
 8 |   data = NULL,
 9 |   dir = NULL,
10 |   pattern = NULL,
11 |   merge = TRUE,
12 |   overlap = 1,
13 |   ratio = NULL
14 | )
15 | }
16 | \arguments{
17 | \item{data}{a data frame containing coordinate information of peaks
18 | to be merged. Columns of the data frame should be consistent with
19 | the BED format where the first column contains chromosome information,
20 | the second column the starting position, and the third column 
21 | the ending position.}
22 | 
23 | \item{dir}{a character string giving the directory where BED files
24 | are stored. If \code{data} is not given, the function will read
25 | in the BED files under \code{dir}.}
26 | 
27 | \item{pattern}{a \code{\link{regular expression}}, only files whose
28 | names match the regular expression will be read in.}
29 | 
30 | \item{merge}{logical indicating whether to merge overlapped regions
31 | or not. If FALSE, regions are simply combined.}
32 | 
33 | \item{overlap}{a numeric value giving the least number of base(s)
34 | two regions should overlap when merging them.}
35 | 
36 | \item{ratio}{a numeric value giving the threshold of overlapping
37 | ratio between two regions to merge them. See '\code{Details}' below
38 | for the definition of the overlapping ratio.}
39 | }
40 | \value{
41 | a data frame with four columns: \code{chr}, \code{start},
42 | \code{stop}, \code{id}
43 | }
44 | \description{
45 | This function merges overlapping genomic regions into a single feature. 
46 | The merged single feature represents the widest genomic interval 
47 | that covers all overlapping regions.
48 | }
49 | \details{
50 | The overlapping ratio (OR) is defined as:
51 | 
52 | \deqn{ OR = \frac{n}{\min(length(a), length(b))}}
53 | 
54 | \eqn{a}, \eqn{b} are two genomic regions, \eqn{n} is the number of
55 | overlapping bases between region \eqn{a} and region \eqn{b}.
56 | }
57 | \examples{
58 | peaks <- data.frame(chr = c(rep('chr1',4),rep('chr2', 3), rep('chr3',2)),
59 |                     start = c(100,148,230,300,330,480,1000,700,801),
60 |                     end = c(150,220,500,450,600,900,1050,760,900))
61 | 
62 | merged_peaks <- peakreference(data = peaks, merge = TRUE, overlap = 1)
63 | 
64 | }
65 | \author{
66 | Mengjun Wu, Lei Gu
67 | }
68 | 


--------------------------------------------------------------------------------
/man/tca_ATAC.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{tca_ATAC}
 5 | \alias{tca_ATAC}
 6 | \title{An example TCA object}
 7 | \format{
 8 | A TCA object of exemplary ATAC-seq time course data
 9 | }
10 | \usage{
11 | data(tca_ATAC)
12 | }
13 | \value{
14 | A TCA object
15 | }
16 | \description{
17 | A TCA object storing exemplary ATAC-seq time course data,
18 | including the experiment design, read counts, reference
19 | genomic regions.
20 | }
21 | \examples{
22 | data(tca_ATAC)
23 | }
24 | \keyword{datasets}
25 | 


--------------------------------------------------------------------------------
/man/timeclust.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/timeclust.R
 3 | \name{timeclust}
 4 | \alias{timeclust}
 5 | \title{time course data clustering}
 6 | \usage{
 7 | timeclust(
 8 |   x,
 9 |   algo,
10 |   k,
11 |   dist = "distance",
12 |   dist.method = "euclidean",
13 |   centers = NULL,
14 |   standardize = TRUE,
15 |   ...
16 | )
17 | }
18 | \arguments{
19 | \item{x}{a \code{TCA} object returned from
20 | \code{\link{timecourseTable}} or a matrix}
21 | 
22 | \item{algo}{a character string giving a clustering method. Options
23 | are "\code{km}" (kmeans), "\code{pam}" (partitioning around medoids),
24 | "\code{hc}" (hierarchical clustering), "\code{cm}" (cmeans).}
25 | 
26 | \item{k}{a numeric value between \eqn{1} and \eqn{n - 1} (\eqn{n}
27 | is the number of data points to be clustered).}
28 | 
29 | \item{dist}{a character string specifying either "\code{distance}" or 
30 | "\code{correlation}" will be used to measure the distance between data points.}
31 | 
32 | \item{dist.method}{a character string. It can be chosen from one of 
33 | the correlation methods in \code{\link{cor}} function ("\code{pearson}", 
34 | "\code{spearman}", "\code{kendall}") if \code{dist} is "\code{correlation}", 
35 | or one of the distance measure methods in \code{\link{dist}} function 
36 | (for example, "\code{euclidean}", "\code{manhattan}") if \code{dist} is 
37 | "\code{distance}".}
38 | 
39 | \item{centers}{a numeric matrix giving initial centers for kmeans,
40 | pam or cmeans. If given, number of rows of the matrix must be equal
41 | to k.}
42 | 
43 | \item{standardize}{logical, if TRUE, z-score transformation will
44 | be performed on the data before clustering. See 'Details' below.}
45 | 
46 | \item{...}{additional arguments passed to \code{\link{kmeans}},
47 | \code{\link{pam}}, \code{\link{hclust}}, \code{\link{cmeans}}}
48 | }
49 | \value{
50 | If x is a \code{TCA} object, a \code{TCA} object will be returned.
51 | If x is a matrix, a \code{clust} object will be returned
52 | }
53 | \description{
54 | This function performs clustering analysis of the time course data.
55 | }
56 | \details{
57 | Two types of clustering methods are provided: hard clustering
58 | (\code{\link{kmeans}}, \code{\link{pam}}, \code{\link{hclust}})
59 | and soft clustering (\code{\link{cmeans}}). In hard clustering,
60 | a data point can only be allocated to exactly one cluster
61 | (for \code{\link{hclust}}, \code{\link{cutree}} is used to cut
62 | a tree into clusters), while in soft clustering (also known as
63 | fuzzy clustering), a data point can be assigned to multiple
64 | clusters, membership values are used to indicate to what
65 | degree a data point belongs to each cluster.
66 | 
67 | To better capture the differences of temporal patterns rather
68 | than expression levels, z-score transformation can be applied
69 | to convert the expression values to z-scores using
70 | the following formula:
71 | 
72 | \deqn{z = \frac{x - \mu}{\sigma}}
73 | 
74 | \eqn{x} is the value to be converted (e.g., expression value of a
75 | genomic feature in one condition), \eqn{\mu} is the population
76 | mean (e.g., average expression value of a genomic feature across
77 | different conditions), \eqn{\sigma} is the standard deviation
78 | (e.g., standard deviation of the expression values of a genomic 
79 | feature across different conditions).
80 | }
81 | \examples{
82 | 
83 | example.mat <- matrix(rnorm(1600,sd=0.3), nrow = 200,
84 |             dimnames = list(paste0('peak', 1:200), 1:8))
85 | clust_res <- timeclust(x = example.mat, algo = 'cm', k = 4) 
86 | # return a clust object
87 | 
88 | }
89 | \seealso{
90 | \code{\link{clust}}, \code{\link{kmeans}},
91 | \code{\link{pam}}, \code{\link{hclust}}, \code{\link{cutree}}
92 | }
93 | \author{
94 | Mengjun Wu
95 | }
96 | 


--------------------------------------------------------------------------------
/man/timeclustplot.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plots.R
 3 | \name{timeclustplot}
 4 | \alias{timeclustplot}
 5 | \title{Plot clustering results for time course data.}
 6 | \usage{
 7 | timeclustplot(
 8 |   object = NULL,
 9 |   categories = "timepoint",
10 |   value = "expression",
11 |   cols = NULL,
12 |   cl.color = "gray50",
13 |   membership.color = rainbow(30, s = 3/4, v = 1, start = 1/6),
14 |   title.size = 18,
15 |   axis.line.size = 0.6,
16 |   axis.title.size = 18,
17 |   axis.text.size = 16,
18 |   legend.title.size = 14,
19 |   legend.text.size = 14
20 | )
21 | }
22 | \arguments{
23 | \item{object}{a \code{TCA} object or a \code{clust} object}
24 | 
25 | \item{categories}{character string giving the x-axis label}
26 | 
27 | \item{value}{character string giving the y-axis label}
28 | 
29 | \item{cols}{integer value specifying number of columns in the final
30 | layout.}
31 | 
32 | \item{cl.color}{character string specifying a color for hard
33 | clustering.}
34 | 
35 | \item{membership.color}{color palettes, a character vector of
36 | n colors}
37 | 
38 | \item{title.size}{numeric value specifying the font size of title
39 | of each
40 | plot in the layout}
41 | 
42 | \item{axis.line.size}{numeric value specifying the size of both
43 | axis lines}
44 | 
45 | \item{axis.title.size}{numeric value specifying the font size of
46 | titles of both axes}
47 | 
48 | \item{axis.text.size}{numeric value specifying the font size of
49 | labels of both axes}
50 | 
51 | \item{legend.title.size}{numeric value specifying the font size
52 | of legend title}
53 | 
54 | \item{legend.text.size}{numeric value specifying the font size of
55 | legend text}
56 | }
57 | \value{
58 | Plot all clusters in one plot and return a list of ggplot objects,
59 | each object is for one cluster. The ggplot object can be drawn by
60 | calling \code{\link{print.ggplot}}
61 | }
62 | \description{
63 | This function plots the clusters generated from
64 | \code{\link{timeclust}}. For fuzzy cmeans clustering, data points
65 | are color-coded according to membership values, the color palettes
66 | can be customized.
67 | }
68 | \examples{
69 | x <- matrix(sample(500, 1600, replace = TRUE), nrow = 200,
70 |             dimnames = list(paste0('peak', 1:200), 1:8))
71 | clust_res <- timeclust(x, algo = 'cm', k = 4, standardize = TRUE)
72 | p <- timeclustplot(clust_res, cols =2)
73 | # to plot an individual cluster
74 | print (p[[2]]) # plot cluster 2
75 | print (p[[3]]) # plot cluster 3
76 | 
77 | }
78 | \author{
79 | Mengjun Wu
80 | }
81 | 


--------------------------------------------------------------------------------
/man/timecourseTable.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/timecourseTable.R
 3 | \name{timecourseTable}
 4 | \alias{timecourseTable}
 5 | \title{constructs time course table for clustering analysis}
 6 | \usage{
 7 | timecourseTable(
 8 |   object,
 9 |   value = "expression",
10 |   control.group = NULL,
11 |   lib.norm = TRUE,
12 |   norm.method = "rpkm",
13 |   subset = NULL,
14 |   filter = FALSE,
15 |   pvalue = "fdr",
16 |   pvalue.threshold = 0.05,
17 |   abs.fold = 2,
18 |   direction = "both",
19 |   ...
20 | )
21 | }
22 | \arguments{
23 | \item{object}{a \code{TCA} object returned by \code{DBanalysis}.}
24 | 
25 | \item{value}{a character string, either "\code{expression}" or
26 | "\code{FC}". "\code{expression}" is the mean normalized read
27 | counts of replicates, "\code{FC}" is the log2-fold changes
28 | compared to the first time point.}
29 | 
30 | \item{control.group}{a character string giving the time point to 
31 | be compared with, i.e., the denominator in the fold changes. It 
32 | should match one of the time points in the \code{design} table 
33 | in the \code{TCA} object.}
34 | 
35 | \item{lib.norm}{logical indicating whether or not to use the effective
36 | library size (see "Details" in \code{\link{counts}}).}
37 | 
38 | \item{norm.method}{a character string specifying the normalization
39 | method if \code{value} is "\code{expression}"}
40 | 
41 | \item{subset}{an optional character vector giving a subset of
42 | genomic features; if not NULL, the time course table is generated
43 | for only this subset of genomic features.}
44 | 
45 | \item{filter}{logical, whether to drop the genomic features
46 | that show no significant changes (defined by \code{pvalue},
47 | \code{pvalue.threshold}, \code{abs.fold} and \code{direction})
48 | between any two time points.}
49 | 
50 | \item{pvalue}{character string specifying the type of p-value:
51 | "\code{none}" for unadjusted p-values, or one of the adjustment methods
52 | "\code{holm}", "\code{hochberg}", "\code{hommel}", "\code{bonferroni}",
53 | "\code{BH}", "\code{BY}", "\code{fdr}".}
54 | 
55 | \item{pvalue.threshold}{a numeric value giving threshold of
56 | selected p-value, significant changes have lower
57 | (adjusted) p-values than the threshold.}
58 | 
59 | \item{abs.fold}{a numeric value, the minimum absolute log2-fold
60 | change. The returned genomic regions have absolute log2-fold
61 | changes exceeding \code{abs.fold}.}
62 | 
63 | \item{direction}{character string specifying the direction of fold
64 | changes. "\code{up}": positive fold changes; "\code{down}":
65 | negative fold changes; "\code{both}": both positive and
66 | negative fold changes.}
67 | 
68 | \item{...}{additional arguments passed to \code{\link{rpkm}},
69 | \code{\link{cpm}}}
70 | }
71 | \value{
72 | A \code{TCA} object
73 | }
74 | \description{
75 | This function constructs a time course table in which rows
76 | are genomic features and columns are time points.
77 | Values can be normalized expression levels or log2-fold
78 | changes compared to a control time point. The time course
79 | table is used for clustering analysis.
80 | }
81 | \note{
82 | If "\code{expression}" in \code{value} is chosen, the average 
83 | normalized expression values of replicates for each group will 
84 | be calculated and returned.
85 | }
86 | \examples{
87 | data(tca_ATAC)
88 | tca_ATAC <- DBanalysis(tca_ATAC)
89 | tca_ATAC <- timecourseTable(tca_ATAC, value = 'expression',
90 |                             lib.norm = TRUE, norm.method = 'rpkm')
91 | 
92 | }
93 | \author{
94 | Mengjun Wu
95 | }
96 | 


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)  # test framework that provides test_check()
2 | library(TCseq)     # the package under test
3 | 
4 | test_check("TCseq")  # run the testthat test files of the TCseq package
5 | 


--------------------------------------------------------------------------------
/tests/testthat/test_TCseq.R:
--------------------------------------------------------------------------------
 1 | ## Test the TCA object constructor: valid inputs must build, malformed inputs must error or warn.
 2 | d1<- data.frame(timepoint = rep(c("0h", "24h", "48h", "72h"), 2), group = rep(c(1, 2, 3, 4), 2))
 3 | ## d1 (above) has no sampleid column; d3 (below) is the same design with sampleid 1:8 added.
 4 | d3 <- data.frame(sampleid = 1:8, timepoint = rep(c("0h", "24h", "48h", "72h"), 2),
 5 |                  group = rep(c(1, 2, 3, 4), 2))
 6 | ## gf1: seven genomic features with chr, start and end but no id column.
 7 | gf1 <- data.frame(chr = c(rep("chr1", 3), rep("chr2", 2), rep("chr4", 2)),
 8 |                   start = seq(100, 2000, by = 300), end = seq(100, 2000, by = 300) + 150)
 9 | ## gf2: same values as gf3 but the chromosome column is named CHR (upper case).
10 | gf2 <- data.frame(CHR = c(rep("chr1", 3), rep("chr2", 2), rep("chr4", 2)),
11 |                   start = seq(100, 2000, by = 300), end = seq(100, 2000, by = 300) + 150,
12 |                   id = paste0("peak", 1:7))
13 | ## gf3: seven features with chr, start, end and id columns; used as the valid reference.
14 | gf3 <- data.frame(chr = c(rep("chr1", 3), rep("chr2", 2), rep("chr4", 2)),
15 |                   start = seq(100, 2000, by = 300), end = seq(100, 2000, by = 300) + 150,
16 |                   id = paste0("peak", 1:7))
17 | ## Construction from design + genomic features only: d3 with gf3 is the valid case.
18 | tca <- TCA(design = d3, genomicFeature = gf3)
19 | expect_error({  # design d1 (no sampleid column) must be rejected
20 |   tca <- TCA(design = d1, genomicFeature = gf3)
21 | })
22 | expect_error({  # features gf1 (no id column) must be rejected
23 |   tca <- TCA(design = d3, genomicFeature = gf1)
24 | })
25 | expect_warning({  # features gf2 (column CHR instead of chr) must raise a warning
26 |   tca <- TCA(design = d3, genomicFeature = gf2)
27 | })
28 | ## Count matrices: c1 is 7 x 8 (matches gf3 rows and d3 samples), c2 has only 6 rows, c3 has only 7 columns.
29 | c1 <- matrix(sample(500, 56), nrow = 7, dimnames = list(paste0("peak",
30 |                                                                1:7), 1:8))
31 | c2 <- matrix(sample(500, 48), nrow = 6, dimnames = list(paste0("peak",
32 |                                                                1:6), 1:8))
33 | c3 <- matrix(sample(500, 49), nrow = 7, dimnames = list(paste0("peak",
34 |                                                                1:7), 1:7))
35 | tca <- TCA(design = d3, counts = c1, genomicFeature = gf3)
36 | expect_error({  # c2 has fewer rows than gf3 has features
37 |   TCA(design = d3, counts = c2, genomicFeature = gf3)
38 | })
39 | expect_error({  # c3 has fewer columns than d3 has samples
40 |   TCA(design = d3, counts = c3, genomicFeature = gf3)
41 | })
42 | 
43 | ## Test the correctness of the merged peak results returned by peakreference().
44 | peaks <- data.frame(chr = c(rep("chr1",4),rep("chr2", 3), rep("chr3",2)),
45 |                     start = c(100,148,230,300,330,480,1000,700,801),
46 |                     end = c(150,220,500,450,600,900,1050,760,900))
47 | ## overlap = 1: every group of overlapping peaks is expected to collapse into one region.
48 | merged_peaks <- peakreference(data = peaks, merge = TRUE, overlap = 1)
49 | 
50 | peaks_expect <- data.frame(chr = c(rep("chr1",2),rep("chr2", 2), rep("chr3",2)),
51 |                            start = c(100, 230, 330, 1000, 700, 801),
52 |                            end = c(220, 500, 900, 1050, 760, 900),
53 |                            id = paste0("peak", 1:6))
54 | 
55 | expect_equal(merged_peaks, peaks_expect)
56 | ## ratio = 0.2: chr1 peaks 100-150 and 148-220 (3 shared bases) are expected to stay separate.
57 | merged_peaks2 <- peakreference(data = peaks, merge = TRUE, ratio = 0.2)
58 | peaks_expect2 <- data.frame(chr = c(rep("chr1",3),rep("chr2", 2), rep("chr3",2)),
59 |                             start = c(100,148, 230, 330, 1000, 700, 801),
60 |                             end = c(150, 220, 500, 900, 1050, 760, 900),
61 |                             id = paste0("peak", 1:7))
62 | 
63 | expect_equal(merged_peaks2, peaks_expect2)
64 | 
65 | 


--------------------------------------------------------------------------------
/vignettes/TCseq.Rnw:
--------------------------------------------------------------------------------
  1 | % \VignetteIndexEntry{TCseq Vignette}
  2 | % \VignetteDepends{TCseq}
  3 | % \VignetteKeywords{Time course sequencing analysis, Clustering}
  4 | % \VignettePackage{TCseq}
  5 | 
  6 | \documentclass[a4paper]{article}
  7 | \usepackage{a4wide}
  8 | \usepackage[utf8]{inputenc}
  9 | \usepackage{float}
 10 | 
 11 | \title{TCseq: time course sequencing data analysis}
 12 | \author{Mengjun Wu, Lei Gu}
 13 | \date{ \today }
 14 | 
 15 | \begin{document}
 16 | \SweaveOpts{concordance=TRUE}
 17 | \maketitle
 18 | 
 19 | The TCseq package provides a unified suite for analysis of different types of time course sequencing data. It can be applied to transcriptomic time course data such as RNA-seq as well as epigenomic time course data such as ATAC-seq, ChIP-seq. The main focuses of this package are on differential analysis between different time points and temporal pattern analysis and visualization.
 20 | 
 21 | Unlike RNA-seq, the genomic regions of interest for sequencing data such as ATAC-seq and ChIP-seq are not predefined and are specific to each experimental condition, which limits the subsequent differential analysis between conditions. For these data types, the TCseq package provides functions to combine and merge condition-specific genomic regions and to generate a set of reference genomic regions for all conditions. The package then uses the negative binomial generalized linear model implemented in edgeR to perform differential analysis \cite{Robinson}. To capture the temporal patterns of the time course data, the package includes several unsupervised clustering methods to identify the patterns and a function to visualize them.
 22 | 
 23 | This vignette uses an example ATAC-seq time course data to illustrate how to use the TCseq package.
 24 | 
 25 | \section{Input data}
 26 | The minimal input data for the TCseq are experiment design and reference genomic regions.
 27 | 
 28 | \subsection{Generate reference genomic regions}
 29 | For RNA-seq, the reference genomic regions are predefined (genes or exons). For epigenome sequencing data, however, the genomic regions of interest are usually defined as read-enriched regions, which are also called peaks. The peak set for a given condition can be identified by peak callers such as MACS and is specific to that condition. The TCseq package provides a function that reads in a set of peak files in BED format, combines these files into a single data frame, merges overlapping regions according to user-defined criteria and takes the largest bounds as the reference region for all the overlapping regions. The merge criterion can be either the absolute number of overlapping bases or the overlapping ratio (the absolute number of overlapping bases divided by the minimum length of the regions to be merged).
 30 | 
 31 | If a set of BED files is available under a certain directory, say dir.peaks, and the file names of the BED files to be merged share the common substring "narrowpeaks", then the reference genomic regions can be generated by:
 32 | <<>>=
 33 | library(TCseq)
 34 | @
 35 | 
 36 | <<eval=FALSE>>=
 37 | dir <- dir.peaks
 38 | gf <- peakreference(dir = dir, pattern = "narrowpeaks")
 39 | @
 40 | The resulting data frame has four columns as follows:
 41 | <<>>=
 42 | data("genomicIntervals")
 43 | head(genomicIntervals)
 44 | @
 45 | 
 46 | \subsection{Create a TCA object}
 47 | TCseq uses an S4 class, TCA, to store all input data for subsequent analysis. When a read counts table is not available, only the data frames of experiment design and reference genomic regions are required to create a TCA object. TCseq also provides a function to generate the counts table; to use this function, the file names of the BAM files for each sample/library have to be provided in the data frame of experiment design:
 48 | <<>>=
 49 | # Experiment design
 50 | data("experiment_BAMfile")
 51 | head(experiment_BAMfile)
 52 | # create a TCA object
 53 | tca <- TCA(design = experiment_BAMfile, genomicFeature = genomicIntervals)
 54 | tca
 55 | @
 56 | The counts table can then be created (suppose the BAM files are stored in the directory dir.BAM):
 57 | <<eval=FALSE>>=
 58 | tca <- countReads(tca, dir = dir.BAM)
 59 | @
 60 | When the counts table is available, BAM file information is not mandatory in the experiment design. The counts table can be provided when creating a TCA object:
 61 | <<>>=
 62 | #Experiment design without BAM file information
 63 | data("experiment")
 64 | #Counts table
 65 | data("countsTable")
 66 | tca <- TCA(design = experiment, genomicFeature = genomicIntervals,
 67 |            counts = countsTable)
 68 | tca
 69 | @
 70 | The counts table can also be assigned to an existing TCA object:
 71 | <<eval=FALSE>>=
 72 | counts(tca) <- countsTable
 73 | @
 74 | In addition, a TCA object can also be created from an existing RangedSummarizedExperiment or SummarizedExperiment object. For a SummarizedExperiment object, additional reference genomic regions information must be provided, while for a RangedSummarizedExperiment object, the reference genomic regions will be extracted directly from the object.
 75 | For a SummarizedExperiment object:
 76 | <<>>=
 77 | suppressWarnings(library(SummarizedExperiment))
 78 | se <- SummarizedExperiment(assays=list(counts = countsTable), colData = experiment)
 79 | tca <- TCAFromSummarizedExperiment(se = se, genomicFeature = genomicIntervals)
 80 | @
 81 | 
 82 | The TCA object with experiment design, read counts and reference genomic regions can be used for the following differential analysis.
 83 | 
 84 | \section{Differential Analysis}
 85 | Differential events are detected using the generalized linear model (GLM) methods \cite{McCarthy} implemented in the edgeR package.
 86 | <<>>=
 87 | tca <- DBanalysis(tca)
 88 | @
 89 | Low quality genomic regions (read counts are low for all the time points) can also be filtered out. The following step only keeps genomic regions with two or more samples that have read counts more than 10.
 90 | <<>>=
 91 | tca <- DBanalysis(tca, filter.type = "raw", filter.value = 10, samplePassfilter = 2)
 92 | @
 93 | Differential analysis results between given timepoints can be extracted by:
 94 | <<>>=
 95 | DBres <- DBresult(tca, group1 = "0h", group2 = c("24h","40h","72h"))
 96 | str(DBres, strict.width =  "cut")
 97 | head(DBres$`24hvs0h`)
 98 | @
 99 | Significant differential events (log2-fold > 2 or log2-fold < -2, adjusted p-value < 0.05) can be further extracted by:
100 | <<>>=
101 | DBres.sig <- DBresult(tca, group1 = "0h", group2 = c("24h","40h","72h"), top.sig = TRUE)
102 | str(DBres.sig, strict.width =  "cut")
103 | @
104 | 
105 | \section{Temporal pattern analysis}
106 | \subsection{Construct time course table}
107 | To detect temporal patterns of the time course sequencing data, the TCseq package uses unsupervised clustering methods. First, a time course table is created for clustering analysis. The rows of the time course table are genomic regions and the columns are time points; the values can be chosen from normalized read counts or logFC of all time points compared to a given group. Here we compare each time point with the initial time point. Such a table can be created as follows:
108 | <<>>=
109 | # values are logFC
110 | tca <- timecourseTable(tca, value = "FC", control.group = "0h", norm.method = "rpkm", filter = TRUE)
111 | @
112 | or
113 | <<>>=
114 | # values are normalized read counts
115 | tca <- timecourseTable(tca, value = "expression", norm.method = "rpkm", filter = TRUE)
116 | @
117 | When the "filter" parameter is set to be TRUE, the time course table will filter out all genomic regions with no significant changes between any two time points. The table can be accessed by:
118 | <<>>=
119 | t <- tcTable(tca)
120 | head(t)
121 | @
122 | \subsection{Clustering analysis}
123 | Two types of clustering algorithms are included in the package: hard clustering (hierarchical, pam, kmeans) and soft clustering (fuzzy cmeans \cite{Futschik}). The temporal patterns are analyzed using the following function:
124 | <<>>=
125 | tca <- timeclust(tca, algo = "cm", k = 6, standardize = TRUE)
126 | @
127 | Instead of the absolute values of different time series, one might focus only on the change patterns and expect time series with similar patterns to be clustered in the same group. In this case, the "standardize" parameter gives an option to perform z-score transformation on the data to be clustered, which reduces the noise introduced by differences in the absolute values.
128 | 
129 | \subsection{Visualize the clustering results}
130 | The clustering results can be visualized as follows:
131 | <<eval=FALSE>>=
132 | p <- timeclustplot(tca, value = "z-score(RPKM)", cols = 3)
133 | @
134 | 
135 | \begin{figure}[H]
136 | \centering
137 |         \includegraphics[width=\textwidth]{clusterRes.png}
138 |     \caption{Visualization of clustering results}
139 | \end{figure}
140 | 
141 | Individual clusters can also be plotted:
142 | <<eval=FALSE>>=
143 | #plot cluster 1:
144 | print(p[[1]])
145 | @
146 | \begin{figure}[H]
147 | \centering
148 |         \includegraphics[width=0.5\textwidth]{subcluster.png}
149 |     \caption{Visualization of cluster 1}
150 | \end{figure}
151 | 
152 | To plot the cmeans clustering results, the TCseq provides several color schemes to color code the membership values which indicate the degree to which data points belong to a cluster.
153 | 
154 | %BIBLIOGRAPHY
155 | 
156 | \begin{thebibliography}{}
157 | \bibitem {Robinson} Robinson, M.D., McCarthy, D.J. and Smyth, G.K. edgeR: a Bioconductor package for differential expression analysis of digital gene expression data, Bioinformatics, 26, 139-140,2010.
158 | \bibitem {McCarthy} McCarthy, D.J., Chen, Y., Smyth, G.K. Differential expression analysis of multifactor RNA-Seq experiments with respect to biological variation. Nucleic Acids Research, 40, 4288-4297, 2012.
159 | \bibitem{Futschik} Futschik, M.E. and Carlisle, B. Noise-robust soft clustering of gene expression time-course data, Journal of bioinformatics and computational biology, 3, 965-988, 2005.
160 | \bibitem{lokesh} L. Kumar and M. Futschik, Mfuzz: a software package for soft clustering of microarray data, Bioinformation, 2(1),5-7,2007
161 | 
162 | \end{thebibliography}
163 | 
164 | \end{document}
165 | 


--------------------------------------------------------------------------------
/vignettes/clusterRes.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MengjunWu/TCseq/f2708be21fed9fa0ea3a5c2c3f72f607cb24e84c/vignettes/clusterRes.png


--------------------------------------------------------------------------------
/vignettes/subcluster.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MengjunWu/TCseq/f2708be21fed9fa0ea3a5c2c3f72f607cb24e84c/vignettes/subcluster.png


--------------------------------------------------------------------------------