├── .Rbuildignore
├── DESCRIPTION
├── NAMESPACE
├── NEWS
├── NOTES
├── R
├── Assays-class.R
├── RangedSummarizedExperiment-class.R
├── SummarizedExperiment-class.R
├── combine-methods.R
├── coverage-methods.R
├── findOverlaps-methods.R
├── inter-range-methods.R
├── intra-range-methods.R
├── makeSummarizedExperimentFromDataFrame.R
├── makeSummarizedExperimentFromExpressionSet.R
├── makeSummarizedExperimentFromLoom.R
├── nearest-methods.R
└── zzz.R
├── README.md
├── inst
├── extdata
│ └── example.loom
├── scripts
│ └── Find_and_update_objects
│ │ ├── README
│ │ ├── collect_rda_objects.R
│ │ ├── collect_rda_objects_to_update.R
│ │ ├── data_store_RDA_OBJECTS_TO_UPDATE
│ │ ├── pkgs_RDA_OBJECTS_TO_UPDATE
│ │ └── update_rda_objects.R
└── unitTests
│ ├── test_Assays-class.R
│ ├── test_RangedSummarizedExperiment-class.R
│ ├── test_SummarizedExperiment-class.R
│ ├── test_combine-methods.R
│ ├── test_coverage-methods.R
│ ├── test_findOverlaps-methods.R
│ ├── test_inter-range-methods.R
│ ├── test_intra-range-methods.R
│ ├── test_makeSummarizedExperimentFromDataFrame.R
│ ├── test_makeSummarizedExperimentFromExpressionSet.R
│ └── test_nearest-methods.R
├── man
├── Assays-class.Rd
├── RangedSummarizedExperiment-class.Rd
├── SummarizedExperiment-class.Rd
├── coverage-methods.Rd
├── findOverlaps-methods.Rd
├── inter-range-methods.Rd
├── intra-range-methods.Rd
├── makeSummarizedExperimentFromDataFrame.Rd
├── makeSummarizedExperimentFromExpressionSet.Rd
├── makeSummarizedExperimentFromLoom.Rd
└── nearest-methods.Rd
├── tests
└── run_unitTests.R
└── vignettes
├── .install_extras
├── Extensions.Rmd
├── SE.svg
└── SummarizedExperiment.Rmd
/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^NOTES$
2 |
--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
1 | Package: SummarizedExperiment
2 | Title: A container (S4 class) for matrix-like assays
3 | Description: The SummarizedExperiment container contains one or more assays,
4 | each represented by a matrix-like object of numeric or other mode.
5 | The rows typically represent genomic ranges of interest and the columns
6 | represent samples.
7 | biocViews: Genetics, Infrastructure, Sequencing, Annotation, Coverage,
8 | GenomeAnnotation
9 | URL: https://bioconductor.org/packages/SummarizedExperiment
10 | BugReports: https://github.com/Bioconductor/SummarizedExperiment/issues
11 | Version: 1.39.0
12 | License: Artistic-2.0
13 | Encoding: UTF-8
14 | Authors@R: c(
15 | person("Martin", "Morgan", role="aut"),
16 | person("Valerie", "Obenchain", role="aut"),
17 | person("Jim", "Hester", role="aut"),
18 | person("Hervé", "Pagès", role=c("aut", "cre"),
19 | email="hpages.on.github@gmail.com"))
20 | Depends: R (>= 4.0.0), methods, MatrixGenerics (>= 1.1.3),
21 | GenomicRanges (>= 1.55.2), Biobase
22 | Imports: utils, stats, tools, Matrix,
23 | BiocGenerics (>= 0.51.3), S4Vectors (>= 0.33.7), IRanges (>= 2.23.9),
24 | GenomeInfoDb (>= 1.13.1), S4Arrays (>= 1.1.1), DelayedArray (>= 0.31.12)
25 | Suggests: jsonlite, rhdf5, HDF5Array (>= 1.7.5), annotate, AnnotationDbi,
26 | GenomicFeatures, SparseArray, SingleCellExperiment,
27 | TxDb.Hsapiens.UCSC.hg19.knownGene, hgu95av2.db, airway (>= 1.15.1),
28 | BiocStyle, knitr, rmarkdown, RUnit, testthat, digest
29 | VignetteBuilder: knitr
30 | Collate: Assays-class.R
31 | SummarizedExperiment-class.R
32 | RangedSummarizedExperiment-class.R
33 | intra-range-methods.R
34 | inter-range-methods.R
35 | coverage-methods.R
36 | combine-methods.R
37 | findOverlaps-methods.R
38 | nearest-methods.R
39 | makeSummarizedExperimentFromExpressionSet.R
40 | makeSummarizedExperimentFromDataFrame.R
41 | makeSummarizedExperimentFromLoom.R
42 | zzz.R
43 |
--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | import(methods)
2 | importFrom(utils, read.delim, .DollarNames)
3 | importFrom(stats, setNames)
4 | importFrom(tools, file_path_as_absolute)
5 | importClassFrom(Matrix, Matrix) # for the "acbind" and "arbind" methods
6 | import(BiocGenerics)
7 | importFrom(MatrixGenerics, rowRanges)
8 | import(S4Vectors)
9 | import(IRanges)
10 | import(GenomeInfoDb)
11 | import(GenomicRanges)
12 | import(Biobase)
13 | import(S4Arrays)
14 | import(DelayedArray)
15 |
16 |
17 | ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
18 | ### Export S4 classes
19 | ###
20 |
21 | exportClasses(
22 | Assays, SimpleAssays, ShallowData, ShallowSimpleListAssays,
23 | Assays_OR_NULL,
24 | SummarizedExperiment,
25 | RangedSummarizedExperiment
26 | )
27 |
28 |
29 | ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
30 | ### Export S4 methods for generics not defined in SummarizedExperiment
31 | ###
32 |
33 | exportMethods(
34 | length, names, "names<-",
35 | dim, "dimnames<-",
36 | "[", "[<-", "[[", "[[<-", "$", "$<-",
37 | as.data.frame,
38 | coerce,
39 | show,
40 | c,
41 | merge,
42 | Compare,
43 |
44 | ## Generics defined in BiocGenerics:
45 | nrow, ncol,
46 | rownames, colnames,
47 | cbind, rbind,
48 | duplicated, match,
49 | is.unsorted, order, rank, sort,
50 | start, "start<-", end, "end<-", width, "width<-",
51 | strand, "strand<-",
52 | saveRDS,
53 | updateObject,
54 |
55 | ## Generics defined in S4Vectors:
56 | showAsCell,
57 | "metadata<-",
58 | parallel_slot_names, vertical_slot_names, horizontal_slot_names,
59 | mcols, "mcols<-",
60 | elementMetadata, "elementMetadata<-",
61 | values, "values<-",
62 | subset,
63 | pcompare,
64 |
65 | ## Generics defined in IRanges:
66 | narrow,
67 | ranges, "ranges<-",
68 | shift, resize, flank, promoters, terminators, restrict, trim,
69 | isDisjoint, disjointBins,
70 | coverage,
71 | findOverlaps,
72 | precede, follow, nearest, distance, distanceToNearest,
73 |
74 | ## Generics defined in GenomeInfoDb:
75 | seqinfo, "seqinfo<-",
76 | seqnames, "seqnames<-",
77 |
78 | ## Generics defined in DelayedArray (rowRanges is from MatrixGenerics):
79 | acbind, arbind,
80 | rowRanges, realize
81 | )
82 |
83 |
84 | ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
85 | ### Export non-generic functions
86 | ###
87 |
88 | export(
89 | Assays,
90 | SummarizedExperiment,
91 | .DollarNames.SummarizedExperiment,
92 | .DollarNames.RangedSummarizedExperiment,
93 | makeSummarizedExperimentFromExpressionSet,
94 | makeSummarizedExperimentFromDataFrame,
95 | makeSummarizedExperimentFromLoom,
96 | naiveRangeMapper, probeRangeMapper, geneRangeMapper
97 | )
98 |
99 |
100 | ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
101 | ### Export S4 generics defined in SummarizedExperiment + export corresponding
102 | ### methods
103 | ###
104 |
105 | export(
106 | "rowRanges<-",
107 | rowData, "rowData<-",
108 | colData, "colData<-",
109 | assayNames, "assayNames<-",
110 | assays, "assays<-",
111 | assay, "assay<-"
112 | )
113 |
114 | ### Exactly the same list as above.
115 | exportMethods(
116 | "rowRanges<-",
117 | rowData, "rowData<-",
118 | colData, "colData<-",
119 | assayNames, "assayNames<-",
120 | assays, "assays<-",
121 | assay, "assay<-"
122 | )
123 |
124 | ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
125 | ### Register S3 methods
126 | ###
127 |
128 | S3method(.DollarNames, SummarizedExperiment)
129 | S3method(.DollarNames, RangedSummarizedExperiment)
130 |
--------------------------------------------------------------------------------
/NEWS:
--------------------------------------------------------------------------------
1 | CHANGES IN VERSION 1.38.0
2 | -------------------------
3 |
4 | - No changes in this version.
5 |
6 |
7 | CHANGES IN VERSION 1.36.0
8 | -------------------------
9 |
10 | NEW FEATURES
11 |
12 | o Calling saveRDS() on a SummarizedExperiment object that contains
13 | out-of-memory data now raises an error with a message that redirects
14 | the user to HDF5Array::saveHDF5SummarizedExperiment().
15 |
16 | SIGNIFICANT USER-VISIBLE CHANGES
17 |
18 | o Move documentation of constructor function SummarizedExperiment()
19 | from RangedSummarizedExperiment-class.Rd to SummarizedExperiment-class.Rd.
20 | See https://github.com/Bioconductor/SummarizedExperiment/issues/80
21 |
22 | o Change default value of 'rowData' argument in SummarizedExperiment()
23 | constructor from GRangesList() to NULL.
24 |
25 | BUG FIXES
26 |
27 | o Fix typos in error message from assay() and assays() setters.
28 |
29 |
30 | CHANGES IN VERSION 1.34.0
31 | -------------------------
32 |
33 | NEW FEATURES
34 |
35 | o Add terminators() method, same as promoters() but for terminator regions.
36 |
37 | o Add "Top-level dimnames vs assay-level dimnames" section to vignette.
38 | Also fix typo in man/SummarizedExperiment-class.Rd. This is in response
39 | to https://github.com/Bioconductor/SummarizedExperiment/issues/79
40 |
41 | BUG FIXES
42 |
43 | o Fix typo in man/SummarizedExperiment-class.Rd
44 |
45 |
46 | CHANGES IN VERSION 1.32.0
47 | -------------------------
48 |
49 | SIGNIFICANT USER-VISIBLE CHANGES
50 |
51 | o Vignette gains a new section about interactive visualization with iSEE.
52 |
53 |
54 | CHANGES IN VERSION 1.30.0
55 | -------------------------
56 |
57 | DEPRECATED AND DEFUNCT
58 |
59 | o Finally remove readKallisto() (got deprecated in BioC 3.12 and defunct
60 | in BioC 3.15).
61 |
62 |
63 | CHANGES IN VERSION 1.28.0
64 | -------------------------
65 |
66 | SIGNIFICANT USER-VISIBLE CHANGES
67 |
68 | o SummarizedExperiment objects now accept NAs in their rownames.
69 | Important notes:
70 | - NAs in the **rownames** are now tolerated but will cause problems
71 | downstream e.g. they break the rowData() getter unless 'use.names=FALSE'
72 | is used.
73 | - NAs in the **colnames** are not and cannot be supported at the moment!
74 | Right now they break the SummarizedExperiment() constructor in an ugly
75 | way (error message not super helpful):
76 | > SummarizedExperiment(m)
77 | Error in DataFrame(x = seq_len(ncol(a1)), row.names = nms) :
78 | missing values in 'row.names'
79 | This will need to be improved.
80 | - At the root of these problems is the fact that at the moment DataFrame
81 | objects do NOT support NAs in their rownames.
82 | Bottom line: NAs in the dimnames of a SummarizedExperiment object should
83 | be avoided at all cost. One way to deal with them is to replace them with
84 | empty strings ("").
85 | See commit 71872cc03b7c0195fb80d1d09409243f049ebb3f.
86 |
87 | o Small tweak to combineRows/combineCols: combineRows() and combineCols()
88 | no longer need to "fix" the dimnames that end up on the combined assays
89 | of the returned SummarizedExperiment object. So the assay dimnames are
90 | now returned as-is.
91 | See commit 1d6610eb168330f32433273e4fe49da173dcd33b.
92 |
93 |
94 | CHANGES IN VERSION 1.26.0
95 | -------------------------
96 |
97 | DEPRECATED AND DEFUNCT
98 |
99 | o readKallisto() is now defunct after being deprecated in BioC 3.12.
100 |
101 |
102 | CHANGES IN VERSION 1.24.0
103 | -------------------------
104 |
105 | NEW FEATURES
106 |
107 | o Add 'checkDimnames' argument to SummarizedExperiment() constructor
108 | function
109 |
110 | o Add showAsCell() method for SummarizedExperiment objects.
111 |
112 | SIGNIFICANT USER-VISIBLE CHANGES
113 |
114 | o Check the assay dimnames at SummarizedExperiment construction time:
115 | The SummarizedExperiment() constructor function now raises an error
116 | if one of the supplied assays has rownames and/or colnames that don't
117 | match those of the SummarizedExperiment object to construct.
118 |
119 |
120 | CHANGES IN VERSION 1.22.0
121 | -------------------------
122 |
123 | NEW FEATURES
124 |
125 | o Add combineRows() and combineCols() methods for SummarizedExperiment
126 | objects and derivatives. These are more flexible versions of rbind()
127 | and cbind() that don't require the objects to combine to have the same
128 | columns or rows. Contributed by Aaron Lun.
129 |
130 |
131 | CHANGES IN VERSION 1.20.0
132 | -------------------------
133 |
134 | SIGNIFICANT USER-VISIBLE CHANGES
135 |
136 | o SummarizedExperiment now depends on the MatrixGenerics package.
137 |
138 | o DelayedArray was moved from Depends to Imports.
139 |
140 | DEPRECATED AND DEFUNCT
141 |
142 | o Deprecated readKallisto().
143 |
144 | BUG FIXES
145 |
146 | o Avoid triggering copies of the assays in assays() getter.
147 |
148 | o Fix long-standing bug in dim() method for Assays objects.
149 |
150 | o Fix assays(x) <- SimpleList(). Before that fix this operation was turning
151 | SummarizedExperiment object (or derivative) 'x' into an invalid object.
152 |
153 |
154 | CHANGES IN VERSION 1.18.0
155 | -------------------------
156 |
157 | NEW FEATURES
158 |
159 | o SummarizedExperiment objects with assays of > 4 dimensions are now
160 | fully supported.
161 |
162 | SIGNIFICANT USER-VISIBLE CHANGES
163 |
164 | o By default the assays() and assay() setters now reject inconsistent
165 | dimnames.
166 | By default the dimnames on the supplied assay(s) must be identical to
167 | the dimnames on the SummarizedExperiment object. The user now must use
168 | 'withDimnames=FALSE' if it's not the case or they get an error.
169 | This is for symmetry with the behavior of the assays() and assay()
170 | getters (see issue #35). Unfortunately this change is likely to break
171 | existing code but at least the fix is easy.
172 |
173 | o dimnames() now returns NULL instead of list(NULL, NULL) on a
174 | SummarizedExperiment object with no dimnames. This is consistent
175 | with matrix objects.
176 |
177 | o Swap positions of arguments '...' and 'withDimnames' in assays()
178 | setter and getter. So now it's:
179 |
180 | assays(x, withDimnames=TRUE, ...)
181 | assays(x, withDimnames=TRUE, ...) <- value
182 |
183 | o Add 'withDimnames' argument to the assay() getter/setter. So now it's:
184 |
185 | assay(x, i, withDimnames=TRUE, ...)
186 | assay(x, i, withDimnames=TRUE, ...) <- value
187 |
188 | Note that before this change, the user was able to explicitly set
189 | 'withDimnames' when calling assay() but since this was not a formal
190 | argument it was forwarded to assays() via the ellipsis. Having it as
191 | a formal argument makes it easier to discover and allows tab completion.
192 |
193 |
194 | CHANGES IN VERSION 1.16.0
195 | -------------------------
196 |
197 | NEW FEATURES
198 |
199 | o Some improvements to the SummarizedExperiment() constructor (see commit
200 | 0d74843c)
201 |
202 | o Support 'colData(SummarizedExperiment) <- NULL' to clear colData
203 |
204 | SIGNIFICANT USER-VISIBLE CHANGES
205 |
206 | o All the arguments of the SummarizedExperiment() constructor are now
207 | visible (no more ellipsis) and have default values. So tab completion
208 | works. See commit 0d74843c
209 |
210 | o The dimnames on the individual assays of a SummarizedExperiment derivative
211 | now can be anything (see issue #25 for the details)
212 |
213 | BUG FIXES
214 |
215 | o Some fixes to the SummarizedExperiment() constructor (see commit
216 | 0d74843c)
217 |
218 | o Address all.equal() false positives on SummarizedExperiment objects
219 | (see issue #16 for the details)
220 |
221 |
222 | CHANGES IN VERSION 1.12.0
223 | -------------------------
224 |
225 | NEW FEATURES
226 |
227 | o The package has a new vignette "Extending the SummarizedExperiment class"
228 | by Aaron Lun intended for developers. It documents in great detail the
229 | process of implementing a SummarizedExperiment extension (a.k.a.
230 | subclass).
231 |
232 | SIGNIFICANT USER-VISIBLE CHANGES
233 |
234 | o rowData() gains use.names=TRUE argument; prior behavior was equivalent
235 | to use.names=FALSE. rowData() by default fails when rownames()
236 | contains NAs.
237 |
238 | BUG FIXES
239 |
240 | o Better error handling in SummarizedExperiment() constructor.
241 | SummarizedExperiment() now prints an informative error message when
242 | the supplied assays have insane rownames or colnames. This addresses
243 | https://github.com/Bioconductor/SummarizedExperiment/issues/7
244 |
245 |
246 | CHANGES IN VERSION 1.10.0
247 | -------------------------
248 |
249 | NEW FEATURES
250 |
251 | o Add "subset" method for SummarizedExperiment objects. See
252 | https://github.com/Bioconductor/SummarizedExperiment/pull/6
253 |
254 | o rowRanges() now is supported on a SummarizedExperiment object that is
255 | not a RangedSummarizedExperiment, and returns NULL. Also doing
256 | 'rowRanges(x) <- NULL' on a RangedSummarizedExperiment object now is
257 | supported and degrades it to a SummarizedExperiment instance.
258 |
259 | o Add 'BACKEND' argument to "realize" method for SummarizedExperiment
260 | objects.
261 |
262 | SIGNIFICANT USER-VISIBLE CHANGES
263 |
264 | o saveHDF5SummarizedExperiment() and loadHDF5SummarizedExperiment() are
265 | now in the HDF5Array package.
266 |
267 | o Replace old "updateObject" method for SummarizedExperiment objects with
268 | a new one.
269 |
270 | The new method calls updateObject() on all the assays of the object.
271 | This will update SummarizedExperiment objects (and their derivatives
272 | like BSseq objects) that have "old" DelayedArray objects in their assays.
273 |
274 | The old method has been around since BioC 3.2 (released 2.5 years ago)
275 | and was used to update objects made prior to the change of internals
276 | that happened between BioC 3.1 and BioC 3.2. All these "old" objects
277 | should have been updated by now so we don't need this anymore.
278 |
279 | BUG FIXES
280 |
281 | o Modify the "[<-" method for SummarizedExperiment to leave 'metadata(x)'
282 | intact instead of trying to combine it with 'metadata(value)'. With this
283 | change 'x[i , j] <- x[i , j]' behaves like a no-op (as expected) instead
284 | of duplicating metadata(x).
285 |
286 | o The SummarizedExperiment() constructor does not try to downgrade the
287 | supplied rowData and/or colData to DataFrame anymore if they derive
288 | from DataFrame.
289 |
290 |
291 | CHANGES IN VERSION 1.8.0
292 | ------------------------
293 |
294 | NEW FEATURES
295 |
296 | o Add 'chunk_dim' and 'level' arguments to saveHDF5SummarizedExperiment().
297 |
298 | o Add coercion from ExpressionSet to SummarizedExperiment.
299 |
300 | DEPRECATED AND DEFUNCT
301 |
302 | o Remove 'force' argument from seqinfo() and seqlevels() setters (the
303 | argument got deprecated in BioC 3.5 in favor of new and more flexible
304 | 'pruning.mode' argument).
305 |
306 | BUG FIXES
307 |
308 | o Coercion from SummarizedExperiment to RangedSummarizedExperiment was
309 | losing the metadata columns. Fixed now.
310 |
311 | o Fix cbind() and rbind() of SummarizedExperiment objects when some of the
312 | assays are DataFrame or data.frame objects.
313 |
314 | o '$' completion on SummarizedExperiment works in RStudio and on
315 | RangedSummarizedExperiment.
316 |
317 |
318 | CHANGES IN VERSION 1.6.0
319 | ------------------------
320 |
321 | NEW FEATURES
322 |
323 | o Add saveHDF5SummarizedExperiment() and loadHDF5SummarizedExperiment() for
324 | saving/loading HDF5-based SummarizedExperiment objects to/from disk.
325 |
326 | DEPRECATED AND DEFUNCT
327 |
328 | o Remove SummarizedExperiment0 class (was introduced to ease transition
329 | from old SummarizedExperiment class defined in GenomicRanges to new
330 | RangedSummarizedExperiment class defined in SummarizedExperiment package).
331 |
332 |
333 | CHANGES IN VERSION 1.4.0
334 | ------------------------
335 |
336 | NEW FEATURES
337 |
338 | o Add makeSummarizedExperimentFromDataFrame() function.
339 |
340 | o Add "acbind" and "arbind" methods for Matrix objects.
341 |
342 | SIGNIFICANT USER-VISIBLE CHANGES
343 |
344 | o Speed up "cbind" method for SummarizedExperiment objects based on a
345 | suggestion by Peter Hickey.
346 |
347 | DEPRECATED AND DEFUNCT
348 |
349 | o Remove exptData() getter and setter (were defunct in BioC 3.3).
350 |
351 | BUG FIXES
352 |
353 |
354 | CHANGES IN VERSION 1.2.0
355 | ------------------------
356 |
357 | NEW FEATURES
358 |
359 | o Add 'rowData' argument to SummarizedExperiment() constructor. This allows
360 | the user to supply the row data at construction time.
361 |
362 | o The SummarizedExperiment() constructor function and the assay() setter
363 | now both take any matrix-like object as long as the resulting
364 | SummarizedExperiment object is valid.
365 |
366 | o Support r/cbind'ing of SummarizedExperiment objects with assays of
367 | arbitrary dimensions (based on a patch by Pete Hickey).
368 |
369 | o Add "is.unsorted" method for RangedSummarizedExperiment objects.
370 |
371 | o NULL colnames() supported during SummarizedExperiment construction.
372 |
373 | o readKallisto() warns early when files need names.
374 |
375 | o base::rank() gained a new 'ties.method="last"' option and base::order()
376 | a new argument ('method') in R 3.3. Thus so do the "rank" and "order"
377 | methods for RangedSummarizedExperiment objects.
378 |
379 | SIGNIFICANT USER-VISIBLE CHANGES
380 |
381 | o Re-introduce the rowData() accessor (was defunct in BioC 3.2) as an alias
382 | for mcols() and make it the preferred way to access the row data. There
383 | is now a pleasant symmetry between rowData and colData.
384 |
385 | o Rename SummarizedExperiment0 class -> SummarizedExperiment.
386 |
387 | o Improved vignette.
388 |
389 | o Remove updateObject() method for "old" SummarizedExperiment objects.
390 |
391 | DEPRECATED AND DEFUNCT
392 |
393 | o exptData() is now defunct, metadata() should be used instead.
394 |
395 | BUG FIXES
396 |
397 | o Fix bug in "sort" method for RangedSummarizedExperiment objects when
398 | 'ignore.strand=TRUE' (the argument was ignored).
399 |
400 | o Fix 2 bugs when r/cbind'ing SummarizedExperiment objects:
401 | - r/cbind'ing assays without names would return only the first element.
402 | See https://stat.ethz.ch/pipermail/bioc-devel/2015-November/008318.html
403 | - r/cbind'ing assays with names in different order would stop() with
404 | 'Assays must have the same names()'
405 |
406 | o Fix validity method for SummarizedExperiment objects reporting incorrect
407 | numbers when the nb of cols in assay(x) doesn't match the nb of rows in
408 | colData(x).
409 |
410 | o assay colnames() must agree with colData rownames()
411 |
412 | o Fix bug where assays(se, withDimnames=TRUE) was dropping the dimnames of
413 | the 3rd and higher-order dimensions of the assays. Thanks to Pete Hickey
414 | for catching this and providing a patch.
415 |
416 | o A couple of minor tweaks to the rowData() setter to make it behave
417 | consistently with mcols()/elementMetadata() setters for Vector objects
418 | in general.
419 |
420 |
421 | CHANGES IN VERSION 0.3.*
422 | ------------------------
423 |
424 | NEW FEATURES
425 |
426 | o readKallisto() and readKallistoBootstrap() input kallisto
427 | transcript quantification output into SummarizedExperiment (and
428 | other) instances.
429 |
430 |
--------------------------------------------------------------------------------
/NOTES:
--------------------------------------------------------------------------------
1 | We need a mechanism to save an HDF5Array-based (or more generally
2 | DelayedArray-based) SummarizedExperiment object to disk.
3 |
4 | - The object can have more than 1 assay. Even though most of the time these
5 | assays are either all in memory (e.g. ordinary arrays or data frames) or all
6 | on disk and using the same backend (i.e. all HDF5-based DelayedArray
7 | objects), they can be a mix of in-memory and on-disk assays. Even a given
8 | assay can use more than 1 kind of on-disk backend. For example it could be
9 | the result of adding an HDF5-based DelayedArray object with a DelayedArray
10 | object based on another backend. Since the addition of DelayedArray objects
11 | is delayed, the result of this addition is a DelayedArray object with mixed
12 | backends.
13 |
14 | - Standard mechanisms save() and saveRDS() cannot handle this complexity.
15 |
16 | - We need a mechanism that produces several files: one .rda (or .rds)
17 | file containing the result of calling save() (or saveRDS()) on the
18 | object + all the files (e.g. HDF5) containing the on-disk assay data.
19 | The files containing the on-disk assay data can be a mix of HDF5 files
20 | and other formats.
21 | How should these files be bundled together? By putting them together in a
22 | destination folder? By creating a tarball of this folder? Should the creation
23 | of the tarball be left to the user or should the save function create it?
24 |
25 | - Should the on-disk assays with delayed operations on them be "realized"
26 | before the SummarizedExperiment object is saved to disk? Doing this has
27 | some significant advantages:
28 | (1) It "simplifies" the object: it reduces the number of files needed to
29 | store the on-disk assay data (only 1 file per on-disk assay).
30 | (2) It relocates and reduces the size of the on-disk data needed to
31 | represent the object.
32 |
33 | - Should the in-memory assays be converted into on-disk assays before saving?
34 | Should this be controlled by the user?
35 |
36 |
--------------------------------------------------------------------------------
/R/RangedSummarizedExperiment-class.R:
--------------------------------------------------------------------------------
1 | ### =========================================================================
2 | ### RangedSummarizedExperiment objects
3 | ### -------------------------------------------------------------------------
4 | ###
5 |
6 |
### RangedSummarizedExperiment extends SummarizedExperiment with a 'rowRanges'
### slot. The top-level mcols are stored on the 'rowRanges' component, so the
### inherited 'elementMetadata' slot must contain a zero-column DataFrame at
### all times (this is checked by the validity method).
setClass("RangedSummarizedExperiment",
    contains="SummarizedExperiment",
    slots=c(rowRanges="GenomicRanges_OR_GRangesList"),
    prototype=prototype(rowRanges=GRanges())
)
19 |
### Prepend the new parallel slot ("rowRanges") to the parallel slots reported
### by the parent class. The new parallel slots must come **first**. See the
### R/Vector-class.R file in the S4Vectors package for what slots should or
### should not be considered "parallel".
setMethod("parallel_slot_names", "RangedSummarizedExperiment",
    function(x)
    {
        inherited_slots <- callNextMethod()
        c("rowRanges", inherited_slots)
    }
)
27 |
28 |
29 | ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
30 | ### Validity
31 | ###
32 |
### Validity method for RangedSummarizedExperiment objects.
### The names and mcols of the object must live on its 'rowRanges' slot, not
### in its 'NAMES' and 'elementMetadata' slots. Returns NULL when 'x' is valid,
### otherwise a single string describing the first problem found.
.valid.RangedSummarizedExperiment <- function(x)
{
    names_slot_is_set <- !is.null(x@NAMES)
    if (names_slot_is_set)
        return("'NAMES' slot must be set to NULL at all time")

    nb_mcols <- ncol(x@elementMetadata)
    if (nb_mcols != 0L) {
        problem <- wmsg("'elementMetadata' slot must contain a zero-column ",
                        "DataFrame at all time")
        return(problem)
    }

    ## One range (or list element) per row is required.
    nb_ranges <- length(x@rowRanges)
    nb_rows <- length(x)
    if (nb_ranges == nb_rows)
        return(NULL)
    sprintf(
        "\n length of 'rowRanges' (%d) must equal nb of rows in 'x' (%d)",
        nb_ranges, nb_rows)
}

setValidity2("RangedSummarizedExperiment", .valid.RangedSummarizedExperiment)
54 |
55 |
56 | ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
57 | ### Constructor
58 | ###
59 |
### Low-level constructor. Wraps 'assays' in an Assays object, pairs
### 'rowRanges' with a zero-column DataFrame of matching length for the
### 'elementMetadata' slot, and coerces 'metadata' to a list.
new_RangedSummarizedExperiment <- function(assays, rowRanges, colData,
                                           metadata)
{
    wrapped_assays <- Assays(assays, as.null.if.no.assay=TRUE)
    nb_rows <- length(rowRanges)
    zero_col_mcols <- S4Vectors:::make_zero_col_DataFrame(nb_rows)
    new("RangedSummarizedExperiment",
        assays=wrapped_assays,
        rowRanges=rowRanges,
        colData=colData,
        elementMetadata=zero_col_mcols,
        metadata=as.list(metadata))
}
71 |
72 |
73 | ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
74 | ### Coercion
75 | ###
76 | ### See makeSummarizedExperimentFromExpressionSet.R for coercion back and
77 | ### forth between SummarizedExperiment and ExpressionSet.
78 | ###
79 |
### Downgrade to a plain SummarizedExperiment: the names and metadata columns
### carried by the 'rowRanges' slot are handed to new_SummarizedExperiment()
### together with the assays, colData and metadata of 'from'.
.from_RangedSummarizedExperiment_to_SummarizedExperiment <- function(from)
{
    row_ranges <- from@rowRanges
    row_names <- names(row_ranges)
    row_mcols <- mcols(row_ranges, use.names=FALSE)
    new_SummarizedExperiment(from@assays,
                             row_names,
                             row_mcols,
                             from@colData,
                             from@metadata)
}

setAs(from="RangedSummarizedExperiment", to="SummarizedExperiment",
    def=.from_RangedSummarizedExperiment_to_SummarizedExperiment
)
92 |
### Upgrade to a RangedSummarizedExperiment. The 'rowRanges' is obtained by
### relisting an empty GRanges along a partitioning made of length(from)
### zero-width partitions (i.e. one empty list element per row), named after
### the rows of 'from'. The metadata columns of 'from' are moved onto it.
.from_SummarizedExperiment_to_RangedSummarizedExperiment <- function(from)
{
    nb_rows <- length(from)
    empty_partitions <- PartitioningByEnd(integer(nb_rows), names=names(from))
    row_ranges <- relist(GRanges(), empty_partitions)
    mcols(row_ranges) <- mcols(from, use.names=FALSE)
    new_RangedSummarizedExperiment(from@assays,
                                   row_ranges,
                                   from@colData,
                                   from@metadata)
}

setAs(from="SummarizedExperiment", to="RangedSummarizedExperiment",
    def=.from_SummarizedExperiment_to_RangedSummarizedExperiment
)
107 |
108 |
109 | ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
110 | ### Accessors
111 | ###
112 |
### The rowRanges() generic is defined in the MatrixGenerics package.
### A SummarizedExperiment that is not a RangedSummarizedExperiment has no
### row ranges, so the getter always returns NULL.
setMethod("rowRanges", signature(x="SummarizedExperiment"),
    function(x, ...)
    {
        NULL
    }
)
117 |
### Return the content of the 'rowRanges' slot, passed through updateObject()
### (without validity check) so that old GRanges instances get fixed
### on-the-fly.
setMethod("rowRanges", signature(x="RangedSummarizedExperiment"),
    function(x, ...)
    {
        stored_ranges <- x@rowRanges
        updateObject(stored_ranges, check=FALSE)
    }
)
122 |
### Generic for the rowRanges() setter. Methods are defined below.
setGeneric(
    name="rowRanges<-",
    def=function(x, ..., value) standardGeneric("rowRanges<-")
)
125 |
### No-op: 'x' is returned unchanged when the supplied value is NULL.
setReplaceMethod("rowRanges",
    signature(x="SummarizedExperiment", value="NULL"),
    function(x, ..., value)
    {
        x
    }
)
130 |
### Setting the row ranges to NULL degrades 'x' to a SummarizedExperiment
### instance (strict coercion).
setReplaceMethod("rowRanges",
    signature(x="RangedSummarizedExperiment", value="NULL"),
    function(x, ..., value)
    {
        as(x, "SummarizedExperiment", strict=TRUE)
    }
)
135 |
### Workhorse behind the rowRanges() setter for GenomicRanges and GRangesList
### values.
### 'x' is first turned into an up-to-date RangedSummarizedExperiment (via
### updateObject() if it already is one, via coercion otherwise). Its
### 'rowRanges' slot is then replaced with 'value' and its 'elementMetadata'
### slot with a zero-column DataFrame of matching length. Extra arguments in
### '...' are forwarded to replaceSlots(). The full validity check is skipped;
### only the check performed by .valid.SummarizedExperiment.assays_nrow() is
### run, and any message it returns is raised as an error.
.SummarizedExperiment.rowRanges.replace <-
    function(x, ..., value)
{
    already_ranged <- is(x, "RangedSummarizedExperiment")
    x <- if (already_ranged) {
        updateObject(x, check=FALSE)
    } else {
        as(x, "RangedSummarizedExperiment")
    }
    x <- BiocGenerics:::replaceSlots(x, ...,
        rowRanges=value,
        elementMetadata=S4Vectors:::make_zero_col_DataFrame(length(value)),
        check=FALSE)
    problem <- .valid.SummarizedExperiment.assays_nrow(x)
    if (is.null(problem))
        return(x)
    stop(problem)
}

setReplaceMethod("rowRanges",
    signature(x="SummarizedExperiment", value="GenomicRanges"),
    .SummarizedExperiment.rowRanges.replace)

setReplaceMethod("rowRanges",
    signature(x="SummarizedExperiment", value="GRangesList"),
    .SummarizedExperiment.rowRanges.replace)
159 |
### The names of a RangedSummarizedExperiment are those of its row ranges.
setMethod("names", "RangedSummarizedExperiment",
    function(x)
    {
        row_ranges <- rowRanges(x)
        names(row_ranges)
    }
)
163 |
### Set the names on the row ranges, then put the renamed ranges back in the
### 'rowRanges' slot (no validity check).
setReplaceMethod("names", "RangedSummarizedExperiment",
    function(x, value)
    {
        renamed_ranges <- rowRanges(x)
        names(renamed_ranges) <- value
        BiocGenerics:::replaceSlots(x,
                                    rowRanges=renamed_ranges,
                                    check=FALSE)
    })
171 |
### Set the dimnames from a list: its 1st element becomes the names of the
### row ranges and its 2nd element the rownames of the colData. Both slots
### are replaced in one go (no validity check).
setReplaceMethod("dimnames", c("RangedSummarizedExperiment", "list"),
    function(x, value)
    {
        new_rowRanges <- rowRanges(x)
        names(new_rowRanges) <- value[[1L]]

        new_colData <- colData(x)
        rownames(new_colData) <- value[[2L]]

        BiocGenerics:::replaceSlots(x,
                                    rowRanges=new_rowRanges,
                                    colData=new_colData,
                                    check=FALSE)
    })
184 |
185 |
186 | ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
187 | ### Subsetting
188 | ###
189 |
### Reuse the SummarizedExperiment '$' completion helper as-is for
### RangedSummarizedExperiment objects.
.DollarNames.RangedSummarizedExperiment <- .DollarNames.SummarizedExperiment
191 |
### Subset a RangedSummarizedExperiment along both dimensions.
### 'subset' is evaluated against rowRanges(x) to obtain the row index 'i',
### and 'select' against colData(x) to obtain the column index 'j'. Extra
### arguments in '...' are passed to both evaluations. Returns x[i, j].
### NOTE(review): S4Vectors:::evalqForSubset() is an internal helper that
### presumably captures the unevaluated 'subset'/'select' expressions from
### this method's frame, so both calls should stay directly in the method
### body -- confirm before refactoring.
setMethod("subset", "RangedSummarizedExperiment",
    function(x, subset, select, ...)
{
    i <- S4Vectors:::evalqForSubset(subset, rowRanges(x), ...)
    j <- S4Vectors:::evalqForSubset(select, colData(x), ...)
    x[i, j]
})
199 |
200 |
201 | ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
202 | ## rowRanges-as-GRanges compatibility: allow direct access to the GRanges /
203 | ## GRangesList rowRanges for select functions
204 |
205 | ## Not supported:
206 | ##
207 | ## Not consistent SummarizedExperiment structure: length, names,
208 | ## as.data.frame, c.
209 | ## Length-changing endomorphisms: disjoin, gaps, reduce, unique.
210 | ## 'legacy' data types / functions: as "RangedData", as "IntegerRangesList",
211 | ## renameSeqlevels, keepSeqlevels.
212 | ## Possible to implement, but not yet: Ops, map, window, window<-
213 |
214 | ## mcols
### mcols(x) returns the metadata columns of rowRanges(x).
setMethod("mcols", "RangedSummarizedExperiment",
    function(x, use.names=TRUE, ...)
{
    row_ranges <- rowRanges(x)
    mcols(row_ranges, use.names=use.names, ...)
})
220 |
### mcols(x) <- value replaces the metadata columns of rowRanges(x).
### The updated ranges are stored with replaceSlots(check=FALSE); arguments
### passed through '...' are not used.
setReplaceMethod("mcols", "RangedSummarizedExperiment",
    function(x, ..., value)
{
    updated_ranges <- rowRanges(x)
    mcols(updated_ranges) <- value
    BiocGenerics:::replaceSlots(x, rowRanges=updated_ranges, check=FALSE)
})
232 |
233 | ### mcols() is the recommended way for accessing the metadata columns.
234 | ### Use of values() or elementMetadata() is discouraged.
235 |
### elementMetadata(x) is taken from rowRanges(x).
setMethod("elementMetadata", "RangedSummarizedExperiment",
    function(x, use.names=FALSE, ...)
{
    row_ranges <- rowRanges(x)
    elementMetadata(row_ranges, use.names=use.names, ...)
})
241 |
### elementMetadata(x, ...) <- value modifies the rowRanges and stores them
### back through the "rowRanges<-" setter.
setReplaceMethod("elementMetadata", "RangedSummarizedExperiment",
    function(x, ..., value)
{
    row_ranges <- rowRanges(x)
    elementMetadata(row_ranges, ...) <- value
    rowRanges(x) <- row_ranges
    x
})
248 |
249 | ## Single dispatch, generic signature fun(x, ...)
local({
    ## Generics for which a RangedSummarizedExperiment method is generated.
    ## Every generated method forwards the call to rowRanges(x).
    .funs <-
        c("duplicated", "end", "end<-", "ranges", "seqinfo", "seqnames",
          "start", "start<-", "strand", "width", "width<-")

    ## Replacement functions (name ends in "<-"): their methods must store
    ## the result back into 'x' and return 'x'.
    endomorphisms <- .funs[grepl("<-$", .funs)]

    ## Template function; its formals and body are rewritten on each
    ## iteration before it is registered as a method.
    tmpl <- function() {}
    ## NOTE(review): parent.frame(2) is presumably the environment from which
    ## local() was called (so the methods do not capture this temporary
    ## environment) -- confirm.
    environment(tmpl) <- parent.frame(2)
    for (.fun in .funs) {
        generic <- getGeneric(.fun)
        ## The method gets exactly the formals of its generic.
        formals(tmpl) <- formals(generic)
        ## Argument list for do.call(): each formal is passed on as a symbol
        ## of the same name ...
        fmls <- as.list(formals(tmpl))
        fmls[] <- sapply(names(fmls), as.symbol)
        ## ... except the dispatch argument, which becomes 'rowRanges(x)'.
        fmls[[generic@signature]] <- quote(rowRanges(x))
        ## Setter body: assign the result to rowRanges(x) and return x.
        ## Getter body: return the result of calling the generic.
        if (.fun %in% endomorphisms)
            body(tmpl) <- substitute({
                rowRanges(x) <- do.call(FUN, ARGS)
                x
            }, list(FUN=.fun, ARGS=fmls))
        else
            body(tmpl) <-
                substitute(do.call(FUN, ARGS),
                           list(FUN=as.symbol(.fun), ARGS=fmls))
        setMethod(.fun, "RangedSummarizedExperiment", tmpl)
    }
})
277 |
### granges(x) returns rowRanges(x). Any 'use.mcols' value other than
### exactly FALSE is rejected.
setMethod("granges", "RangedSummarizedExperiment",
    function(x, use.mcols=FALSE, ...)
{
    default_mcols <- identical(use.mcols, FALSE)
    if (!default_mcols)
        stop("\"granges\" method for RangedSummarizedExperiment objects ",
             "does not support the 'use.mcols' argument")
    rowRanges(x)
})
286 |
287 | ## 2-argument dispatch:
288 | ## pcompare / Compare
289 | ##
### pcompare() on RangedSummarizedExperiment operands: each operand that is
### a RangedSummarizedExperiment is replaced by its rowRanges, any other
### operand is passed through untouched.
.RangedSummarizedExperiment.pcompare <-
    function(x, y)
{
    as_ranges <- function(obj)
    {
        if (is(obj, "RangedSummarizedExperiment")) rowRanges(obj) else obj
    }
    pcompare(as_ranges(x), as_ranges(y))
}
299 |
### Compare group generic (==, <, ...) on RangedSummarizedExperiment
### operands: RangedSummarizedExperiment operands are swapped for their
### rowRanges, then the operator that triggered the dispatch is re-invoked
### via callGeneric().
.RangedSummarizedExperiment.Compare <-
    function(e1, e2)
{
    as_ranges <- function(obj)
    {
        if (is(obj, "RangedSummarizedExperiment")) rowRanges(obj) else obj
    }
    e1 <- as_ranges(e1)
    e2 <- as_ranges(e2)
    callGeneric(e1=e1, e2=e2)
}
309 |
### Register the pcompare/Compare helpers for every operand combination that
### involves at least one RangedSummarizedExperiment.
local({
    rse <- "RangedSummarizedExperiment"
    for (.sig in list(c(rse, "ANY"), c("ANY", rse), c(rse, rse))) {
        setMethod("pcompare", .sig, .RangedSummarizedExperiment.pcompare)
        setMethod("Compare", .sig, .RangedSummarizedExperiment.Compare)
    }
})
321 |
322 | ## additional getters / setters
323 |
### strand(x) <- value sets the strand of the rowRanges and stores them back
### through the "rowRanges<-" setter. '...' is not used.
setReplaceMethod("strand", "RangedSummarizedExperiment",
    function(x, ..., value)
{
    row_ranges <- rowRanges(x)
    strand(row_ranges) <- value
    rowRanges(x) <- row_ranges
    x
})
330 |
### ranges(x) <- value sets the ranges of the rowRanges and stores them back
### through the "rowRanges<-" setter. '...' is not used.
setReplaceMethod("ranges", "RangedSummarizedExperiment",
    function(x, ..., value)
{
    row_ranges <- rowRanges(x)
    ranges(row_ranges) <- value
    rowRanges(x) <- row_ranges
    x
})
337 |
338 | ## order, rank, sort
339 |
### is.unsorted() is answered by the rowRanges, and only when they are a
### GenomicRanges object.
setMethod("is.unsorted", "RangedSummarizedExperiment",
    function(x, na.rm = FALSE, strictly = FALSE, ignore.strand = FALSE)
{
    ## 'x' is overwritten on purpose: the zero-argument callGeneric() below
    ## re-dispatches on the current values of the formal arguments.
    x <- rowRanges(x)
    supported <- is(x, "GenomicRanges")
    if (!supported)
        stop("is.unsorted() is not yet supported when 'rowRanges(x)' is a ",
             class(x), " object")
    callGeneric()
})
349 |
### order() on one or more RangedSummarizedExperiment objects orders their
### rowRanges; 'na.last', 'decreasing' and 'method' are passed on unchanged.
setMethod("order", "RangedSummarizedExperiment",
    function(..., na.last=TRUE, decreasing=FALSE,
                  method=c("auto", "shell", "radix"))
{
    all_ranges <- lapply(list(...), rowRanges)
    settings <- list(na.last=na.last,
                     decreasing=decreasing,
                     method=method)
    do.call("order", c(all_ranges, settings))
})
359 |
### rank() of a RangedSummarizedExperiment is the rank of its rowRanges.
### 'ties.method' is resolved with match.arg() before being passed on.
setMethod("rank", "RangedSummarizedExperiment",
    function(x, na.last = TRUE,
             ties.method = c("average", "first", "last", "random", "max", "min"))
{
    ties.method <- match.arg(ties.method)
    row_ranges <- rowRanges(x)
    rank(row_ranges, na.last=na.last, ties.method=ties.method)
})
367 |
### sort() reorders the rows of 'x' according to the order of its rowRanges.
### Only supported when the rowRanges are a GenomicRanges object.
setMethod("sort", "RangedSummarizedExperiment",
    function(x, decreasing = FALSE, ignore.strand = FALSE)
{
    x_rowRanges <- rowRanges(x)
    supported <- is(x_rowRanges, "GenomicRanges")
    if (!supported)
        stop("sort() is not yet supported when 'rowRanges(x)' is a ",
             class(x_rowRanges), " object")
    row_order <- GenomicRanges:::order_GenomicRanges(
        x_rowRanges,
        decreasing = decreasing,
        ignore.strand = ignore.strand
    )
    x[row_order]
})
380 |
381 | ## seqinfo (also seqlevels, genome, seqlevels<-, genome<-), seqinfo<-
382 |
### seqinfo(x) is read directly from the rowRanges slot.
setMethod("seqinfo", "RangedSummarizedExperiment",
    function(x)
{
    row_ranges <- x@rowRanges
    seqinfo(row_ranges)
})
388 |
### seqinfo(x, new2old, pruning.mode) <- value
###
### 'value' must be a Seqinfo object. Except in "fine" pruning mode, rows of
### 'x' whose ranges sit on seqlevels reported as dangling by
### getDanglingSeqlevels() are dropped from 'x' before the new seqinfo is
### set on the rowRanges slot. "fine" pruning is refused when the rowRanges
### are a GenomicRanges object. The result is checked with
### .valid.RangedSummarizedExperiment() before being returned.
.set_RangedSummarizedExperiment_seqinfo <-
    function(x, new2old=NULL,
             pruning.mode=c("error", "coarse", "fine", "tidy"),
             value)
{
    if (!is(value, "Seqinfo"))
        stop("the supplied 'seqinfo' must be a Seqinfo object")
    pruning.mode <- match.arg(pruning.mode)
    if (pruning.mode == "fine") {
        if (is(x@rowRanges, "GenomicRanges"))
            stop(wmsg("\"fine\" pruning mode is not supported on ",
                      class(x), " objects with a rowRanges component that ",
                      "is a GRanges object or a GenomicRanges derivative"))
    } else {
        dangling <- GenomeInfoDb:::getDanglingSeqlevels(x@rowRanges,
                                                        new2old=new2old,
                                                        pruning.mode=pruning.mode,
                                                        seqlevels(value))
        if (length(dangling) != 0L) {
            keep <- !(seqnames(x@rowRanges) %in% dangling)
            ## 'keep' is either a plain logical vector or a list-like object
            ## of logical vectors (e.g. LogicalList or RleList). The latter
            ## is collapsed to one logical value per row.
            if (is(keep, "List")) {
                keep <- if (pruning.mode == "coarse") {
                    all(keep)  # "coarse" pruning
                } else {
                    any(keep) | elementNROWS(keep) == 0L  # "tidy" pruning
                }
            }
            ## From here on 'keep' is a logical vector.
            x <- x[keep]
        }
    }
    seqinfo(x@rowRanges, new2old=new2old, pruning.mode=pruning.mode) <- value
    msg <- .valid.RangedSummarizedExperiment(x)
    if (is.character(msg))
        stop(msg)
    x
}
setReplaceMethod("seqinfo", "RangedSummarizedExperiment",
    .set_RangedSummarizedExperiment_seqinfo
)
432 |
### split(x, f, drop) delegates to splitAsList(). Arguments passed through
### '...' are accepted but not forwarded.
setMethod("split", "RangedSummarizedExperiment",
    function(x, f, drop=FALSE, ...)
{
    splitAsList(x, f, drop=drop)
})
438 |
439 |
440 | ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
441 | ### updateObject()
442 | ###
443 |
### updateObject() for RangedSummarizedExperiment objects: run the inherited
### method first, then update the rowRanges slot itself. '...' and 'verbose'
### are forwarded to the updateObject() call on the rowRanges.
.updateObject_RangedSummarizedExperiment <- function(object, ..., verbose=FALSE)
{
    object <- callNextMethod()  # call method for SummarizedExperiment objects
    object@rowRanges <- updateObject(object@rowRanges, ..., verbose=verbose)
    object
}

setMethod("updateObject", "RangedSummarizedExperiment",
    .updateObject_RangedSummarizedExperiment
)
454 |
455 |
--------------------------------------------------------------------------------
/R/combine-methods.R:
--------------------------------------------------------------------------------
1 | # Contains methods for combineRows and combineCols. These serve as more
2 | # fault-tolerant relaxed counterparts to rbind and cbind, respectively.
3 |
# combineRows(): stack SummarizedExperiment objects by row.
#
# The rowData of all inputs are combined with combineRows() and their colData
# with combineUniqueCols(). With use.names=TRUE the columns of every input
# are matched by name to the columns of the combined colData and the assays
# are rearranged accordingly (see combine_assays_by()). If at least one input
# is a RangedSummarizedExperiment the result gets rowRanges; inputs without
# ranges then contribute one empty GRangesList element per row.
setMethod("combineRows", "SummarizedExperiment", function(x, ..., delayed=TRUE, fill=NA, use.names=TRUE) {
    all.se <- list(x, ...)

    # rowData: failures are re-thrown with some context.
    all.rd <- lapply(all.se, rowData)
    com.rd <- tryCatch(
        do.call(combineRows, all.rd),
        error=function(e) {
            stop(paste0("failed to combine rowData of SummarizedExperiment objects:\n ", conditionMessage(e)))
        }
    )

    # colData: same treatment.
    all.cd <- lapply(all.se, colData)
    com.cd <- tryCatch(
        do.call(combineUniqueCols, c(all.cd, list(use.names=use.names))),
        error=function(e) {
            stop(paste0("failed to combine colData of SummarizedExperiment objects:\n ", conditionMessage(e)))
        }
    )

    # For every input, the position of each final column in that input
    # (NA when the input lacks the column). NULL when names are not used.
    mappings <- NULL
    if (use.names) {
        final.names <- rownames(com.cd)
        mappings <- lapply(seq_along(all.se), function(i) {
            match(final.names, rownames(all.cd[[i]]))
        })
    }

    args <- list(
        assays=combine_assays_by(all.se, mappings, delayed=delayed, fill=fill, by.row=TRUE),
        colData=com.cd,
        metadata=unlist(lapply(all.se, metadata), recursive=FALSE, use.names=FALSE),
        checkDimnames=FALSE
    )

    # rowRanges are only produced when some input carries ranges.
    extracted <- extract_granges_from_se(all.se)

    if (is.null(extracted)) {
        args$rowData <- com.rd
    } else {
        lacks.ranges <- vapply(extracted, is.null, TRUE)

        # Inputs without ranges get one empty GRangesList element per row,
        # named after their rownames (or unnamed if there are none).
        for (i in which(lacks.ranges)) {
            cur.se <- all.se[[i]]
            cur.names <- rownames(cur.se)
            levels <- cur.names
            if (is.null(levels)) {
                levels <- seq_len(nrow(cur.se))
            }
            rr <- splitAsList(GRanges(), factor(character(0), levels))
            if (is.null(cur.names)) {
                names(rr) <- NULL
            }
            extracted[[i]] <- rr
        }

        # As soon as one placeholder was created, everything is a GRangesList.
        if (any(lacks.ranges)) {
            extracted <- lapply(extracted, function(rr) as(rr, "GRangesList"))
        }

        com.rr <- do.call(c, extracted)
        mcols(com.rr) <- com.rd
        args$rowRanges <- com.rr
    }

    # Assembling the SE.
    do.call(SummarizedExperiment, args)
})
80 |
# Combine the assays of several SummarizedExperiment objects.
#
# all.se:   list of SummarizedExperiment objects.
# mappings: NULL, or a list with one integer vector per object giving, for
#           each position of the shared dimension in the result, the
#           matching position in that object (NA if absent).
# delayed, fill: passed to the inflate_matrix_by_*() and
#           create_dummy_matrix() helpers.
# by.row:   TRUE to rbind the assays (columns are the shared dimension),
#           FALSE to cbind them (rows are the shared dimension).
#
# Assays are either all unnamed (then every object must have the same number
# of them) or all named (then an assay missing from an object is replaced by
# a matrix of 'fill' values). Returns the combined assays as a SimpleList.
combine_assays_by <- function(all.se, mappings, delayed, fill, by.row) {
    all.assays <- lapply(all.se, assays, withDimnames=FALSE)
    each.assay.names <- lapply(all.assays, names)
    is.unnamed <- vapply(each.assay.names, is.null, TRUE)

    # The dimension that is NOT being concatenated is the one to inflate.
    INFLATE <- if (by.row) inflate_matrix_by_column else inflate_matrix_by_row

    if (any(is.unnamed)) {
        if (!all(is.unnamed)) {
            stop("named and unnamed assays cannot be mixed")
        }
        if (length(unique(lengths(all.assays))) != 1L) {
            stop("all SummarizedExperiments should have the same number of unnamed assays")
        }

        for (s in seq_along(all.se)) {
            all.assays[[s]] <- lapply(all.assays[[s]], FUN=INFLATE,
                idx=mappings[[s]], delayed=delayed, fill=fill)
        }
    } else {
        all.assay.names <- Reduce(union, each.assay.names)
        for (s in seq_along(all.se)) {
            cur.se <- all.se[[s]]
            cur.assays <- all.assays[[s]]
            idx <- mappings[[s]]

            # Every object ends up with every assay name; missing assays
            # become dummy matrices with the dimensions of the inflated ones.
            for (a in all.assay.names) {
                if (a %in% names(cur.assays)) {
                    cur.assays[[a]] <- INFLATE(cur.assays[[a]], idx, delayed=delayed, fill=fill)
                    next
                }
                nr <- nrow(cur.se)
                nc <- ncol(cur.se)
                if (!is.null(idx)) {
                    if (by.row) {
                        nc <- length(idx)
                    } else {
                        nr <- length(idx)
                    }
                }
                cur.assays[[a]] <- create_dummy_matrix(nr, nc, delayed=delayed, fill=fill)
            }
            all.assays[[s]] <- cur.assays
        }
    }

    # Re-use assay r/cbind'ing machinery.
    all.assays <- lapply(all.assays, Assays)
    BIND <- if (by.row) rbind else cbind
    as(do.call(BIND, all.assays), "SimpleList")
}
143 |
# Build an nr x nc placeholder filled with 'fill': a ConstantArray when
# 'delayed' is TRUE, an ordinary array otherwise.
create_dummy_matrix <- function(nr, nc, delayed, fill) {
    dims <- c(nr, nc)
    if (delayed) {
        ConstantArray(dims, value=fill)
    } else {
        array(fill, dim=dims)
    }
}
151 |
# Rearrange the columns of 'mat' according to 'idx'.
#
# idx: NULL (columns are left alone) or a vector of column positions in
#      which NA requests a column made up entirely of 'fill'.
# 'mat' is wrapped in a DelayedArray first when 'delayed' is TRUE.
inflate_matrix_by_column <- function(mat, idx, delayed, fill) {
    if (delayed) {
        mat <- DelayedArray(mat)
    }
    if (is.null(idx)) {
        return(mat)
    }

    absent <- is.na(idx)
    if (any(absent)) {
        # Append a single filler column and point every NA at it.
        filler <- create_dummy_matrix(nrow(mat), 1L, delayed, fill)
        idx[absent] <- ncol(mat)+1L
        mat <- cbind(mat, filler)
    }
    mat[,idx,drop=FALSE]
}
166 |
# Rearrange the rows of 'mat' according to 'idx'.
#
# idx: NULL (rows are left alone) or a vector of row positions in which NA
#      requests a row made up entirely of 'fill'.
# 'mat' is wrapped in a DelayedArray first when 'delayed' is TRUE.
inflate_matrix_by_row <- function(mat, idx, delayed, fill) {
    if (delayed) {
        mat <- DelayedArray(mat)
    }
    if (is.null(idx)) {
        return(mat)
    }

    absent <- is.na(idx)
    if (any(absent)) {
        # Append a single filler row and point every NA at it.
        filler <- create_dummy_matrix(1L, ncol(mat), delayed, fill)
        idx[absent] <- nrow(mat)+1L
        mat <- rbind(mat, filler)
    }
    mat[idx,,drop=FALSE]
}
181 |
# Collect the rowRanges of a list of SummarizedExperiment objects.
#
# Returns NULL if no element is a RangedSummarizedExperiment. Otherwise
# returns a list parallel to 'all.se' that holds, for each
# RangedSummarizedExperiment, its rowRanges stripped of metadata columns and
# named after the object's rownames, and NULL for every other element.
extract_granges_from_se <- function(all.se) {
    has.ranges <- vapply(all.se, is, class2="RangedSummarizedExperiment", FUN.VALUE=TRUE)
    if (!any(has.ranges)) {
        return(NULL)
    }

    final.rr <- lapply(seq_along(all.se), function(s) {
        if (!has.ranges[[s]]) {
            return(NULL)
        }
        cur.se <- all.se[[s]]
        rr <- rowRanges(cur.se)
        mcols(rr) <- NULL
        names(rr) <- rownames(cur.se)
        rr
    })

    # If any of the ranges is a GRangesList, all of them are coerced to one.
    # The NULL entries are left for the caller to deal with (filled in by
    # combineRows, merged in combineCols).
    ranged <- which(has.ranges)
    any.grl <- any(vapply(final.rr[ranged], function(x) is(x, "GRangesList"), TRUE))
    if (any.grl) {
        for (s in ranged) {
            final.rr[[s]] <- as(final.rr[[s]], "GRangesList")
        }
    }

    final.rr
}
209 |
# combineCols(): join SummarizedExperiment objects by column.
#
# The rowData of all inputs are combined with combineUniqueCols() and their
# colData with combineRows(). With use.names=TRUE the rows of every input
# are matched by name to the rows of the combined rowData and the assays are
# rearranged accordingly (see combine_assays_by()). The result gets
# rowRanges when merge_granges_from_se() returns some, plain rowData
# otherwise.
setMethod("combineCols", "SummarizedExperiment", function(x, ..., delayed=TRUE, fill=NA, use.names=TRUE) {
    all.se <- list(x, ...)

    # rowData: failures are re-thrown with some context.
    all.rd <- lapply(all.se, rowData)
    com.rd <- tryCatch(
        do.call(combineUniqueCols, c(all.rd, list(use.names=use.names))),
        error=function(e) {
            stop(paste0("failed to combine rowData of SummarizedExperiment objects:\n ", conditionMessage(e)))
        }
    )

    # colData: same treatment.
    all.cd <- lapply(all.se, colData)
    com.cd <- tryCatch(
        do.call(combineRows, all.cd),
        error=function(e) {
            stop(paste0("failed to combine colData of SummarizedExperiment objects:\n ", conditionMessage(e)))
        }
    )

    # For every input, the position of each final row in that input (NA when
    # the input lacks the row). NULL when names are not used.
    mappings <- NULL
    if (use.names) {
        final.names <- rownames(com.rd)
        mappings <- lapply(seq_along(all.se), function(i) {
            match(final.names, rownames(all.rd[[i]]))
        })
    }

    args <- list(
        assays=combine_assays_by(all.se, mappings, delayed=delayed, fill=fill, by.row=FALSE),
        colData=com.cd,
        metadata=unlist(lapply(all.se, metadata), recursive=FALSE, use.names=FALSE),
        checkDimnames=FALSE
    )

    com.rr <- merge_granges_from_se(all.se, mappings)
    if (is.null(com.rr)) {
        args$rowData <- com.rd
    } else {
        mcols(com.rr) <- com.rd
        names(com.rr) <- rownames(com.rd)
        args$rowRanges <- com.rr
    }

    # Assembling the SE.
    do.call(SummarizedExperiment, args)
})
260 |
# Merge the rowRanges of several SummarizedExperiment objects into a single
# set of ranges for combineCols().
#
# all.se:   list of SummarizedExperiment objects.
# mappings: NULL, or a list with one integer vector per element of 'all.se'
#           giving, for each row of the result, the matching row of that
#           object (NA if absent).
#
# Returns NULL when no object carries ranges. With mappings=NULL the ranges
# of the first ranged object are returned (with a warning if the ranged
# objects disagree). Otherwise the result has one entry per final row, taken
# from the first ranged object that supplies that row; an object whose
# ranges contradict already-filled rows is skipped with a warning. Rows that
# no object supplies become empty GRangesList elements.
merge_granges_from_se <- function(all.se, mappings) {
    extracted <- extract_granges_from_se(all.se)
    if (is.null(extracted)) {
        return(NULL)
    }

    # Positions (within 'all.se') of the objects that carry ranges.
    has.ranges <- which(!vapply(extracted, is.null, FALSE))
    extracted.ranges <- extracted[has.ranges]

    if (!is.null(mappings)) {
        # We concatenate everything to automatically merge the seqinfo. We
        # then create a container for the filling process.
        temp.rr <- do.call(c, extracted.ranges)
        names(temp.rr) <- NULL
        nentries <- lengths(extracted.ranges)
        starts <- cumsum(c(0L, nentries))

        com.rr <- temp.rr
        if (length(temp.rr) > 0) {
            com.rr <- temp.rr[rep(1L, length(mappings[[1]]))]
        }
        filled <- logical(length(com.rr))

        for (i in seq_along(extracted.ranges)) {
            # 'i' counts ranged objects only; 'se.pos' is the position of
            # the current object in 'all.se' (and in 'mappings').
            se.pos <- has.ranges[[i]]
            idx <- mappings[[se.pos]]
            candidates <- temp.rr[starts[i] + seq_len(nentries[i])]

            available <- which(!is.na(idx))
            new.rr <- candidates[idx[available]]
            old.rr <- com.rr[available]

            existing <- filled[available]
            if (!identical(new.rr[existing], old.rr[existing])) {
                # Report the class and position of the offending object in
                # the caller's input, not its rank among the ranged objects.
                warning(wmsg("different 'rowRanges' for shared rows across SummarizedExperiment objects, ",
                    "ignoring 'rowRanges' for ", class(all.se[[se.pos]])[1], " ", se.pos))
                next
            }

            com.rr[available[!existing]] <- new.rr[!existing]
            filled[available[!existing]] <- TRUE
        }

        if (!all(filled)) {
            com.rr <- as(com.rr, "GRangesList")
            com.rr[!filled] <- GRangesList(GRanges())
        }
    } else {
        if (length(unique(extracted.ranges)) > 1) {
            warning(wmsg("'rowRanges' are not identical across input objects, ",
                "using 'rowRanges' from the first object only"))
        }
        com.rr <- extracted.ranges[[1]]
    }

    com.rr
}
317 |
--------------------------------------------------------------------------------
/R/coverage-methods.R:
--------------------------------------------------------------------------------
1 | ### =========================================================================
2 | ### "coverage" method
3 | ### -------------------------------------------------------------------------
4 | ###
5 |
6 |
### coverage() of a RangedSummarizedExperiment is computed on its rowRanges.
setMethod("coverage", "RangedSummarizedExperiment",
    function(x, shift=0L, width=NULL, weight=1L,
                method=c("auto", "sort", "hash"))
    {
        ## 'x' is overwritten on purpose: the zero-argument callGeneric()
        ## re-dispatches on the current values of the formal arguments.
        x <- rowRanges(x)
        callGeneric()
    }
)
15 |
16 |
--------------------------------------------------------------------------------
/R/findOverlaps-methods.R:
--------------------------------------------------------------------------------
1 | ### =========================================================================
2 | ### findOverlaps methods
3 | ### -------------------------------------------------------------------------
4 |
5 |
6 | ### findOverlaps
7 |
### RangedSummarizedExperiment query vs any Vector subject: the query is
### replaced by its rowRanges.
setMethod("findOverlaps", c("RangedSummarizedExperiment", "Vector"),
    function(query, subject, maxgap=-1L, minoverlap=0L,
             type=c("any", "start", "end", "within", "equal"),
             select=c("all", "first", "last", "arbitrary"),
             ignore.strand=FALSE)
    {
        ## 'query' is overwritten on purpose: the zero-argument callGeneric()
        ## re-dispatches on the current values of the formal arguments.
        query <- rowRanges(query)
        callGeneric()
    }
)
18 |
### Any Vector query vs RangedSummarizedExperiment subject: the subject is
### replaced by its rowRanges.
setMethod("findOverlaps", c("Vector", "RangedSummarizedExperiment"),
    function(query, subject, maxgap=-1L, minoverlap=0L,
             type=c("any", "start", "end", "within", "equal"),
             select=c("all", "first", "last", "arbitrary"),
             ignore.strand=FALSE)
    {
        ## 'subject' is overwritten on purpose: the zero-argument
        ## callGeneric() re-dispatches on the current values of the formals.
        subject <- rowRanges(subject)
        callGeneric()
    }
)
29 |
### RangedSummarizedExperiment on both sides: both operands are replaced by
### their rowRanges.
setMethod("findOverlaps", c("RangedSummarizedExperiment",
                            "RangedSummarizedExperiment"),
    function(query, subject, maxgap=-1L, minoverlap=0L,
             type=c("any", "start", "end", "within", "equal"),
             select=c("all", "first", "last", "arbitrary"),
             ignore.strand=FALSE)
    {
        ## Both formals are overwritten on purpose: the zero-argument
        ## callGeneric() re-dispatches on their current values.
        query <- rowRanges(query)
        subject <- rowRanges(subject)
        callGeneric()
    }
)
42 |
43 |
--------------------------------------------------------------------------------
/R/inter-range-methods.R:
--------------------------------------------------------------------------------
1 | ### =========================================================================
2 | ### Inter-range methods
3 | ### -------------------------------------------------------------------------
4 | ###
5 |
6 |
### isDisjoint() is answered by the rowRanges.
setMethod("isDisjoint", "RangedSummarizedExperiment",
    function(x, ignore.strand=FALSE)
    {
        ## 'x' is overwritten on purpose: the zero-argument callGeneric()
        ## re-dispatches on the current values of the formal arguments.
        x <- rowRanges(x)
        callGeneric()
    }
)
14 |
### disjointBins() is answered by the rowRanges.
setMethod("disjointBins", "RangedSummarizedExperiment",
    function(x, ignore.strand = FALSE)
    {
        ## 'x' is overwritten on purpose: the zero-argument callGeneric()
        ## re-dispatches on the current values of the formal arguments.
        x <- rowRanges(x)
        callGeneric()
    }
)
22 |
23 |
--------------------------------------------------------------------------------
/R/intra-range-methods.R:
--------------------------------------------------------------------------------
1 | ### =========================================================================
2 | ### Intra-range methods
3 | ### -------------------------------------------------------------------------
4 | ###
5 |
6 |
### shift() is applied to the rowRanges; the experiment is returned with the
### shifted ranges put back via "rowRanges<-".
setMethod("shift", "RangedSummarizedExperiment",
    function(x, shift=0L, use.names=TRUE)
    {
        se <- x
        ## 'x' must hold the ranges when the zero-argument callGeneric()
        ## re-dispatches on the formal arguments.
        x <- rowRanges(se)
        new_ranges <- callGeneric()
        rowRanges(se) <- new_ranges
        se
    }
)
16 |
### narrow() is applied to the rowRanges; the experiment is returned with
### the narrowed ranges put back via "rowRanges<-".
setMethod("narrow", "RangedSummarizedExperiment",
    function(x, start=NA, end=NA, width=NA, use.names=TRUE)
    {
        se <- x
        ## 'x' must hold the ranges when the zero-argument callGeneric()
        ## re-dispatches on the formal arguments.
        x <- rowRanges(se)
        new_ranges <- callGeneric()
        rowRanges(se) <- new_ranges
        se
    }
)
26 |
### resize() is applied to the rowRanges; the experiment is returned with
### the resized ranges put back via "rowRanges<-".
setMethod("resize", "RangedSummarizedExperiment",
    function(x, width, fix="start", use.names=TRUE, ignore.strand=FALSE)
    {
        se <- x
        ## 'x' must hold the ranges when the zero-argument callGeneric()
        ## re-dispatches on the formal arguments.
        x <- rowRanges(se)
        new_ranges <- callGeneric()
        rowRanges(se) <- new_ranges
        se
    }
)
36 |
### flank() is applied to the rowRanges; the experiment is returned with the
### flanking ranges put back via "rowRanges<-".
setMethod("flank", "RangedSummarizedExperiment",
    function(x, width, start=TRUE, both=FALSE, use.names=TRUE,
             ignore.strand=FALSE)
    {
        se <- x
        ## 'x' must hold the ranges when the zero-argument callGeneric()
        ## re-dispatches on the formal arguments.
        x <- rowRanges(se)
        new_ranges <- callGeneric()
        rowRanges(se) <- new_ranges
        se
    }
)
47 |
### promoters() is applied to the rowRanges; the experiment is returned with
### the resulting ranges put back via "rowRanges<-".
setMethod("promoters", "RangedSummarizedExperiment",
    function(x, upstream=2000, downstream=200)
    {
        se <- x
        ## 'x' must hold the ranges when the zero-argument callGeneric()
        ## re-dispatches on the formal arguments.
        x <- rowRanges(se)
        new_ranges <- callGeneric()
        rowRanges(se) <- new_ranges
        se
    }
)
57 |
### terminators() is applied to the rowRanges; the experiment is returned
### with the resulting ranges put back via "rowRanges<-".
setMethod("terminators", "RangedSummarizedExperiment",
    function(x, upstream=2000, downstream=200)
    {
        se <- x
        ## 'x' must hold the ranges when the zero-argument callGeneric()
        ## re-dispatches on the formal arguments.
        x <- rowRanges(se)
        new_ranges <- callGeneric()
        rowRanges(se) <- new_ranges
        se
    }
)
67 |
68 | ### Because 'keep.all.ranges' is FALSE by default, it will break if some
69 | ### ranges are dropped.
setMethod("restrict", "RangedSummarizedExperiment",
    function(x, start=NA, end=NA, keep.all.ranges=FALSE, use.names=TRUE)
    {
        se <- x
        ## 'x' must hold the ranges when the zero-argument callGeneric()
        ## re-dispatches on the formal arguments.
        x <- rowRanges(se)
        new_ranges <- callGeneric()
        ## The restricted ranges go back through "rowRanges<-".
        rowRanges(se) <- new_ranges
        se
    }
)
79 |
### trim() is applied to the rowRanges; the experiment is returned with the
### trimmed ranges put back via "rowRanges<-".
setMethod("trim", "RangedSummarizedExperiment",
    function(x, use.names=TRUE)
    {
        se <- x
        ## 'x' must hold the ranges when the zero-argument callGeneric()
        ## re-dispatches on the formal arguments.
        x <- rowRanges(se)
        new_ranges <- callGeneric()
        rowRanges(se) <- new_ranges
        se
    }
)
89 |
90 |
--------------------------------------------------------------------------------
/R/makeSummarizedExperimentFromDataFrame.R:
--------------------------------------------------------------------------------
1 | ### =========================================================================
2 | ### makeSummarizedExperimentFromDataFrame()
3 | ### -------------------------------------------------------------------------
4 |
5 | ### 'df' must be a data.frame or DataFrame object.
### Build a RangedSummarizedExperiment from a data.frame or DataFrame.
###
### The genomic-range columns of 'df' become the rowRanges (via
### makeGRangesFromDataFrame(), which also receives '...', 'seqinfo' and
### 'starts.in.df.are.0based'). All remaining columns are turned into a
### single matrix that becomes the only assay; an error is raised if that
### matrix is not numeric.
makeSummarizedExperimentFromDataFrame <-
    function(df,
             ...,
             seqinfo = NULL,
             starts.in.df.are.0based = FALSE)
{
    rowRanges <- makeGRangesFromDataFrame(
        df,
        ...,
        keep.extra.columns = FALSE,
        seqinfo = seqinfo,
        starts.in.df.are.0based = starts.in.df.are.0based)

    ## Locate the columns that were consumed as range information ...
    df_names <- names(df)
    range_cols <- GenomicRanges:::.find_GRanges_cols(df_names, ...)
    range_names <- df_names[na.omit(range_cols)]
    range_idx <- match(range_names, names(df))

    ## ... and treat every other column as assay data.
    counts <- as.matrix(df[, -range_idx, drop = FALSE])
    counts_are_numeric <- is(as.vector(counts), "numeric")
    if (!counts_are_numeric)
        stop("failed to coerce non-range columns to 'numeric'")

    SummarizedExperiment(
        assays=SimpleList(counts), rowRanges=rowRanges)
}
33 |
--------------------------------------------------------------------------------
/R/makeSummarizedExperimentFromExpressionSet.R:
--------------------------------------------------------------------------------
1 | ##
2 | ## makeSummarizedExperimentFromExpressionSet
3 |
4 | ## coercion
5 |
## Convert rowRanges (GRanges or GRangesList) to feature data, using the
## names of the ranges as feature names. Any other class is an error.
.from_rowRanges_to_FeatureData <- function(from)
{
    convert <- if (is(from, "GRanges")) {
        .from_GRanges_to_FeatureData
    } else if (is(from, "GRangesList")) {
        .from_GRangesList_to_FeatureData
    } else {
        stop("class ", sQuote(class(from)),
             " is not a supported type for rowRanges coercion")
    }
    fd <- convert(from)
    featureNames(fd) <- names(from)
    fd
}
19 |
## GRanges -> AnnotatedDataFrame.
## The data part is as.data.frame(from), which already contains the metadata
## columns. The variable metadata comes from the mcols of those metadata
## columns; when there is none, a zero-column data.frame with one row per
## data column is used.
.from_GRanges_to_FeatureData <- function(from)
{
    feature_df <- as.data.frame(from)

    var_meta <- mcols(mcols(from, use.names=FALSE), use.names=FALSE)
    var_meta <- if (is.null(var_meta)) {
        as.data.frame(matrix(ncol=0, nrow=NCOL(feature_df)))
    } else {
        as.data.frame(var_meta)
    }
    AnnotatedDataFrame(feature_df, var_meta)
}
## GRangesList -> AnnotatedDataFrame.
## Only the metadata columns of the list are used as data. The variable
## metadata comes from the mcols of those metadata columns; when there is
## none, a zero-column data.frame with one row per data column is used.
.from_GRangesList_to_FeatureData <- function(from)
{
    feature_df <- as.data.frame(mcols(from, use.names=FALSE))

    var_meta <- mcols(mcols(from, use.names=FALSE), use.names=FALSE)
    var_meta <- if (is.null(var_meta)) {
        as.data.frame(matrix(ncol=0, nrow=NCOL(feature_df)))
    } else {
        as.data.frame(var_meta)
    }
    AnnotatedDataFrame(feature_df, var_meta)
}
50 |
## AnnotatedDataFrame -> DataFrame: pData() becomes the columns, rownames
## are carried over, and varMetadata() becomes the mcols of the result.
.from_AnnotatedDataFrame_to_DataFrame <- function(from)
{
    converted <- DataFrame(pData(from), row.names=rownames(from))
    mcols(converted) <- DataFrame(varMetadata(from))
    converted
}
57 |
## DataFrame -> AnnotatedDataFrame: the columns become the data and the
## mcols of 'df' the variable metadata. Without mcols, a zero-column
## data.frame with one row per data column is used instead.
.from_DataFrame_to_AnnotatedDataFrame <- function(df)
{
    pheno_df <- as(df, "data.frame")
    var_meta <- mcols(df, use.names=FALSE)
    var_meta <- if (is.null(var_meta)) {
        as.data.frame(matrix(ncol=0, nrow=NCOL(pheno_df)))
    } else {
        as(var_meta, "data.frame")
    }
    AnnotatedDataFrame(pheno_df, var_meta)
}
69 |
## Default range mapper. Tries to build a GRanges from the featureData of
## the ExpressionSet (keeping the extra columns as metadata columns). If
## that fails, falls back to a GRangesList of empty elements, one per
## feature, whose metadata columns are the featureData. Either way the
## result is named after the featureNames.
naiveRangeMapper <- function(from)
{
    feature_names <- featureNames(from)

    ## Fallback used when the featureData holds no usable range information.
    empty_ranges <- function(e) {
        grl <- relist(GRanges(),
                      vector("list", length=length(feature_names)))
        mcols(grl) <- .from_AnnotatedDataFrame_to_DataFrame(featureData(from))
        grl
    }

    mapped <- tryCatch(
        makeGRangesFromDataFrame(pData(featureData(from)),
                                 keep.extra.columns = TRUE),
        error = empty_ranges
    )
    names(mapped) <- feature_names
    mapped
}
87 |
# Simple ProbeId to Range mapper.
# Looks up CHR, CHRLOC and CHRLOCEND for every feature in the annotation
# package of the ExpressionSet. Rows with missing values are discarded and,
# for probes with several mappings, only the first remaining row is kept.
# The sign of the chromosome location is assumed to contain the strand
# information. Falls back to naiveRangeMapper() when the ExpressionSet has
# no annotation; warns when some probes could not be mapped.
probeRangeMapper <- function(from)
{
    annot <- annotation(from)
    if (identical(annot, character(0))) {
        return(naiveRangeMapper(from))
    }
    if (!requireNamespace("annotate", quietly = TRUE)) {
        stop("Failed to load annotate package", call. = FALSE)
    }

    annotationPackage <- annotate::annPkgName(annot)
    loaded <- require(annotationPackage, character.only = TRUE,
                      quietly = TRUE)
    if (!loaded) {
        stop("Failed to load ", sQuote(annotationPackage), " package",
             call. = FALSE)
    }

    db <- get(annotationPackage, envir = asNamespace(annotationPackage))
    pid <- featureNames(from)
    locs <- AnnotationDbi::select(
        db, pid, columns = c("CHR", "CHRLOC", "CHRLOCEND"))
    locs <- na.omit(locs)

    # Keep a single location per probe.
    repeated <- duplicated(locs$PROBEID)
    if (any(repeated)) {
        locs <- locs[!repeated, , drop = FALSE]
    }

    strand <- ifelse(locs$CHRLOC > 0, "+", "-")
    res <- GRanges(seqnames = locs$CHR,
                   ranges = IRanges(abs(locs$CHRLOC),
                                    abs(locs$CHRLOCEND)),
                   strand = strand)
    names(res) <- locs$PROBEID

    n_unmapped <- length(pid) - NROW(res)
    if (NROW(res) < length(pid)) {
        warning(n_unmapped,
                " probes could not be mapped.", call. = FALSE)
    }
    res
}
132 |
# Simple ProbeId to Gene mapper
# Is there a way to get the txDb given the annotation package?
#
# Returns a mapper function for the TxDb package named by 'txDbPackage'.
# The mapper translates probe ids to 'key' ids with the annotation package
# of the ExpressionSet, looks those ids up among the genes of the TxDb, and
# returns the matching gene ranges named by probe id. It falls back to
# naiveRangeMapper() when the ExpressionSet has no annotation and warns when
# some probes could not be mapped.
geneRangeMapper <- function(txDbPackage, key = "ENTREZID")
{
    function(from) {
        annot <- annotation(from)
        if (identical(annot, character(0))) {
            return(naiveRangeMapper(from))
        }
        if (!requireNamespace("annotate", quietly = TRUE)) {
            stop("Failed to load annotate package", call. = FALSE)
        }

        annotationPackage <- annotate::annPkgName(annot)
        loaded <- require(annotationPackage, character.only = TRUE,
                          quietly = TRUE)
        if (!loaded) {
            stop("Failed to load ", sQuote(annotationPackage), " package",
                 call. = FALSE)
        }

        db <- get(annotationPackage,
                  envir = asNamespace(annotationPackage))
        pid <- featureNames(from)
        probeIdToGeneId <-
            AnnotationDbi::mapIds(db, pid, key, "PROBEID")
        geneIdToProbeId <-
            setNames(names(probeIdToGeneId), probeIdToGeneId)

        if (!requireNamespace(txDbPackage, quietly = TRUE)) {
            stop("Failed to load ", sQuote(txDbPackage), " package",
                 call. = FALSE)
        }

        txDb <- get(txDbPackage, envir = asNamespace(txDbPackage))
        genes <- GenomicFeatures::genes(txDb)
        has_gene <- probeIdToGeneId %in% names(genes)
        probesWithAMatch <- probeIdToGeneId[has_gene]
        res <- genes[probesWithAMatch]
        names(res) <- geneIdToProbeId[names(res)]

        n_unmapped <- length(pid) - NROW(res)
        if (NROW(res) < length(pid)) {
            warning(n_unmapped,
                    " probes could not be mapped.", call. = FALSE)
        }
        res
    }
}
181 |
## Convert an ExpressionSet to a RangedSummarizedExperiment.
##
## 'mapFun' (a function or its name) receives 'from' and '...' and must
## return the rowRanges, named by feature. Only the features whose names
## appear in those rowRanges are kept. The assayData become the assays, the
## phenoData the colData, and experimentData, annotation and protocolData
## are stored in the metadata.
makeSummarizedExperimentFromExpressionSet <-
    function(from, mapFun = naiveRangeMapper, ...)
{
    mapFun <- match.fun(mapFun)
    mapped_ranges <- mapFun(from, ...)

    ## Restrict the ExpressionSet to the features that were mapped.
    keep <- match(names(mapped_ranges),
                  featureNames(from),
                  nomatch = 0)
    from <- from[keep, drop = FALSE]

    assay_list <- as.list(assayData(from))
    col_data <- .from_AnnotatedDataFrame_to_DataFrame(phenoData(from))
    meta <- SimpleList(
        experimentData = experimentData(from),
        annotation = annotation(from),
        protocolData = protocolData(from)
    )

    SummarizedExperiment(
        assays = assay_list,
        rowRanges = mapped_ranges,
        colData = col_data,
        metadata = meta
    )
}
206 |
## as(<ExpressionSet>, "RangedSummarizedExperiment") uses the converter with
## its default range mapper.
setAs("ExpressionSet", "RangedSummarizedExperiment",
    function(from) makeSummarizedExperimentFromExpressionSet(from)
)
211 |
## as(<ExpressionSet>, "SummarizedExperiment"): convert to a
## RangedSummarizedExperiment first, then coerce down.
setAs("ExpressionSet", "SummarizedExperiment",
    function(from)
    {
        ranged <- makeSummarizedExperimentFromExpressionSet(from)
        as(ranged, "SummarizedExperiment")
    }
)
216 |
### Coercion RangedSummarizedExperiment -> ExpressionSet.
###
### The assays are placed in a locked environment used as the assayData of
### the result. An ExpressionSet needs an element named "exprs":
###   - with no assay at all, an empty matrix is used;
###   - with assays but none named "exprs", the FIRST assay (in assay order)
###     is renamed "exprs" and a warning is issued.
### 'experimentData', 'annotation' and 'protocolData' are taken from
### metadata(from) when present and otherwise replaced with MIAME(),
### character() and annotatedDataFrameFrom(assayData, byrow=FALSE).
setAs("RangedSummarizedExperiment", "ExpressionSet",
    function(from)
    {
        assayList <- as.list(assays(from))
        assayData <- list2env(assayList)

        if (length(assayList) == 0L) {
            assayData$exprs <- new("matrix")
        } else if (!"exprs" %in% names(assayList)) {
            ## if there isn't an exprs assay we need to pick one as exprs,
            ## so rename the first element exprs and issue a warning.
            ## The name is taken from the list rather than from ls(): ls()
            ## sorts names alphabetically (and hides names starting with a
            ## dot), so it would not necessarily return the first assay.
            firstAssay <- names(assayList)[[1L]]
            warning("No assay named ", sQuote("exprs"), " found, renaming ",
                    firstAssay, " to ", sQuote("exprs"), ".")
            assayData[["exprs"]] <- assayData[[firstAssay]]
            rm(list=firstAssay, envir=assayData)
        }
        lockEnvironment(assayData, bindings = TRUE)

        featureData <- .from_rowRanges_to_FeatureData(rowRanges(from))
        phenoData <- .from_DataFrame_to_AnnotatedDataFrame(colData(from))

        metadata <- metadata(from)

        experimentData <- if (!is.null(metadata$experimentData)) {
            metadata$experimentData
        } else {
            MIAME()
        }

        annotation <- if (!is.null(metadata$annotation)) {
            metadata$annotation
        } else {
            character()
        }

        protocolData <- if (!is.null(metadata$protocolData)) {
            metadata$protocolData
        } else {
            annotatedDataFrameFrom(assayData, byrow=FALSE)
        }

        ExpressionSet(assayData,
                      phenoData = phenoData,
                      featureData = featureData,
                      experimentData = experimentData,
                      annotation = annotation,
                      protocolData = protocolData
        )
    })
268 |
### Coercion SummarizedExperiment -> ExpressionSet, performed in two steps
### via an intermediate RangedSummarizedExperiment.
setAs("SummarizedExperiment", "ExpressionSet",
    function(from)
    {
        ranged <- as(from, "RangedSummarizedExperiment")
        as(ranged, "ExpressionSet")
    }
)
272 |
--------------------------------------------------------------------------------
/R/makeSummarizedExperimentFromLoom.R:
--------------------------------------------------------------------------------
### Use column 'colname' of 'df' as its row names and return 'df' without
### that column (the result keeps its data-frame shape even when a single
### column remains).
.loom_make_rownames <- function(df, colname) {
    name_col_idx <- match(colname, colnames(df))
    rownames(df) <- df[[colname]]
    df[, -name_col_idx, drop = FALSE]
}
5 |
### Create a SummarizedExperiment from the HDF5 file 'file':
###   - "/matrix" and every dataset listed under "/layers" are transposed
###     and used as assays (the first one is named "matrix");
###   - "row_attrs" and "col_attrs" are read into the rowData and colData;
###   - the attributes of "/" become the metadata.
### 'rownames_attr' / 'colnames_attr', when not NULL, must name an entry of
### "/row_attrs" / "/col_attrs"; that column is then used as row names of
### the rowData / colData and removed from it.
makeSummarizedExperimentFromLoom <-
    function(file, rownames_attr = NULL, colnames_attr = NULL)
{
    stopifnot(file.exists(file))

    listing <- rhdf5::h5ls(file)
    members_of <- function(group)
        listing[listing$group == group, "name", drop=TRUE]

    rowColnames <- members_of("/row_attrs")
    colColnames <- members_of("/col_attrs")
    stopifnot(
        is.null(rownames_attr) || rownames_attr %in% rowColnames,
        is.null(colnames_attr) || colnames_attr %in% colColnames
    )

    ## Main matrix first, then one assay per layer (named after the layer).
    mainAssay <- t(HDF5Array::HDF5Array(file, "/matrix"))
    layerNames <- members_of("/layers")
    names(layerNames) <- layerNames
    layerAssays <- lapply(layerNames, function(layerName) {
        t(HDF5Array::HDF5Array(file, paste0("/layers/", layerName)))
    })
    assays <- c(list(matrix = mainAssay), layerAssays)

    ## Read one attribute group and optionally promote a column to row names.
    read_attrs <- function(group, names_attr) {
        attrs <- DataFrame(rhdf5::h5read(file, group))
        if (is.null(names_attr))
            return(attrs)
        .loom_make_rownames(attrs, names_attr)
    }
    rowData <- read_attrs("row_attrs", rownames_attr)
    colData <- read_attrs("col_attrs", colnames_attr)

    se <- SummarizedExperiment(assays, rowData = rowData, colData = colData)
    metadata(se) <- rhdf5::h5readAttributes(file, "/")
    se
}
39 |
--------------------------------------------------------------------------------
/R/nearest-methods.R:
--------------------------------------------------------------------------------
1 | ### =========================================================================
2 | ### nearest (and related) methods
3 | ### -------------------------------------------------------------------------
4 | ###
5 |
6 |
7 | ### precede & follow
8 |
### For each of the "precede" and "follow" generics, register three methods
### so that a RangedSummarizedExperiment can be supplied as 'x', as
### 'subject', or as both. Each method replaces the
### RangedSummarizedExperiment argument(s) with the result of rowRanges()
### and then calls callGeneric() without arguments, which re-invokes the
### generic with the current values of the formal arguments (see
### ?callGeneric).
9 | for (f in c("precede", "follow")) {
### 'x' is a RangedSummarizedExperiment: use its row ranges as 'x'.
10 | setMethod(f, c("RangedSummarizedExperiment", "ANY"),
11 | function(x, subject, select=c("arbitrary", "all"), ignore.strand=FALSE)
12 | {
13 | x <- rowRanges(x)
14 | callGeneric()
15 | }
16 | )
### 'subject' is a RangedSummarizedExperiment: use its row ranges as 'subject'.
17 | setMethod(f, c("ANY", "RangedSummarizedExperiment"),
18 | function(x, subject, select=c("arbitrary", "all"), ignore.strand=FALSE)
19 | {
20 | subject <- rowRanges(subject)
21 | callGeneric()
22 | }
23 | )
### Both arguments are RangedSummarizedExperiment objects: replace both.
24 | setMethod(f, c("RangedSummarizedExperiment", "RangedSummarizedExperiment"),
25 | function(x, subject, select=c("arbitrary", "all"), ignore.strand=FALSE)
26 | {
27 | x <- rowRanges(x)
28 | subject <- rowRanges(subject)
29 | callGeneric()
30 | }
31 | )
32 | }
33 |
34 | ### nearest
35 |
### "nearest" methods for RangedSummarizedExperiment objects. Each method
### replaces the RangedSummarizedExperiment argument(s) with the result of
### rowRanges() and re-invokes the generic via callGeneric() (called without
### arguments, so the current values of the formals are used).

### 'x' is a RangedSummarizedExperiment.
36 | setMethod("nearest", c("RangedSummarizedExperiment", "ANY"),
37 | function(x, subject, select=c("arbitrary", "all"), ignore.strand=FALSE)
38 | {
39 | x <- rowRanges(x)
40 | callGeneric()
41 | }
42 | )
43 |
### 'subject' is a RangedSummarizedExperiment.
44 | setMethod("nearest", c("ANY", "RangedSummarizedExperiment"),
45 | function(x, subject, select=c("arbitrary", "all"), ignore.strand=FALSE)
46 | {
47 | subject <- rowRanges(subject)
48 | callGeneric()
49 | }
50 | )
51 |
### Both 'x' and 'subject' are RangedSummarizedExperiment objects.
52 | setMethod("nearest", c("RangedSummarizedExperiment",
53 | "RangedSummarizedExperiment"),
54 | function(x, subject, select=c("arbitrary", "all"), ignore.strand=FALSE)
55 | {
56 | x <- rowRanges(x)
57 | subject <- rowRanges(subject)
58 | callGeneric()
59 | }
60 | )
61 |
62 | ### distance
63 |
### "distance" methods for RangedSummarizedExperiment objects. Each method
### replaces the RangedSummarizedExperiment argument(s) ('x' and/or 'y')
### with the result of rowRanges() and re-invokes the generic via
### callGeneric() (called without arguments, so the current values of the
### formals are used).

### 'x' is a RangedSummarizedExperiment.
64 | setMethod("distance", c("RangedSummarizedExperiment", "ANY"),
65 | function(x, y, ignore.strand=FALSE, ...)
66 | {
67 | x <- rowRanges(x)
68 | callGeneric()
69 | }
70 | )
71 |
### 'y' is a RangedSummarizedExperiment.
72 | setMethod("distance", c("ANY", "RangedSummarizedExperiment"),
73 | function(x, y, ignore.strand=FALSE, ...)
74 | {
75 | y <- rowRanges(y)
76 | callGeneric()
77 | }
78 | )
79 |
### Both 'x' and 'y' are RangedSummarizedExperiment objects.
80 | setMethod("distance", c("RangedSummarizedExperiment",
81 | "RangedSummarizedExperiment"),
82 | function(x, y, ignore.strand=FALSE, ...)
83 | {
84 | x <- rowRanges(x)
85 | y <- rowRanges(y)
86 | callGeneric()
87 | }
88 | )
89 |
90 | ### distanceToNearest
91 |
### "distanceToNearest" methods for RangedSummarizedExperiment objects. Each
### method replaces the RangedSummarizedExperiment argument(s) with the
### result of rowRanges() and re-invokes the generic via callGeneric()
### (called without arguments, so the current values of the formals are
### used).

### 'x' is a RangedSummarizedExperiment.
92 | setMethod("distanceToNearest", c("RangedSummarizedExperiment", "ANY"),
93 | function(x, subject, ignore.strand=FALSE, ...)
94 | {
95 | x <- rowRanges(x)
96 | callGeneric()
97 | }
98 | )
99 |
### 'subject' is a RangedSummarizedExperiment.
100 | setMethod("distanceToNearest", c("ANY", "RangedSummarizedExperiment"),
101 | function(x, subject, ignore.strand=FALSE, ...)
102 | {
103 | subject <- rowRanges(subject)
104 | callGeneric()
105 | }
106 | )
107 |
### Both 'x' and 'subject' are RangedSummarizedExperiment objects.
108 | setMethod("distanceToNearest", c("RangedSummarizedExperiment",
109 | "RangedSummarizedExperiment"),
110 | function(x, subject, ignore.strand=FALSE, ...)
111 | {
112 | x <- rowRanges(x)
113 | subject <- rowRanges(subject)
114 | callGeneric()
115 | }
116 | )
117 |
118 |
--------------------------------------------------------------------------------
/R/zzz.R:
--------------------------------------------------------------------------------
### Run the unit tests of the SummarizedExperiment package through
### BiocGenerics' (non-exported) testPackage() helper.
.test <- function()
{
    BiocGenerics:::testPackage("SummarizedExperiment")
}
2 |
3 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [
](https://bioconductor.org/packages/SummarizedExperiment)
2 |
3 | [
](https://bioconductor.org/)
4 |
5 | **SummarizedExperiment** is an R/Bioconductor package that implements a container (S4 class) for matrix-like assays.
6 |
7 | See https://bioconductor.org/packages/SummarizedExperiment for more information including how to install the release version of the package (please refrain from installing directly from GitHub).
8 |
9 | _SummarizedExperiment sticker courtesy of [Mike Love](https://github.com/mikelove)._
10 |
--------------------------------------------------------------------------------
/inst/extdata/example.loom:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bioconductor/SummarizedExperiment/494ca9fc060b433bed2dca9b12ca8d7be4375140/inst/extdata/example.loom
--------------------------------------------------------------------------------
/inst/scripts/Find_and_update_objects/README:
--------------------------------------------------------------------------------
1 | Find and update objects
2 | =======================
3 |
4 | This document describes the procedure for finding and updating serialized
5 | objects in the rda files located in a directory hierarchy. This procedure
6 | consists of 4 STEPS, as described below.
7 |
8 | Note that this procedure was developed and used to update all "old"
9 | serialized SummarizedExperiment objects located in the data-experiment svn
10 | repository. It should be easy to adapt to update other types of objects in
11 | other locations.
12 |
13 | Also note that the purpose of STEPS 1 & 2 & 3 below is to collect the list
14 | of rda objects to update. Since this can take a long time, the 2 lists
15 | obtained for the "old" serialized SummarizedExperiment objects located
16 | in https://hedgehog.fhcrc.org/bioc-data/trunk/experiment/pkgs and
17 | https://hedgehog.fhcrc.org/bioc-data/trunk/experiment/data_store were saved
18 | to the pkgs_RDA_OBJECTS_TO_UPDATE and data_store_RDA_OBJECTS_TO_UPDATE
19 | files, respectively, and these 2 files placed in this folder. So in case
20 | these objects need to be updated again, these 2 files can be re-used to run
21 | STEP 4 directly without the need to re-run STEPS 1 & 2 & 3.
22 |
23 | STEP 1: Collect the rda files.
24 |
25 | cd
26 | find . -type d -name '.git' -prune -o -type f -print | \
27 | grep -Ei '\.(rda|RData)$' >RDA_FILES
28 |
29 | STEP 2: Collect the rda objects.
30 |
31 | cd
32 |
33 | R="$HOME/bin/R-3.2"
34 | R_SCRIPT="scriptfile <- system.file('scripts', 'Find_and_update_objects', "
35 | R_SCRIPT="$R_SCRIPT 'collect_rda_objects.R', "
36 | R_SCRIPT="$R_SCRIPT package='SummarizedExperiment', mustWork=TRUE)"
37 | R_SCRIPT="$R_SCRIPT; source(scriptfile)"
38 | echo "$R_SCRIPT" | $R --vanilla >collect_rda_objects.log 2>&1 &
39 |
40 | STEP 3: Collect the rda objects to update.
41 |
42 | cd
43 |
44 | R="$HOME/bin/R-3.2"
45 | R_SCRIPT="scriptfile <- system.file('scripts', 'Find_and_update_objects', "
46 | R_SCRIPT="$R_SCRIPT 'collect_rda_objects_to_update.R', "
47 | R_SCRIPT="$R_SCRIPT package='SummarizedExperiment', mustWork=TRUE)"
48 | R_SCRIPT="$R_SCRIPT; source(scriptfile)"
49 | echo "$R_SCRIPT" | $R --vanilla >collect_rda_objects_to_update.log 2>&1 &
50 |
51 | STEP 4: Update rda objects.
52 |
53 | cd
54 |
55 | #To update the "old" serialized SummarizedExperiment objects located in
56 | # https://hedgehog.fhcrc.org/bioc-data/trunk/experiment/pkgs
57 | #replace the above command with
58 | # svn co https://hedgehog.fhcrc.org/bioc-data/trunk/experiment/pkgs
59 | # cp path/to/pkgs_RDA_OBJECTS_TO_UPDATE pkgs/RDA_OBJECTS_TO_UPDATE
60 | # cd pkgs
61 |
62 | R="$HOME/bin/R-3.2"
63 | R_SCRIPT="scriptfile <- system.file('scripts', 'Find_and_update_objects', "
64 | R_SCRIPT="$R_SCRIPT 'update_rda_objects.R', "
65 | R_SCRIPT="$R_SCRIPT package='SummarizedExperiment', mustWork=TRUE)"
66 | R_SCRIPT="$R_SCRIPT; source(scriptfile)"
67 | echo "$R_SCRIPT" | $R --vanilla >update_rda_objects.log 2>&1 &
68 |
69 |
--------------------------------------------------------------------------------
/inst/scripts/Find_and_update_objects/collect_rda_objects.R:
--------------------------------------------------------------------------------
1 | ### =========================================================================
2 | ### collect_rda_objects.R
3 | ### -------------------------------------------------------------------------
4 | ###
5 | ### This script performs STEP 2 of the "Find and update objects" procedure
6 | ### described in the README file located in the same folder.
7 | ###
8 | ### Before you run this script, make sure you performed STEP 1, that is, you
9 | ### need to generate input file RDA_FILES. This can be achieved with
10 | ### something like:
11 | ###
12 | ### cd
13 | ### find . -type d -name '.git' -prune -o -type f -print | \
14 | ### grep -Ei '\.(rda|RData)$' >RDA_FILES
15 | ###
16 | ### See README file for more information.
17 | ###
18 | ### Then to run STEP 2 in "batch" mode:
19 | ###
20 | ### cd # RDA_FILES file should be here
21 | ### R CMD BATCH collect_rda_objects.R >collect_rda_objects.log 2>&1 &
22 | ###
23 | ### This can take a couple of hours to complete...
24 | ###
25 | ### The output of STEP 2 is a file created in the current directory and named
26 | ### RDA_OBJECTS. It has 1 line per serialized object and the 4 following
27 | ### fields (separated by tabs):
28 | ### 1. Path to rda file (as found in input file RDA_FILES).
29 | ### 2. Name of object in rda file.
30 | ### 3. Class of object in rda file.
31 | ### 4. Package where class of object is defined.
32 | ###
33 |
34 | INFILE <- "RDA_FILES"
35 | OUTFILE <- "RDA_OBJECTS"
36 |
### Load each rda file in 'rda_files' into a fresh environment and write one
### line per object it contains to 'outfile'. Each line has 4 tab-separated
### fields: path to the rda file, object name, first element of the object's
### class, and the "package" attribute of the class ("." when absent).
###
### 'outfile' is first created (or truncated); with the default "" the
### output goes to the console, as with any cat(file="") call.
### Progress messages are printed to the console.
collect_rda_objects <- function(rda_files, outfile="")
{
    cat("", file=outfile)  # create (or overwrite) empty output file
    for (i in seq_along(rda_files)) {
        rda_file <- rda_files[[i]]

        cat("[", i , "/", length(rda_files), "] Loading ", rda_file, " ... ",
            sep="")
        ## Load in an isolated environment so objects from different files
        ## never mix with each other or with the caller's workspace.
        envir <- new.env(parent=emptyenv())
        load(rda_file, envir=envir)
        cat("OK\n")

        for (objname in names(envir)) {
            obj <- get(objname, envir=envir, inherits=FALSE)
            objclass <- class(obj)
            objclass_pkg <- attr(objclass, "package")

            ## Fix 'objclass' and 'objclass_pkg' (they both need to be
            ## character vectors of length 1 before we pass them to paste()).
            objclass <- objclass[[1L]]
            if (is.null(objclass_pkg))
                objclass_pkg <- "."
            outline <- paste(rda_file, objname, objclass, objclass_pkg,
                             sep="\t")
            ## Append to the file requested by the caller ('outfile'), not
            ## to a global variable.
            cat(outline, "\n", sep="", file=outfile, append=TRUE)
        }
    }
}
65 |
66 | rda_files <- read.table(INFILE, stringsAsFactors=FALSE)[[1L]]
67 | collect_rda_objects(rda_files, outfile=OUTFILE)
68 |
69 |
--------------------------------------------------------------------------------
/inst/scripts/Find_and_update_objects/collect_rda_objects_to_update.R:
--------------------------------------------------------------------------------
1 | ### =========================================================================
2 | ### collect_rda_objects_to_update.R
3 | ### -------------------------------------------------------------------------
4 | ###
5 | ### This script performs STEP 3 of the "Find and update objects" procedure
6 | ### described in the README file located in the same folder.
7 | ###
8 | ### Before you run this script, make sure you performed STEPS 1 & 2.
9 | ### See README file for more information.
10 | ###
11 | ### Then to run STEP 3 in "batch" mode:
12 | ###
13 | ### cd # RDA_OBJECTS file should be here
14 | ### R CMD BATCH collect_rda_objects_to_update.R \
15 | ### >collect_rda_objects_to_update.log 2>&1 &
16 | ###
17 | ### The output of STEP 3 is a file created in the current directory and named
18 | ### RDA_OBJECTS_TO_UPDATE. It is a subset of input file RDA_OBJECTS.
19 | ###
20 |
21 | INFILE <- "RDA_OBJECTS"
22 | OUTFILE <- "RDA_OBJECTS_TO_UPDATE"
23 |
24 | library(BiocManager)
25 | library(SummarizedExperiment)
26 |
### Disabled (never evaluated, because of 'if (FALSE)') in-process
### implementation of .check_classes(), kept for reference. It attaches
### 'package' in the current session and, for each class in 'classes',
### tests whether it extends "SummarizedExperiment" or
### "RangedSummarizedExperiment".
27 | if (FALSE) {
28 | ### Unfortunately, loading all the required packages in the main process will
29 | ### sometimes hit the maximal number of DLLs that can be loaded ("maximal
30 | ### number of DLLs reached..." infamous error).
31 | .check_classes <- function(classes, package)
32 | {
33 | suppressWarnings(suppressPackageStartupMessages(
34 | library(package, character.only=TRUE, quietly=TRUE)
35 | ))
36 | sapply(classes, function(class) {
37 | extends(class, "SummarizedExperiment") ||
38 | extends(class, "RangedSummarizedExperiment")
39 | })
40 | }
41 | }
42 |
43 | ### We check the classes in a subprocess to work around the "maximal number
44 | ### of DLLs reached..." infamous error.
### For each class in 'classes', determine whether it extends
### "SummarizedExperiment" or "RangedSummarizedExperiment" once 'package'
### is attached. The check runs in a separate R process: a small script is
### generated and fed to it on standard input, and the result is written
### to a temporary file that is read back here.
###
### Returns the values of the 'ok' column produced by the subprocess, named
### by 'classes'. Raises an error if the subprocess fails or produces no
### output file. The temporary file is always removed on exit.
.check_classes <- function(classes, package)
{
    classes_in1string <- paste0("\"", classes, "\"")
    classes_in1string <- paste0("c(",
                                paste(classes_in1string, collapse=", "),
                                ")")
    outfile <- file.path(tempdir(), paste0(package, "_class_summary"))
    ## Never read a stale result left by an earlier failed call, and never
    ## leave the temporary file behind, whatever happens below.
    unlink(outfile)
    on.exit(unlink(outfile), add=TRUE)
    input <- c("suppressWarnings(suppressPackageStartupMessages(",
       sprintf(" library(%s)", "SummarizedExperiment"),
               "))",
               "suppressWarnings(suppressPackageStartupMessages(",
       sprintf(" library(%s)", package),
               "))",
       sprintf("classes <- %s", classes_in1string),
               "ok <- sapply(classes, function(class) {",
               " extends(class, \"SummarizedExperiment\") ||",
               " extends(class, \"RangedSummarizedExperiment\")",
               "})",
               "class_summary <- data.frame(class=classes, ok=unname(ok))",
       sprintf("write.table(class_summary, file=\"%s\", sep=\"\t\")", outfile)
    )
    command <- file.path(R.home("bin"), "R")
    args <- c("--vanilla", "--slave")
    ## system2() returns the exit status of the subprocess when its output
    ## is not captured; a non-zero status means the generated script failed.
    status <- system2(command, args=args, input=input)
    if (!identical(as.integer(status), 0L) || !file.exists(outfile))
        stop("checking classes defined in package ", package,
             " failed in subprocess (exit status ", status, ")",
             call.=FALSE)
    class_summary <- read.table(outfile, stringsAsFactors=FALSE)
    stopifnot(identical(class_summary[ , "class"], classes))  # sanity check
    setNames(class_summary[ , "ok"], classes)
}
74 |
### Select the rows of 'rda_objects' whose class is reported by
### .check_classes() as extending SummarizedExperiment or
### RangedSummarizedExperiment, and write them (tab-separated, one object
### per line) to 'outfile'.
###
### 'rda_objects' must have columns "objclass" and "objclass_pkg". Classes
### whose package is "." or ".GlobalEnv" are not checked. Packages that are
### not installed are installed with install(); classes from packages that
### still cannot be installed are not checked either. Unchecked classes are
### treated as not needing an update.
collectRdaObjectsToUpdate <- function(rda_objects, outfile="")
{
    class_pkg_pairs <- unique(rda_objects[ , c("objclass", "objclass_pkg")])
    classes <- class_pkg_pairs[ , "objclass"]
    dup_idx <- which(duplicated(classes))
    if (length(dup_idx) != 0L) {
        msg <- c("the following classes are defined in more than 1 package: ",
                 paste0(unique(classes[dup_idx]), collapse=", "))
        warning(msg)
    }
    pkg2class <- split(classes, class_pkg_pairs[ , "objclass_pkg"])
    pkg2class[c(".", ".GlobalEnv")] <- NULL
    pkgs <- names(pkg2class)

    ## Install missing packages, then keep only the packages that are
    ## actually available.
    missing_pkgs <- setdiff(pkgs, rownames(installed.packages()))
    if (length(missing_pkgs) != 0L) {
        install(missing_pkgs)
        pkgs <- intersect(pkgs, rownames(installed.packages()))
        pkg2class <- pkg2class[pkgs]
    }

    ## Check classes, one package at a time.
    npkgs <- length(pkgs)
    per_pkg_ok <- vector("list", npkgs)
    for (i in seq_len(npkgs)) {
        pkg <- pkgs[[i]]
        cat("[", i , "/", npkgs, "] Check classes defined ",
            "in package ", pkg, " ... ", sep="")
        per_pkg_ok[[i]] <- .check_classes(pkg2class[[pkg]], pkg)
        cat("OK\n")
    }
    class2ok <- unlist(per_pkg_ok)

    ## Write output to file. Classes that were not checked yield NA when
    ## looked up by name and are treated as FALSE.
    ok <- class2ok[rda_objects[ , "objclass"]]
    ok[is.na(ok)] <- FALSE
    selected <- rda_objects[ok, , drop=FALSE]
    out_lines <- do.call(paste, c(as.list(selected), list(sep="\t")))
    writeLines(out_lines, con=outfile)
}
129 |
130 | rda_objects <- read.table(INFILE, stringsAsFactors=FALSE)
131 | colnames(rda_objects) <- c("rda_file", "objname", "objclass", "objclass_pkg")
132 | collectRdaObjectsToUpdate(rda_objects, outfile=OUTFILE)
133 |
134 |
--------------------------------------------------------------------------------
/inst/scripts/Find_and_update_objects/data_store_RDA_OBJECTS_TO_UPDATE:
--------------------------------------------------------------------------------
1 | # This file contains the list of rda objects to update located in
2 | # https://hedgehog.fhcrc.org/bioc-data/trunk/experiment/data_store
3 | # It was obtained by running STEPS 1 & 2 & 3 of the "Find and update objects"
4 | # procedure on May 26, 2015. See README file for more information.
5 | # To update the objects listed in this file:
6 | # 1. Get a working copy of the above URL.
7 | # 2. Copy this file to the working copy and rename it RDA_OBJECTS_TO_UPDATE.
8 | # 3. Run STEP 4 (see README file for more information).
9 | ./geuvPack/data/geuFPKM.rda geuFPKM SummarizedExperiment GenomicRanges
10 | ./bsseqData/data/BS.cancer.ex.fit.rda BS.cancer.ex.fit BSseq bsseq
11 | ./bsseqData/data/BS.cancer.ex.rda BS.cancer.ex BSseq bsseq
12 | ./fission/data/fission.RData fission SummarizedExperiment GenomicRanges
13 | ./DREAM4/data/dream4_010_04.RData dream4_010_04 SummarizedExperiment GenomicRanges
14 | ./DREAM4/data/dream4_100_01.RData dream4_100_01 SummarizedExperiment GenomicRanges
15 | ./DREAM4/data/dream4_010_01.RData dream4_010_01 SummarizedExperiment GenomicRanges
16 | ./DREAM4/data/dream4_010_05.RData dream4_010_05 SummarizedExperiment GenomicRanges
17 | ./DREAM4/data/dream4_010_02.RData dream4_010_02 SummarizedExperiment GenomicRanges
18 | ./DREAM4/data/dream4_100_04.RData dream4_100_04 SummarizedExperiment GenomicRanges
19 | ./DREAM4/data/dream4_010_03.RData dream4_010_03 SummarizedExperiment GenomicRanges
20 | ./DREAM4/data/dream4_100_05.RData dream4_100_05 SummarizedExperiment GenomicRanges
21 | ./DREAM4/data/dream4_100_02.RData dream4_100_02 SummarizedExperiment GenomicRanges
22 | ./DREAM4/data/dream4_100_03.RData dream4_100_03 SummarizedExperiment GenomicRanges
23 | ./COSMIC.67/data/cosmic_67.rda cosmic_67 CollapsedVCF VariantAnnotation
24 | ./parathyroidSE/data/parathyroidGenesSE.RData parathyroidGenesSE SummarizedExperiment GenomicRanges
25 | ./parathyroidSE/data/parathyroidExonsSE.RData parathyroidExonsSE SummarizedExperiment GenomicRanges
26 | ./RRBSdata/data/rrbs.RData rrbs BSraw BiSeq
27 | ./airway/data/airway.RData airway SummarizedExperiment GenomicRanges
28 | ./dsQTL/data/DSQ_2.rda DSQ_2 SummarizedExperiment GenomicRanges
29 | ./dsQTL/data/DSQ_17.rda DSQ_17 SummarizedExperiment GenomicRanges
30 | ./dsQTL/data/DHStop5_hg19.rda DHStop5_hg19 SummarizedExperiment GenomicRanges
31 |
--------------------------------------------------------------------------------
/inst/scripts/Find_and_update_objects/pkgs_RDA_OBJECTS_TO_UPDATE:
--------------------------------------------------------------------------------
1 | # This file contains the list of rda objects to update located in
2 | # https://hedgehog.fhcrc.org/bioc-data/trunk/experiment/pkgs
3 | # It was obtained by running STEPS 1 & 2 & 3 of the "Find and update objects"
4 | # procedure on May 26, 2015. See README file for more information.
5 | # To update the objects listed in this file:
6 | # 1. Get a working copy of the above URL.
7 | # 2. Copy this file to the working copy and rename it RDA_OBJECTS_TO_UPDATE.
8 | # 3. Run STEP 4 (see README file for more information).
9 | ./pasilla/data/pasillaDEXSeqDataSet.RData dxd DEXSeqDataSet DEXSeq
10 |
--------------------------------------------------------------------------------
/inst/scripts/Find_and_update_objects/update_rda_objects.R:
--------------------------------------------------------------------------------
1 | ### =========================================================================
2 | ### update_rda_objects.R
3 | ### -------------------------------------------------------------------------
4 | ###
5 | ### This script performs STEP 4 of the "Find and update objects" procedure
6 | ### described in the README file located in the same folder.
7 | ###
8 | ### Before you run this script, make sure you performed STEPS 1 & 2 & 3.
9 | ### See README file for more information.
10 | ###
11 | ### Then to run STEP 4 in "batch" mode:
12 | ###
13 | ### cd # RDA_OBJECTS_TO_UPDATE file should be here
14 | ### R CMD BATCH update_rda_objects.R >update_rda_objects.log 2>&1 &
15 | ###
16 |
17 | INFILE <- "RDA_OBJECTS_TO_UPDATE"
18 |
19 | library(SummarizedExperiment)
20 |
### Update in place the objects stored in environment 'envir'.
###
### For each object, the package recorded in the "package" attribute of its
### class (if any) is attached first. Objects for which
### .has_SummarizedExperiment_internal_structure() is TRUE are then passed
### through updateObject(), validated with validObject(complete=TRUE) and
### re-assigned under the same name. Returns TRUE if at least one object
### was updated, FALSE otherwise.
.update_objects <- function(envir)
{
    any_updated <- FALSE
    for (objname in names(envir)) {
        obj <- get(objname, envir=envir, inherits=FALSE)
        class_pkg <- attr(class(obj), "package")
        if (!is.null(class_pkg)) {
            suppressWarnings(suppressPackageStartupMessages(
                library(class_pkg, character.only=TRUE, quietly=TRUE)
            ))
        }
        if (SummarizedExperiment:::.has_SummarizedExperiment_internal_structure(obj)) {
            cat(" Updating ", objname, " ... ", sep="")
            obj <- updateObject(obj)
            validObject(obj, complete=TRUE)
            assign(objname, obj, envir=envir, inherits=FALSE)
            cat("OK\n")
            any_updated <- TRUE
        }
    }
    any_updated
}
45 |
### For each distinct file in the "rda_file" column of 'rda_objects': load
### it into a fresh environment, update its objects with .update_objects(),
### and, only if something was updated, save all the objects back to the
### same file (xz-compressed). Progress is printed to the console.
updateRdaObjects <- function(rda_objects)
{
    rda_files <- unique(rda_objects[ , "rda_file"])
    nfiles <- length(rda_files)
    for (i in seq_len(nfiles)) {
        path <- rda_files[[i]]

        cat("[", i , "/", nfiles, "] Loading ", path, " ... ",
            sep="")
        envir <- new.env(parent=emptyenv())
        load(path, envir=envir)
        cat("OK\n")

        ## Nothing changed: leave the file untouched.
        if (!.update_objects(envir))
            next

        cat(" Saving updated objects to ", path, " ... ", sep="")
        save(list=names(envir), file=path, envir=envir, compress="xz")
        cat("OK\n")
    }
}
65 |
66 | rda_objects_to_update <- read.table(INFILE, stringsAsFactors=FALSE)
67 | colnames(rda_objects_to_update) <- c("rda_file", "objname",
68 | "objclass", "objclass_pkg")
69 | updateRdaObjects(rda_objects_to_update)
70 |
71 |
--------------------------------------------------------------------------------
/inst/unitTests/test_Assays-class.R:
--------------------------------------------------------------------------------
### Check rbind() and cbind() on Assays objects holding matrices: the result
### must be a SimpleAssays whose elements equal the element-wise
### rbind()/cbind() of the inputs, paired by position when the assays are
### unnamed and by name when they are named.
test_bind_Assays <- function() {
    ## unnamed -- map by position
    l1 <- list(matrix(1, 3, 4), matrix(2, 3, 4))
    l2 <- list(matrix(3, 3, 4), matrix(4, 3, 4))
    a1 <- Assays(l1)
    a2 <- Assays(l2)

    target <- Map(rbind, l1, l2)
    current <- rbind(a1, a2)
    checkTrue(is(current, "SimpleAssays"))
    checkIdentical(as(target, "SimpleList"), as(current, "SimpleList"))

    target <- Map(cbind, l1, l2)
    current <- cbind(a1, a2)
    checkTrue(is(current, "SimpleAssays"))
    checkIdentical(as(target, "SimpleList"), as(current, "SimpleList"))

    ## named -- map by name
    l1 <- list(x=matrix(1, 3, 4), y=matrix(2, 3, 4))
    l2 <- list(y=matrix(4, 3, 4), x=matrix(3, 3, 4))
    a1 <- Assays(l1)
    a2 <- Assays(l2)

    ## Reorder 'l2' to follow the names of 'l1'. Indexing by names(l1) is
    ## the correct direction for any ordering; match(names(l2), names(l1))
    ## only gives the right answer when the permutation is its own inverse.
    l2_by_name <- l2[names(l1)]

    target <- Map(rbind, l1, l2_by_name)
    current <- rbind(a1, a2)
    checkTrue(is(current, "SimpleAssays"))
    checkIdentical(as(target, "SimpleList"), as(current, "SimpleList"))

    target <- Map(cbind, l1, l2_by_name)
    current <- cbind(a1, a2)
    checkTrue(is(current, "SimpleAssays"))
    checkIdentical(as(target, "SimpleList"), as(current, "SimpleList"))
}
34 |
### Same checks as for matrices, but with 4-dimensional arrays: rbind() and
### cbind() on Assays objects must agree with the element-wise
### arbind()/acbind() of the inputs, paired by position when unnamed and by
### name when named.
test_bind_higher_order_Assays <- function() {
    ## unnamed -- map by position
    l1 <- list(array(1, dim = c(3, 4, 5, 6)),
               array(2, dim = c(3, 4, 5, 6)))
    l2 <- list(array(4, dim = c(3, 4, 5, 6)),
               array(3, dim = c(3, 4, 5, 6)))
    a1 <- Assays(l1)
    a2 <- Assays(l2)

    target <- Map(arbind, l1, l2)
    current <- rbind(a1, a2)
    checkTrue(is(current, "SimpleAssays"))
    checkIdentical(as(target, "SimpleList"), as(current, "SimpleList"))

    target <- Map(acbind, l1, l2)
    current <- cbind(a1, a2)
    checkTrue(is(current, "SimpleAssays"))
    checkIdentical(as(target, "SimpleList"), as(current, "SimpleList"))

    ## named -- map by name
    l1 <- list(x = array(1, dim = c(3, 4, 5, 6)),
               y = array(2, dim = c(3, 4, 5, 6)))
    l2 <- list(y = array(4, dim = c(3, 4, 5, 6)),
               x = array(3, dim = c(3, 4, 5, 6)))
    a1 <- Assays(l1)
    a2 <- Assays(l2)

    ## Reorder 'l2' to follow the names of 'l1'. Indexing by names(l1) is
    ## the correct direction for any ordering; match(names(l2), names(l1))
    ## only gives the right answer when the permutation is its own inverse.
    l2_by_name <- l2[names(l1)]

    target <- Map(arbind, l1, l2_by_name)
    current <- rbind(a1, a2)
    checkTrue(is(current, "SimpleAssays"))
    checkIdentical(as(target, "SimpleList"), as(current, "SimpleList"))

    target <- Map(acbind, l1, l2_by_name)
    current <- cbind(a1, a2)
    checkTrue(is(current, "SimpleAssays"))
    checkIdentical(as(target, "SimpleList"), as(current, "SimpleList"))
}
72 |
### Binding 4-dimensional arrays with plain matrices must raise an error,
### both through rbind()/cbind() on Assays objects and through
### arbind()/acbind() on the raw elements.
test_bind_error_on_incompatible_dimension_Assays <- function() {
    dims4d <- c(3, 4, 5, 6)
    arrays <- list(x = array(1, dim = dims4d),
                   y = array(2, dim = dims4d))
    matrices <- list(y = matrix(4, 3, 4), x = matrix(3, 3, 4))
    array_assays <- Assays(arrays)
    matrix_assays <- Assays(matrices)
    first_array <- arrays[[1]]
    first_matrix <- matrices[[1]]

    ## arbind
    checkException(rbind(array_assays, matrix_assays), silent = TRUE)
    checkException(arbind(first_array, first_matrix), silent = TRUE)

    ## acbind
    checkException(cbind(array_assays, matrix_assays), silent = TRUE)
    checkException(acbind(first_array, first_matrix), silent = TRUE)
}
88 |
89 |
--------------------------------------------------------------------------------
/inst/unitTests/test_RangedSummarizedExperiment-class.R:
--------------------------------------------------------------------------------
1 | library(digest)
2 |
### Names of functions, grouped in three character vectors. They are not
### used in the part of the file shown here -- presumably consumed by tests
### further down (TODO confirm).
3 | .singleDispatch <-
4 | c("duplicated", "end", "end<-", "granges", "ranges",
5 | "seqinfo", "seqinfo<-", "seqnames", "start", "start<-",
6 | "strand", "width", "width<-")
7 |
8 | .twoDispatch <- c("pcompare", "Compare")
9 |
10 | .otherFuns <- c("is.unsorted", "order", "rank", "sort")
11 |
### Shared fixtures: a 5 x 3 and a 3 x 3 matrix of ones, used as the single
### assay "m" of the two test objects built below.
12 | M1 <- matrix(1, 5, 3)
13 | M2 <- matrix(1, 3, 3)
14 | mList <- list(M1, M2)
15 | assaysList <- list(gr=SimpleList(m=M1), grl=SimpleList(m=M2))
### Row ranges: "gr" holds 5 ranges on chr1; "grl" is the same 5 ranges
### split into 3 groups, with the names of the groups removed.
16 | rowRangesList <-
17 | list(gr=GRanges("chr1", IRanges(1:5, 10)),
18 | grl=split(GRanges("chr1", IRanges(1:5, 10)), c(1,1,2,2,3)))
19 | names(rowRangesList[["grl"]]) <- NULL
### Column data shared by both objects (3 columns/samples).
20 | colData <- DataFrame(x=letters[1:3])
21 |
22 | ## a list of one SE with GRanges and one with GRangesList
23 | rseList <-
24 | list(SummarizedExperiment(
25 | assays=assaysList[["gr"]],
26 | rowRanges=rowRangesList[["gr"]],
27 | colData=colData),
28 | SummarizedExperiment(
29 | assays=assaysList[["grl"]],
30 | rowRanges=rowRangesList[["grl"]],
31 | colData=colData))
32 |
33 |
### Check construction of RangedSummarizedExperiment objects: an empty
### object is valid, the objects in 'rseList' are valid and return the
### assays, row ranges and column data they were built from, and a
### 3-dimensional array is accepted as an assay and subsets along its
### first two dimensions.
test_RangedSummarizedExperiment_construction <- function()
{
    ## empty-ish
    checkTrue(validObject(new("RangedSummarizedExperiment")))

    ## substance
    for (i in seq_along(rseList)) {
        rse <- rseList[[i]]
        checkTrue(validObject(rse))
        checkIdentical(SimpleList(m=mList[[i]]), assays(rse))
        checkIdentical(rowRangesList[[i]], rowRanges(rse))
        checkIdentical(DataFrame(x=letters[1:3]), colData(rse))
    }

    ## array in assays slot
    ss <- rseList[[1]]
    assays(ss) <- SimpleList(array(1:5, c(5,3,2)))
    checkTrue(validObject(ss))
    checkTrue(all(dim(assays(ss[1:3,1:2])[[1]]) == c(3, 2, 2)))
}
55 |
### Check the getters on each object of 'rseList': dim() combines the
### number of row ranges with the number of rows of 'colData', dimnames()
### is NULL, and rowRanges(), colData() and metadata() return the values
### the object was built with (an empty list for the metadata).
test_RangedSummarizedExperiment_getters <- function()
{
    for (i in seq_along(rseList)) {
        rse <- rseList[[i]]
        expected_ranges <- rowRangesList[[i]]

        ## dim, dimnames
        expected_dim <- c(length(expected_ranges), nrow(colData))
        checkIdentical(expected_dim, dim(rse))
        checkIdentical(NULL, dimnames(rse))

        ## row / col / metadata
        checkIdentical(expected_ranges, rowRanges(rse))
        checkIdentical(colData, colData(rse))
        checkIdentical(list(), metadata(rse))
    }
}
72 |
### Check the setters on each object of 'rseList': rowRanges<-, colData<-
### and metadata<- (including errors on replacement values with the wrong
### number of rows), assay<- with no index, a numeric index and a name, and
### rownames<-, colnames<- and dimnames<- (including resetting to NULL).
73 | test_RangedSummarizedExperiment_setters <- function()
74 | {
75 | for (i in seq_along(rseList)) {
76 | rse <- rseList[[i]]
77 | rowRanges <- rowRangesList[[i]]
78 |
79 | ## row / col / metadata<-
80 | ss1 <- rse
81 | revData <- rev(rowRanges)
82 | rowRanges(ss1) <- revData
83 | checkIdentical(revData, rowRanges(ss1))
### A replacement with only 2 rows must be rejected.
84 | checkException(rowRanges(ss1) <- rowRanges(rse)[1:2,,drop=FALSE],
85 | "incorrect row dimensions", TRUE)
86 | revData <- colData[rev(seq_len(nrow(colData))),,drop=FALSE]
87 | colData(ss1) <- revData
88 | checkIdentical(revData, colData(ss1))
### Same for a colData replacement with only 2 rows.
89 | checkException(colData(ss1) <- colData(rse)[1:2,,drop=FALSE],
90 | "incorrect col dimensions", TRUE)
91 | lst <- list("foo", "bar")
92 | metadata(ss1) <- lst
93 | checkIdentical(lst, metadata(ss1))
94 |
95 | ## assay / assays
### Each variant starts again from the unmodified 'rse'.
96 | ss1 <- rse
97 | assay(ss1) <- assay(ss1)+1
98 | checkIdentical(assay(rse)+1, assay(ss1))
99 | ss1 <- rse
100 | assay(ss1, 1) <- assay(ss1, 1) + 1
101 | checkIdentical(assay(rse, "m") + 1, assay(ss1, "m"))
102 | ss1 <- rse
103 | assay(ss1, "m") <- assay(ss1, "m") + 1
104 | checkIdentical(assay(rse, "m")+1, assay(ss1, "m"))
105 |
106 | ## dimnames<-
107 | ss1 <- rse
108 | dimnames <- list(letters[seq_len(nrow(ss1))],
109 | LETTERS[seq_len(ncol(ss1))])
110 | rownames(ss1) <- dimnames[[1]]
111 | colnames(ss1) <- dimnames[[2]]
112 | checkIdentical(dimnames, dimnames(ss1))
### The new row/column names must also show up on the row ranges and on
### the column data.
113 | rowRanges1 <- rowRanges
114 | names(rowRanges1) <- dimnames[[1]]
115 | checkIdentical(rowRanges1, rowRanges(ss1))
116 | colData1 <- colData
117 | row.names(colData1) <- dimnames[[2]]
118 | checkIdentical(colData1, colData(ss1))
119 | ss1 <- rse
120 | dimnames(ss1) <- dimnames
121 | checkIdentical(dimnames, dimnames(ss1))
122 | dimnames(ss1) <- NULL
123 | checkIdentical(NULL, dimnames(ss1))
124 | }
125 | }
126 |
127 | test_RangedSummarizedExperiment_subset <- function()
128 | {
129 | for (i in seq_along(rseList)) {
130 | rse <- rseList[[i]]
131 | rowRanges <- rowRangesList[[i]]   # NOTE(review): this local is never read below; every later use is a rowRanges(...) call
132 | ## Checks "[" on each fixture with numeric, character, logical and Rle indices.
133 | ## numeric indices: rows only, columns only, then both
134 | ss1 <- rse[2:3,]
135 | checkIdentical(c(2L, ncol(rse)), dim(ss1))
136 | checkIdentical(rowRanges(ss1), rowRanges(rse)[2:3,])
137 | checkIdentical(colData(ss1), colData(rse))
138 | ss1 <- rse[,2:3]
139 | checkIdentical(c(nrow(rse), 2L), dim(ss1))
140 | checkIdentical(rowRanges(ss1), rowRanges(rse))
141 | checkIdentical(colData(ss1), colData(rse)[2:3,,drop=FALSE])
142 | ss1 <- rse[2:3, 2:3]
143 | checkIdentical(c(2L, 2L), dim(ss1))
144 | checkIdentical(rowRanges(ss1), rowRanges(rse)[2:3,,drop=FALSE])
145 | checkIdentical(colData(ss1), colData(rse)[2:3,,drop=FALSE])
146 |
147 | ## character indices (rows named A, B, ...; columns a, b, ...); asking for names that do not exist must error
148 | ss1 <- rse
149 | dimnames(ss1) <- list(LETTERS[seq_len(nrow(ss1))],
150 | letters[seq_len(ncol(ss1))])
151 | ridx <- c("B", "C")
152 | checkIdentical(rowRanges(ss1[ridx,]), rowRanges(ss1)[ridx,])
153 | checkIdentical(rowRanges(ss1["C",]), rowRanges(ss1)["C",,drop=FALSE])
154 | checkException(ss1[LETTERS,], "i-index out of bounds", TRUE)
155 | cidx <- c("b", "c")
156 | checkIdentical(colData(ss1[,cidx]), colData(ss1)[cidx,,drop=FALSE])
157 | checkIdentical(colData(ss1[,"a"]), colData(ss1)["a",,drop=FALSE])
158 | checkException(ss1[,letters], "j-index out of bounds", TRUE)
159 |
160 | ## logical indices: scalar TRUE/FALSE, then a length-2 index that is recycled
161 | ss1 <- rse
162 | dimnames(ss1) <- list(LETTERS[seq_len(nrow(ss1))],
163 | letters[seq_len(ncol(ss1))])
164 | checkEquals(ss1, ss1[TRUE,])
165 | checkIdentical(c(0L, ncol(ss1)), dim(ss1[FALSE,]))
166 | checkEquals(ss1, ss1[,TRUE])
167 | checkIdentical(c(nrow(ss1), 0L), dim(ss1[,FALSE]))
168 | idx <- c(TRUE, FALSE) # recycling
169 | ss2 <- ss1[idx,]
170 | checkIdentical(rowRanges(ss1)[idx,,drop=FALSE], rowRanges(ss2))
171 | ss2 <- ss1[,idx]
172 | checkIdentical(colData(ss1)[idx,,drop=FALSE], colData(ss2))
173 |
174 | ## Rle index: must select the same rows as the plain logical vector it wraps
175 | ss1 <- rse
176 | rle <- rep(c(TRUE, FALSE), each=3, length.out=nrow(ss1))
177 | checkIdentical(rowRanges(ss1[rle]), rowRanges(ss1[Rle(rle)]))
178 | checkIdentical(assays(ss1[rle]), assays(ss1[Rle(rle)]))
179 | }
180 |
181 | ## 0 columns: row-subsetting an object built from ranges only
182 | se <- SummarizedExperiment(rowRanges=GRanges("chr1", IRanges(1:10, width=1)))
183 | checkIdentical(dim(se[1:5, ]), c(5L, 0L))
184 | ## 0 rows: column-subsetting an object built from colData only
185 | se <- SummarizedExperiment(colData=DataFrame(samples=1:10))
186 | checkIdentical(dim(se[ ,1:5]), c(0L, 5L))
187 | }
188 |
189 | test_RangedSummarizedExperiment_subsetassign <- function()
190 | {
191 |     for (k in seq_along(rseList)) {
192 |         src <- rseList[[k]]
193 |         row_ids <- LETTERS[seq_len(nrow(src))]
194 |         dimnames(src) <- list(row_ids, letters[seq_len(ncol(src))])
195 |         ## swap the first two rows; everything else must stay put
196 |         swapped <- src
197 |         swapped[1:2,] <- swapped[2:1,]
198 |         checkIdentical(rowRanges(src)[2:1,], rowRanges(swapped)[1:2,])
199 |         checkIdentical(rowRanges(src[-(1:2),]), rowRanges(swapped)[-(1:2),])
200 |         checkIdentical(colData(src), colData(swapped))
201 |         checkIdentical(c(metadata(src), metadata(src)), metadata(swapped))
202 |         ## a logical index and the Rle wrapping it must behave the same
203 |         via_lgl <- via_Rle <- src
204 |         keep <- rep(c(TRUE, FALSE), each=3, length.out=nrow(swapped))
205 |         via_lgl[keep,] <- via_lgl[keep,]
206 |         via_Rle[Rle(keep),] <- via_Rle[Rle(keep),]
207 |         checkIdentical(rowRanges(via_lgl), rowRanges(via_Rle))
208 |         checkIdentical(assays(via_lgl), assays(via_Rle))
209 |         ## swap the first two columns
210 |         swapped <- src
211 |         swapped[,1:2] <- swapped[,2:1,drop=FALSE]
212 |         checkIdentical(colData(src)[2:1,,drop=FALSE],
213 |                        colData(swapped)[1:2,,drop=FALSE])
214 |         checkIdentical(colData(src)[-(1:2),,drop=FALSE],
215 |                        colData(swapped)[-(1:2),,drop=FALSE])
216 |         checkIdentical(rowRanges(src), rowRanges(swapped))
217 |         checkIdentical(c(metadata(src), metadata(src)), metadata(swapped))
218 |     }
219 |
220 |     ## replacing every row and column must yield the replacement object
221 |     lhs <- rhs <- rseList[[1]]
222 |     rowRanges(rhs) <- rev(rowRanges(rhs))
223 |     lhs[,] <- rhs
224 |     checkIdentical(lhs, rhs)
225 | }
226 |
227 | quiet <- suppressWarnings   # short alias used by the cbind/rbind tests below to silence warnings from the wrapped call
228 | test_RangedSummarizedExperiment_cbind <- function()
229 | ## requires matching ranges
230 | {
231 | ## empty: cbind of two empty objects compares equal to an empty object
232 | se <- SummarizedExperiment()
233 | empty <- cbind(se, se)
234 | checkTrue(all.equal(se, empty))
235 |
236 | ## different ranges: se2 holds only rows 2:4 (with new rownames), so cbind must fail
237 | se1 <- rseList[[1]]
238 | se2 <- se1[2:4]
239 | rownames(se2) <- month.name[seq_len(nrow(se2))]
240 | checkException(quiet(cbind(se1, se2)), silent=TRUE)
241 |
242 | ## same ranges: se2 is the first two columns of se1 under new column names
243 | se1 <- rseList[[1]]
244 | se2 <- se1[,1:2]
245 | colnames(se2) <- month.name[seq_len(ncol(se2))]
246 | res <- cbind(se1, se2)
247 | checkTrue(nrow(res) == 5)
248 | checkTrue(ncol(res) == 5)
249 | ## rowRanges: distinct rowData columns are merged; a shared column ("one") with different values must fail
250 | rowData(se1) <- DataFrame("one"=1:5)
251 | rowData(se2) <- DataFrame("two"=6:10)
252 | res <- quiet(cbind(se1, se2))
253 | checkIdentical(names(mcols(rowRanges(res))), c("one", "two"))
254 | rowData(se2) <- DataFrame("one"=6:10, "two"=6:10)
255 | checkException(cbind(se1, se2), silent=TRUE)
256 | ## colData: checked on 'res' from the last successful cbind above (one row per combined column)
257 | checkTrue(nrow(colData(res)) == 5)
258 | ## assays: a matrix and a 3-d array are both bound along the column dimension
259 | se1 <- rseList[[1]]
260 | se2 <- se1[,1:2]
261 | assays(se1) <- SimpleList("m"=matrix(rep("m", 15), nrow=5),
262 | "a"=array(rep("a", 30), c(5,3,2)))
263 | assays(se2) <- SimpleList("m"=matrix(LETTERS[1:10], nrow=5),
264 | "a"=array(LETTERS[1:20], c(5,2,2)))
265 | res <- cbind(se1, se2) ## same variables
266 | checkTrue(nrow(res) == 5)
267 | checkTrue(ncol(res) == 5)
268 | checkTrue(all.equal(dim(assays(res)$m), c(5L, 5L)))
269 | checkTrue(all.equal(dim(assays(res)$a), c(5L, 5L, 2L)))
270 | names(assays(se1)) <- c("mm", "aa")
271 | checkException(cbind(se1, se2), silent=TRUE) ## different variables
272 | }
273 |
274 | test_RangedSummarizedExperiment_rbind <- function()
275 | ## requires matching samples
276 | {
277 | ## empty: rbind of two empty objects compares equal to an empty object
278 | se <- SummarizedExperiment()
279 | empty <- rbind(se, se)
280 | checkTrue(all.equal(se, empty))
281 |
282 | ## different samples: se2 keeps only the first column, so rbind must fail
283 | se1 <- rseList[[1]]
284 | se2 <- se1[,1]
285 | checkException(quiet(rbind(se1, se2)), silent=TRUE)
286 |
287 | ## same samples: se2 is a copy of se1 under new row names
288 | se1 <- rseList[[1]]
289 | se2 <- se1
290 | rownames(se2) <- LETTERS[seq_len(nrow(se2))]
291 | res <- rbind(se1, se2)
292 | checkTrue(nrow(res) == 10)
293 | checkTrue(ncol(res) == 3)
294 | ## rowRanges: a rowData column present in only one object is NA on the other object's rows
295 | rowData(se1) <- DataFrame("one"=1:5)
296 | rowData(se2) <- DataFrame("two"=6:10)
297 | checkIdentical(
298 | rbind(
299 | cbind(rowData(se1), two = NA_integer_),
300 | cbind(one = NA_integer_, rowData(se2))
301 | ),
302 | rowData(rbind(se1, se2), use.names = FALSE)
303 | )
304 | ## colData: the check below expects 3 colData columns after combining
305 | se1 <- rseList[[1]]
306 | se2 <- se1
307 | colData(se2) <- DataFrame("one"=1:3, "two"=4:6)
308 | res <- quiet(rbind(se1, se2))
309 | checkTrue(ncol(colData(res)) == 3)
310 | ## assays: a matrix and a 3-d array are both bound along the row dimension
311 | se1 <- rseList[[1]]
312 | se2 <- se1
313 | assays(se1) <- SimpleList("m"=matrix(rep("m", 15), nrow=5),
314 | "a"=array(rep("a", 30), c(5,3,2)))
315 | assays(se2) <- SimpleList("m"=matrix(LETTERS[1:15], nrow=5),
316 | "a"=array(LETTERS[1:30], c(5,3,2)))
317 | res <- rbind(se1, se2) ## same variables
318 | checkTrue(nrow(res) == 10)
319 | checkTrue(ncol(res) == 3)
320 | checkTrue(all.equal(dim(assays(res)$m), c(10L, 3L)))
321 | checkTrue(all.equal(dim(assays(res)$a), c(10L, 3L, 2L)))
322 | names(assays(se1)) <- c("mm", "aa")
323 | checkException(rbind(se1, se2), silent=TRUE) ## different variables
324 | }
325 |
326 | test_RangedSummarizedExperiment_GRanges_API <- function()
327 | {
328 |     ## Are we targeting the correct API? The signature of each
329 |     ## RangedSummarizedExperiment method should match the signature for
330 |     ## GenomicRanges or similar; here: method formals == generic formals.
331 |
332 |     for (.fun in .singleDispatch) {
333 |         generic <- getGeneric(.fun)
334 |         method <- getMethod(.fun, "RangedSummarizedExperiment")
335 |         checkIdentical("x", generic@signature)
336 |         checkIdentical(formals(generic@.Data), formals(method@.Data))
337 |     }
338 |
339 |     ## FIXME: pcompare, Compare
340 |
341 |     .sig <- "RangedSummarizedExperiment"   # dispatch class for the loop below
342 |     for (.fun in .otherFuns) {
343 |         generic <- getGeneric(.fun)
344 |         method <- getMethod(.fun, .sig)
345 |         checkIdentical(formals(generic@.Data), formals(method@.Data))
346 |     }
347 | }
348 |
349 | test_RangedSummarizedExperiment_GRanges_values <- function()
350 | {
351 |     x <- rseList[[1]]
352 |     isAssign <- grep("<-$", .singleDispatch, value=TRUE)
353 |     .funs <- setdiff(.singleDispatch, isAssign)
354 |     ## 'exp' created after manual inspection of results
355 |     exp <- setNames(c("02dde", "80339", "df2bc", "410ea", "77198",
356 |                       "ec53a", "35e2c", "625d9", "3c90a"), .funs)
357 |     obs <- vapply(.funs, function(.fun) {
358 |         substr(digest(getGeneric(.fun)(x)), 1, 5)
359 |     }, character(1))   # type-stable: always a character vector named by .funs
360 |     checkIdentical(exp, obs)
361 |
362 |     ## replacement generics: pair each "name<-" with its getter "name"
363 |     .gets <- sub("<-$", "", isAssign)
364 |     for (i in seq_along(isAssign)) {
365 |         ## self-assignment isomorphism
366 |         value <- getGeneric(.gets[[i]])(x)
367 |         x1 <- do.call(isAssign[[i]], list(x, value=value))
368 |         checkIdentical(x, x1)
369 |     }
370 | }
371 |
372 | test_RangedSummarizedExperiment_split <- function()
373 | {
374 |     ranges <- GRanges(Rle(c("A", "B"), c(2, 3)), IRanges(1:5, 10))
375 |     rse <- SummarizedExperiment(M1, rowRanges=ranges, colData=colData)
376 |     ## FIXME (carried over): "unname should not be necessary"
377 |     by_seqname <- split(rse, seqnames(rse))
378 |     expected <- SimpleList(A=rse[1:2], B=rse[3:5])
379 |     checkEquals(by_seqname, expected)
380 | }
381 |
382 | test_RangedSummarizedExperiment_NULL_rowRanges <- function()
383 | {
384 |     demoted <- rseList[[1L]]
385 |     rowRanges(demoted) <- NULL   # dropping the ranges should demote the class
386 |     checkTrue(is.null(rowRanges(demoted)))
387 |     checkTrue(is(demoted, "SummarizedExperiment") &&
388 |               !is(demoted, "RangedSummarizedExperiment"))
389 |     ## an object built without ranges reports NULL rowRanges as well
390 |     checkTrue(is.null(rowRanges(SummarizedExperiment(M1, colData=colData))))
391 | }
392 |
--------------------------------------------------------------------------------
/inst/unitTests/test_combine-methods.R:
--------------------------------------------------------------------------------
1 | test_combineRows_unnamed <- function() {
2 |     first <- SummarizedExperiment(list(counts=matrix(rpois(1000, 10), ncol=10)))
3 |     colData(first)$A <- 1
4 |     rowData(first)$A <- 1
5 |
6 |     second <- SummarizedExperiment(list(counts=matrix(rpois(1000, 10), ncol=10),
7 |                                         normalized=matrix(rnorm(1000), ncol=10)))
8 |     colData(second)$B <- 2
9 |     rowData(second)$B <- "B"
10 |
11 |     combined <- combineRows(first, second, use.names=FALSE)
12 |
13 |     # colData: both annotation columns span every column of the result.
14 |     checkIdentical(combined$A, rep(1, ncol(combined)))
15 |     checkIdentical(combined$B, rep(2, ncol(combined)))
16 |
17 |     # rowData: each annotation is NA on the rows that came from the other object.
18 |     checkIdentical(rowData(combined)$A, rep(c(1, NA), c(nrow(first), nrow(second))))
19 |     checkIdentical(rowData(combined)$B, rep(c(NA, "B"), c(nrow(first), nrow(second))))
20 |
21 |     # assays: stacked by row; the assay missing from 'first' is NA there.
22 |     checkIdentical(as.matrix(assay(combined)), rbind(assay(first), assay(second)))
23 |     na_block <- matrix(NA, nrow(first), ncol(first))
24 |     checkIdentical(as.matrix(assay(combined, 2)), rbind(na_block, assay(second, 2)))
25 |     # A single input comes back unchanged.
26 |     checkIdentical(first, combineRows(first, delayed=FALSE, use.names=FALSE))
27 | }
28 |
29 | test_combineRows_named <- function() {
30 | se <- SummarizedExperiment(list(counts=matrix(rpois(1000, 10), ncol=10)))
31 | colData(se)$A <- 1
32 | rowData(se)$A <- 1
33 |
34 | se2 <- SummarizedExperiment(list(counts=matrix(rpois(1000, 10), ncol=20),
35 | normalized=matrix(rnorm(1000), ncol=20)))
36 | colData(se2)$B <- 2
37 | rowData(se2)$B <- "B"
38 | # At this point se is 100 x 10 and se2 is 50 x 20; neither has column names.
39 | # This fails: without use.names=FALSE the columns are matched by name, and there are no column names yet.
40 | checkException(combineRows(se, se2), silent=TRUE)
41 |
42 | colnames(se) <- letters[1:10]
43 | colnames(se2) <- letters[3:22]
44 | stuff <- combineRows(se, se2)
45 |
46 | # Column data is correctly combined (union of column names; NA where an object lacks that column)
47 | checkIdentical(colnames(stuff), letters[1:22])
48 | checkIdentical(stuff$A, rep(c(1, NA), c(ncol(se), 12)))
49 | checkIdentical(stuff$B, rep(c(NA, 2), c(2, ncol(se2))))
50 |
51 | # Row data is correctly combined.
52 | checkIdentical(rowData(stuff)$A, rep(c(1, NA), c(nrow(se), nrow(se2))))
53 | checkIdentical(rowData(stuff)$B, rep(c(NA, "B"), c(nrow(se), nrow(se2))))
54 |
55 | # Assay data is correctly combined: se lacks columns k..v (12 of them), se2 lacks columns a and b.
56 | mat <- as.matrix(assay(stuff))
57 | ref <- rbind(
58 | cbind(assay(se), matrix(NA, nrow(se), ncol=12)),
59 | cbind(NA, NA, assay(se2))
60 | )
61 | colnames(ref) <- letters[1:22]
62 | checkIdentical(mat, ref)
63 | # The second assay exists only in se2, so the rows from se are all NA.
64 | mat <- as.matrix(assay(stuff, 2))
65 | ref <- rbind(
66 | matrix(NA, nrow(se), ncol(stuff)),
67 | cbind(NA, NA, assay(se2, 2))
68 | )
69 | colnames(ref) <- letters[1:22]
70 | checkIdentical(mat, ref)
71 |
72 | # Unary methods work as expected.
73 | checkIdentical(se, combineRows(se, delayed=FALSE))
74 | }
75 |
76 | test_combineRows_assays <- function() {
77 |     # How assay names (or their absence) affect combineRows().
78 |     lhs <- SummarizedExperiment(list(matrix(rpois(1000, 10), ncol=10)))
79 |     rhs <- SummarizedExperiment(list(matrix(rpois(1000, 10), ncol=10),
80 |                                      matrix(rnorm(1000), ncol=10)))
81 |     colnames(lhs) <- letters[1:10]
82 |     colnames(rhs) <- letters[15:24]
83 |     rownames(lhs) <- paste0("GENE_", 1:100)
84 |     rownames(rhs) <- paste0("SPIKE_", 1:100)
85 |
86 |     # One unnamed assay versus two unnamed assays: must be rejected.
87 |     checkException(combineRows(lhs, rhs), silent=TRUE)
88 |
89 |     # Two unnamed assays versus two named assays: must be rejected as well.
90 |     assays(lhs) <- assays(lhs)[c(1, 1)]
91 |     assayNames(rhs) <- c("WHEE", "BLAH")
92 |     checkException(combineRows(lhs, rhs), silent=TRUE)
93 |
94 |     assays(rhs) <- unname(assays(rhs))   # both sides now hold two unnamed assays
95 |     merged <- combineRows(lhs, rhs)
96 |     checkIdentical(colnames(merged), letters[c(1:10, 15:24)])
97 |     checkIdentical(rownames(merged), c(rownames(lhs), rownames(rhs)))
98 | }
99 |
100 | test_combineRows_ranges_named <- function() {
101 | se <- SummarizedExperiment(list(counts=matrix(rpois(1000, 10), ncol=10)))
102 | se2 <- SummarizedExperiment(list(counts=matrix(rpois(1000, 10), ncol=10)))
103 | rownames(se) <- paste0("GENE_", 1:100)
104 | rownames(se2) <- paste0("SPIKE_", 1:100)
105 | # Checks which rowRanges class combineRows() returns for inputs that have row names.
106 | # Returns a vanilla SE when no input has rowRanges.
107 | out <- combineRows(se, se2, use.names=FALSE)
108 | checkIdentical(as.character(class(out)), "SummarizedExperiment")
109 | checkIdentical(rownames(out), c(rownames(se), rownames(se2)))
110 |
111 | # Returns GRanges rowRanges when every input has GRanges rowRanges.
112 | replace <- GRanges("chrA", IRanges(1, 1:100))
113 | names(replace) <- rownames(se)
114 | rowRanges(se) <- replace
115 |
116 | replace2 <- GRanges("chrB", IRanges(1, 1:100))
117 | names(replace2) <- rownames(se2)
118 | rowRanges(se2) <- replace2
119 | # NOTE(review): warnings are suppressed here, presumably because chrA and chrB share no sequence levels - confirm.
120 | suppressWarnings(out <- combineRows(se, se2, use.names=FALSE))
121 | checkIdentical(rowRanges(out), suppressWarnings(c(replace, replace2)))
122 |
123 | # Mixing in an object without rowRanges: the result is a GRangesList with empty elements for those rows.
124 | se3 <- se2
125 | rowRanges(se3) <- NULL
126 | rownames(se3) <- rownames(se2)   # re-apply the row names after dropping the ranges
127 | suppressWarnings(out <- combineRows(se, se3, use.names=FALSE))
128 | checkTrue(is(rowRanges(out), "GRangesList"))
129 | checkIdentical(unname(lengths(rowRanges(out))), rep(c(1L, 0L), c(nrow(se), nrow(se3))))
130 | # Mixing GRanges with GRangesList: the result is a GRangesList.
131 | se4 <- se2
132 | rowRanges(se4) <- as(rowRanges(se4), "GRangesList")
133 | suppressWarnings(out <- combineRows(se, se4, use.names=FALSE))
134 | expected <- suppressWarnings(as(c(replace, replace2), "GRangesList"))
135 | checkIdentical(rowRanges(out), expected)
136 |
137 | # Order doesn't affect conversion to GRL.
138 | suppressWarnings(out <- combineRows(se4, se, use.names=FALSE))
139 | expected <- suppressWarnings(as(c(replace2, replace), "GRangesList"))
140 | checkIdentical(rowRanges(out), expected)
141 | # Three inputs (GRanges, no ranges, GRangesList) in two different orders.
142 | suppressWarnings(combined <- rowRanges(combineRows(se, se3, se4, use.names=FALSE)))
143 | checkIdentical(unname(lengths(combined)), rep(c(1L, 0L, 1L), c(nrow(se), nrow(se3), nrow(se4))))
144 | suppressWarnings(combined <- rowRanges(combineRows(se3, se, se4, use.names=FALSE)))
145 | checkIdentical(unname(lengths(combined)), rep(c(0L, 1L, 1L), c(nrow(se3), nrow(se), nrow(se4))))
146 | }
147 |
148 | test_combineRows_ranges_unnamed <- function() {
149 | # Repeating the same suite of tests for SEs without rownames.
150 | # This checks the correctness of some edge-case behaviors.
151 |
152 | se <- SummarizedExperiment(list(counts=matrix(rpois(1000, 10), ncol=10)))
153 | se2 <- SummarizedExperiment(list(counts=matrix(rpois(1000, 10), ncol=10)))
154 |
155 | # Returns a vanilla SE when no input has rowRanges.
156 | out <- combineRows(se, se2, use.names=FALSE)
157 | checkIdentical(as.character(class(out)), "SummarizedExperiment")
158 | checkIdentical(nrow(out), nrow(se) + nrow(se2))
159 |
160 | # Returns GRanges rowRanges when every input has GRanges rowRanges.
161 | rowRanges(se) <- GRanges("chrA", IRanges(1, 1:100))
162 | rowRanges(se2) <- GRanges("chrB", IRanges(1, 1:100))
163 | suppressWarnings(out <- combineRows(se, se2, use.names=FALSE))
164 | checkIdentical(rowRanges(out), suppressWarnings(c(rowRanges(se), rowRanges(se2))))
165 |
166 | # Mixing in an object without rowRanges: the result is a GRangesList with empty elements for those rows.
167 | se3 <- se2
168 | rowRanges(se3) <- NULL
169 | suppressWarnings(out <- combineRows(se, se3, use.names=FALSE))
170 | checkTrue(is(rowRanges(out), "GRangesList"))
171 | checkIdentical(unname(lengths(rowRanges(out))), rep(c(1L, 0L), c(nrow(se), nrow(se3))))
172 | # Mixing GRanges with GRangesList: the result is a GRangesList.
173 | se4 <- se2
174 | rowRanges(se4) <- as(rowRanges(se4), "GRangesList")
175 | suppressWarnings(out <- combineRows(se, se4, use.names=FALSE))
176 | expected <- suppressWarnings(as(c(rowRanges(se), rowRanges(se2)), "GRangesList"))
177 | checkIdentical(rowRanges(out), expected)
178 | # Three inputs: GRanges, no ranges, GRangesList.
179 | suppressWarnings(combined <- rowRanges(combineRows(se, se3, se4, use.names=FALSE)))
180 | checkIdentical(unname(lengths(combined)), rep(c(1L, 0L, 1L), c(nrow(se), nrow(se3), nrow(se4))))
181 |
182 | # Handles partial row names: rows from the unnamed object get "" as their name.
183 | rownames(se) <- paste0("GENE_", 1:100)
184 | suppressWarnings(out <- combineRows(se, se2, use.names=FALSE))
185 | checkIdentical(rownames(out), c(rownames(se), character(nrow(se2))))
186 | }
187 |
188 | test_combineCols_unnamed <- function() {
189 |     first <- SummarizedExperiment(list(counts=matrix(rpois(1000, 10), ncol=10)))
190 |     colData(first)$A <- 1L
191 |     rowData(first)$A <- 1
192 |
193 |     second <- SummarizedExperiment(list(counts=matrix(rpois(1000, 10), ncol=10),
194 |                                         normalized=matrix(rnorm(1000), ncol=10)))
195 |     colData(second)$A <- 2L
196 |     colData(second)$B <- 3
197 |     rowData(second)$B <- "B"
198 |
199 |     combined <- combineCols(first, second, use.names=FALSE)
200 |
201 |     # colData: stacked; 'B' is NA on the columns that came from 'first'.
202 |     checkIdentical(combined$A, rep(1:2, each=10))
203 |     checkIdentical(combined$B, rep(c(NA, 3), each=10))
204 |
205 |     # rowData: annotation columns of both objects cover every row.
206 |     checkIdentical(rowData(combined)$A, rep(1, nrow(first)))
207 |     checkIdentical(rowData(combined)$B, rep("B", nrow(first)))
208 |
209 |     # assays: bound by column; the assay missing from 'first' is NA there.
210 |     checkIdentical(as.matrix(assay(combined)), cbind(assay(first), assay(second)))
211 |     na_block <- matrix(NA, nrow(first), ncol(first))
212 |     checkIdentical(as.matrix(assay(combined, 2)), cbind(na_block, assay(second, 2)))
213 |     # A single input comes back unchanged.
214 |     checkIdentical(first, combineCols(first, delayed=FALSE, use.names=FALSE))
215 | }
216 |
217 | test_combineCols_named <- function() {
218 | se <- SummarizedExperiment(list(counts=matrix(rpois(1000, 10), ncol=100)))
219 | colData(se)$A <- 1L
220 | rowData(se)$A <- 1
221 |
222 | se2 <- SummarizedExperiment(list(counts=matrix(rpois(1000, 10), ncol=50),
223 | normalized=matrix(rnorm(1000), ncol=50)))
224 | colData(se2)$A <- 2L
225 | colData(se2)$B <- 3
226 | rowData(se2)$B <- "B"
227 | # At this point se is 10 x 100 and se2 is 20 x 50; neither has row names.
228 | # This fails: without use.names=FALSE the rows are matched by name, and there are no row names yet.
229 | checkException(combineCols(se, se2), silent=TRUE)
230 |
231 | rownames(se) <- letters[1:10]
232 | rownames(se2) <- letters[3:22]
233 | stuff <- combineCols(se, se2)
234 |
235 | # Row names are the union of both inputs; column data is correctly combined
236 | checkIdentical(rownames(stuff), letters[1:22])
237 | checkIdentical(stuff$A, rep(1:2, c(ncol(se), ncol(se2))))
238 | checkIdentical(stuff$B, rep(c(NA, 3), c(ncol(se), ncol(se2))))
239 |
240 | # Row data is correctly combined (NA where an object lacks that row).
241 | checkIdentical(rowData(stuff)$A, rep(c(1, NA), c(nrow(se), 12)))
242 | checkIdentical(rowData(stuff)$B, rep(c(NA, "B"), c(2, nrow(se2))))
243 |
244 | # Assay data is correctly combined: se lacks rows k..v (12 of them), se2 lacks rows a and b.
245 | mat <- as.matrix(assay(stuff))
246 | ref <- cbind(
247 | rbind(assay(se), matrix(NA, 12, ncol(se))),
248 | rbind(NA, NA, assay(se2))
249 | )
250 | rownames(ref) <- letters[1:22]
251 | checkIdentical(mat, ref)
252 | # The second assay exists only in se2, so the columns from se are all NA.
253 | mat <- as.matrix(assay(stuff, 2))
254 | ref <- cbind(
255 | matrix(NA, nrow(stuff), ncol(se)),
256 | rbind(NA, NA, assay(se2, 2))
257 | )
258 | rownames(ref) <- letters[1:22]
259 | checkIdentical(mat, ref)
260 |
261 | # Unary methods work as expected.
262 | checkIdentical(se, combineCols(se, delayed=FALSE))
263 | }
264 |
265 | test_combineCols_assays <- function() {
266 |     # How assay names (or their absence) affect combineCols().
267 |     lhs <- SummarizedExperiment(list(matrix(rpois(1000, 10), ncol=10)))
268 |     rhs <- SummarizedExperiment(list(matrix(rpois(1000, 10), ncol=10),
269 |                                      matrix(rnorm(1000), ncol=10)))
270 |     colnames(lhs) <- letters[1:10]
271 |     colnames(rhs) <- letters[15:24]
272 |     rownames(lhs) <- paste0("GENE_", 1:100)
273 |     rownames(rhs) <- paste0("SPIKE_", 1:100)
274 |
275 |     # One unnamed assay versus two unnamed assays: must be rejected.
276 |     checkException(combineCols(lhs, rhs), silent=TRUE)
277 |
278 |     # Two unnamed assays versus two named assays: must be rejected as well.
279 |     assays(lhs) <- assays(lhs)[c(1, 1)]
280 |     assayNames(rhs) <- c("WHEE", "BLAH")
281 |     checkException(combineCols(lhs, rhs), silent=TRUE)
282 |
283 |     assays(rhs) <- unname(assays(rhs))   # both sides now hold two unnamed assays
284 |     merged <- combineCols(lhs, rhs)
285 |     checkIdentical(colnames(merged), letters[c(1:10, 15:24)])
286 |     checkIdentical(rownames(merged), c(rownames(lhs), rownames(rhs)))
287 | }
288 |
289 | test_combineCols_ranges_named <- function() {
290 | se <- SummarizedExperiment(list(counts=matrix(rpois(1000, 10), ncol=10)))
291 | se2 <- SummarizedExperiment(list(counts=matrix(rpois(1000, 10), ncol=10)))
292 | rownames(se) <- paste0("GENE_", 1:100)
293 | rownames(se2) <- paste0("GENE_", 21:120)
294 | # se covers GENE_1..100 and se2 covers GENE_21..120, so 80 row names are shared.
295 | # Checking that an SE is returned.
296 | out <- combineCols(se, se2, use.names=FALSE)
297 | checkIdentical(as.character(class(out)), "SummarizedExperiment")
298 | checkIdentical(rownames(out), rownames(se)) # ignoring other row names when use.names=FALSE.
299 |
300 | out <- combineCols(se, se2)
301 | checkIdentical(as.character(class(out)), "SummarizedExperiment")
302 | checkIdentical(rownames(out), union(rownames(se), rownames(se2)))
303 |
304 | # Checking that an RSE is returned.
305 | ref <- GRanges("chrA", IRanges(1, 1:120), seqinfo=Seqinfo(seqlengths=c(chrA=1000)))
306 | names(ref) <- paste0("GENE_", 1:120)
307 | rowRanges(se) <- ref[1:100]
308 | rowRanges(se2) <- ref[21:120]
309 |
310 | suppressWarnings(out <- combineCols(se, se2, use.names=FALSE)) # should have a warning here due to differences in values.
311 | checkIdentical(as.character(class(out)), "RangedSummarizedExperiment")
312 | checkIdentical(rowRanges(out), rowRanges(se))
313 | # With name matching, the combined ranges are the full reference set.
314 | out <- combineCols(se, se2)
315 | checkIdentical(rowRanges(out), ref)
316 |
317 | # Checking that it works with mixtures of object classes in rowRanges.
318 | se3 <- se2
319 | rowRanges(se3) <- NULL
320 | rownames(se3) <- rownames(se2)   # re-apply the row names after dropping the ranges
321 | # Rows known only to the range-less se3 (GENE_101..120) end up as empty list elements.
322 | out <- combineCols(se, se3)
323 | checkTrue(is(rowRanges(out), "GRangesList"))
324 | checkIdentical(rownames(out), paste0("GENE_", 1:120))
325 | checkIdentical(unname(lengths(rowRanges(out))), rep(1:0, c(100, 20)))
326 |
327 | out2 <- combineCols(se3, se) # flipping the order.
328 | checkIdentical(rownames(out2), paste0("GENE_", c(21:120, 1:20)))
329 | checkIdentical(unname(lengths(rowRanges(out2))), rep(c(1L,0L,1L), c(80, 20, 20)))
330 |
331 | out3 <- combineCols(se, se2, se3)
332 | checkIdentical(rowRanges(out3), ref) # avoid unnecessary conversion to a GRL.
333 | # Mixing GRanges with GRangesList: the result is a GRangesList.
334 | se4 <- se2
335 | rowRanges(se4) <- as(rowRanges(se4), "GRangesList")
336 | out <- combineCols(se, se4)
337 | checkIdentical(rowRanges(out), as(ref, "GRangesList"))
338 |
339 | # Checking that we get the same object class, regardless of ordering of inputs.
340 | checkIdentical(rowRanges(out), rowRanges(combineCols(se, se3, se4)))
341 | checkIdentical(rowRanges(out), rowRanges(combineCols(se3, se, se4))[rownames(out)])
342 | checkIdentical(rowRanges(out), rowRanges(combineCols(se3, se4, se))[rownames(out)])
343 |
344 | # Handles conflicting features correctly: the checks below expect a GRangesList whose last 20 elements are empty.
345 | se5 <- se2
346 | strand(rowRanges(se5)[1]) <- "+"
347 | suppressWarnings(out <- combineCols(se, se5)) # this should emit a warning.
348 | checkTrue(is(rowRanges(out), "GRangesList"))
349 | checkIdentical(unname(lengths(rowRanges(out))), rep(1:0, c(100, 20)))
350 | }
351 |
352 | test_combineCols_ranges_unnamed <- function() {
353 | # Repeating the same suite of tests for SEs without rownames.
354 | # This checks the correctness of some edge-case behaviors.
355 |
356 | se <- SummarizedExperiment(list(counts=matrix(rpois(1000, 10), ncol=10)))
357 | se2 <- SummarizedExperiment(list(counts=matrix(rpois(1000, 10), ncol=10)))
358 |
359 | # Checking that an SE is returned; without use.names=FALSE the call must fail, as there are no row names.
360 | out <- combineCols(se, se2, use.names=FALSE)
361 | checkIdentical(as.character(class(out)), "SummarizedExperiment")
362 | checkIdentical(nrow(out), nrow(se))
363 | checkException(combineCols(se, se2), silent=TRUE)
364 |
365 | # Checking that an RSE is returned.
366 | ref <- GRanges("chrA", IRanges(1, 1:120), seqinfo=Seqinfo(seqlengths=c(chrA=1000)))
367 | rowRanges(se) <- ref[1:100]
368 | rowRanges(se2) <- ref[21:120]
369 |
370 | suppressWarnings(out <- combineCols(se, se2, use.names=FALSE)) # should have a warning here due to differences in values.
371 | checkIdentical(as.character(class(out)), "RangedSummarizedExperiment")
372 | checkIdentical(rowRanges(out), rowRanges(se))
373 |
374 | # Checking that mixtures of objects work: an input without ranges leaves the other input's ranges in place.
375 | se3 <- se2
376 | rowRanges(se3) <- NULL
377 |
378 | out <- combineCols(se, se3, use.names=FALSE) # no warning.
379 | checkIdentical(rowRanges(out), rowRanges(se))
380 | out2 <- combineCols(se3, se, use.names=FALSE)
381 | checkIdentical(rowRanges(out2), rowRanges(se))
382 | # Mixing GRanges with a GRangesList that holds different ranges.
383 | se4 <- se2
384 | rowRanges(se4) <- as(rowRanges(se4), "GRangesList")
385 | suppressWarnings(out <- combineCols(se, se4, use.names=FALSE)) # has warning.
386 | checkIdentical(rowRanges(out), as(rowRanges(se), "GRangesList"))
387 | # Mixing GRanges with a GRangesList that holds the same ranges.
388 | se5 <- se
389 | rowRanges(se5) <- as(rowRanges(se5), "GRangesList")
390 | out <- combineCols(se, se5, use.names=FALSE) # no warning.
391 | checkIdentical(rowRanges(out), as(rowRanges(se), "GRangesList"))
392 | # Three inputs: compared against 'out' from the se/se5 call just above.
393 | multi.com <- suppressWarnings(combineCols(se, se3, se4, use.names=FALSE))
394 | checkIdentical(rowRanges(out), rowRanges(multi.com))
395 | }
396 |
--------------------------------------------------------------------------------
/inst/unitTests/test_coverage-methods.R:
--------------------------------------------------------------------------------
1 | ###
2 |
3 | M1 <- matrix(1, 5, 3)   # assay for the GRanges fixture: 5 rows x 3 columns
4 | M2 <- matrix(1, 3, 3)   # assay for the GRangesList fixture: 3 rows x 3 columns
5 | assaysList <- list(gr=SimpleList(m=M1), grl=SimpleList(m=M2))
6 | rowRangesList <-
7 | list(gr=GRanges("chr1", IRanges(1:5, 10)),
8 | grl=split(GRanges("chr1", IRanges(1:5, 10)), c(1,1,2,2,3)))   # the same 5 ranges, grouped into 3 list elements
9 | names(rowRangesList[["grl"]]) <- NULL   # remove the names from the split() result
10 | colData <- DataFrame(x=letters[1:3])   # one row per assay column
11 |
12 | ## rseList[[1]]: SE with GRanges rowRanges; rseList[[2]]: SE with GRangesList rowRanges
13 | rseList <-
14 | list(SummarizedExperiment(
15 | assays=assaysList[["gr"]],
16 | rowRanges=rowRangesList[["gr"]],
17 | colData=colData),
18 | SummarizedExperiment(
19 | assays=assaysList[["grl"]],
20 | rowRanges=rowRangesList[["grl"]],
21 | colData=colData))
22 |
23 |
24 | test_interfaces <- function()
25 | {
26 |     ## the RSE method must dispatch on "x" and mirror the generic's formals
27 |     for (fname in "coverage") {
28 |         gen <- getGeneric(fname)
29 |         rse_method <- getMethod(fname, "RangedSummarizedExperiment")
30 |         checkIdentical("x", gen@signature)
31 |         checkIdentical(formals(gen@.Data), formals(rse_method@.Data))
32 |     }
33 | }
34 |
35 | test_coverage_RangedSummarizedExperiment <- function()
36 | {
37 |     for (rse in rseList) {
38 |         ## unweighted: coverage of the object equals coverage of its ranges
39 |         expected <- coverage(rowRanges(rse))
40 |         checkIdentical(expected, coverage(rse))
41 |
42 |         ## weighted: one random weight per row
43 |         weight <- runif(length(rse))
44 |         ## Note carried over from the original test: with GRangesList
45 |         ## rowRanges this issues a warning (in BioC 3.3) that exposes a
46 |         ## problem in how the "coverage" method for GRangesList objects
47 |         ## (GenomicRanges package) handles 'weight'. The warning is
48 |         ## expected and healthy, so it is deliberately not suppressed; it
49 |         ## will go away once that method is fixed.
50 |         ## Same comparison as above, now with the 'weight' argument.
51 |         expected <- coverage(rowRanges(rse), weight=weight)
52 |         observed <- coverage(rse, weight=weight)
53 |         checkIdentical(expected, observed)
54 |     }
55 | }
56 |
57 |
--------------------------------------------------------------------------------
/inst/unitTests/test_findOverlaps-methods.R:
--------------------------------------------------------------------------------
1 | ###
2 |
3 | M1 <- matrix(1, 5, 3)   # assay for the GRanges fixture: 5 rows x 3 columns
4 | M2 <- matrix(1, 3, 3)   # assay for the GRangesList fixture: 3 rows x 3 columns
5 | assaysList <- list(gr=SimpleList(m=M1), grl=SimpleList(m=M2))
6 | rowRangesList <-
7 | list(gr=GRanges("chr1", IRanges(1:5, 10)),
8 | grl=split(GRanges("chr1", IRanges(1:5, 10)), c(1,1,2,2,3)))   # the same 5 ranges, grouped into 3 list elements
9 | names(rowRangesList[["grl"]]) <- NULL   # remove the names from the split() result
10 | colData <- DataFrame(x=letters[1:3])   # one row per assay column
11 |
12 | ## rseList[[1]]: SE with GRanges rowRanges; rseList[[2]]: SE with GRangesList rowRanges
13 | rseList <-
14 | list(SummarizedExperiment(
15 | assays=assaysList[["gr"]],
16 | rowRanges=rowRangesList[["gr"]],
17 | colData=colData),
18 | SummarizedExperiment(
19 | assays=assaysList[["grl"]],
20 | rowRanges=rowRangesList[["grl"]],
21 | colData=colData))
22 |
23 |
24 | test_interfaces <- function()
25 | {
26 |     rse <- "RangedSummarizedExperiment"
27 |     dispatch_pairs <- list(   # (query, subject) classes with a method
28 |         c(rse, "Vector"),
29 |         c("Vector", rse),
30 |         c(rse, rse)
31 |     )
32 |     gen <- getGeneric("findOverlaps")
33 |     for (pair in dispatch_pairs) {
34 |         meth <- getMethod("findOverlaps", pair)
35 |         checkIdentical(c("query", "subject"), gen@signature)
36 |         checkIdentical(formals(gen@.Data), formals(meth@.Data))
37 |     }
38 | }
39 |
40 | test_findOverlaps_methods <- function()
41 | {
42 |     identical_SummarizedExperiment <- function(x, y) {
43 |         x@assays <- as(assays(x), "SimpleAssays")   # compare assays in one common class
44 |         y@assays <- as(assays(y), "SimpleAssays")
45 |         identical(x, y)
46 |     }
47 |     for (i in seq_along(rseList)) {
48 |         x <- rseList[[i]]
49 |         for (j in seq_along(rseList)) {
50 |             y <- rseList[[j]]
51 |             ## every RSE method must agree with the call on the bare rowRanges
52 |             ## findOverlaps
53 |             target <- findOverlaps(rowRanges(x), rowRanges(y))
54 |             current <- findOverlaps(x, rowRanges(y))
55 |             checkIdentical(target, current)
56 |             current <- findOverlaps(rowRanges(x), y)
57 |             checkIdentical(target, current)
58 |             current <- findOverlaps(x, y)
59 |             checkIdentical(target, current)
60 |
61 |             ## countOverlaps
62 |             target <- countOverlaps(rowRanges(x), rowRanges(y))
63 |             current <- countOverlaps(x, rowRanges(y))
64 |             checkIdentical(target, current)
65 |             current <- countOverlaps(rowRanges(x), y)
66 |             checkIdentical(target, current)
67 |             current <- countOverlaps(x, y)
68 |             checkIdentical(target, current)
69 |
70 |             ## overlapsAny
71 |             target <- overlapsAny(rowRanges(x), rowRanges(y))
72 |             current <- overlapsAny(x, rowRanges(y))
73 |             checkIdentical(target, current)
74 |             current <- overlapsAny(rowRanges(x), y)
75 |             checkIdentical(target, current)
76 |             current <- overlapsAny(x, y)
77 |             checkIdentical(target, current)
78 |
79 |             ## subsetByOverlaps (RSE query): target is an explicit row subset, built without subsetByOverlaps()
80 |             target <- x[overlapsAny(rowRanges(x), rowRanges(y)), ]
81 |             current <- subsetByOverlaps(x, rowRanges(y))
82 |             checkTrue(identical_SummarizedExperiment(target, current))
83 |             current <- subsetByOverlaps(x, y)
84 |             checkTrue(identical_SummarizedExperiment(target, current))
85 |             ## subsetByOverlaps (ranges query, RSE subject)
86 |             target <- subsetByOverlaps(rowRanges(x), rowRanges(y))
87 |             current <- subsetByOverlaps(rowRanges(x), y)
88 |             checkIdentical(target, current)
89 |         }
90 |     }
91 | }
92 |
93 |
--------------------------------------------------------------------------------
/inst/unitTests/test_inter-range-methods.R:
--------------------------------------------------------------------------------
1 | ###
2 |
3 | ## Toy assays: a 5 x 3 matrix (GRanges rows), a 3 x 3 matrix (GRangesList rows).
4 | M1 <- matrix(1, nrow=5, ncol=3)
5 | M2 <- matrix(1, nrow=3, ncol=3)
6 | assaysList <- list(gr=SimpleList(m=M1), grl=SimpleList(m=M2))
7 |
8 | ## Row ranges: a stranded GRanges, and an unnamed GRangesList of 3 groups.
9 | rowRangesList <- list(
10 |     gr=GRanges("chr1", IRanges(start=1:5, end=10),
11 |                strand=Rle(c("+", "-"), 3:2)),
12 |     grl=unname(split(GRanges("chr1", IRanges(start=1:5, end=10)),
13 |                      c(1,1,2,2,3)))
14 | )
15 | colData <- DataFrame(x=letters[1:3])
16 |
17 | ## a list of one SE with GRanges and one with GRangesList
18 | rseList <- lapply(c("gr", "grl"), function(key)
19 |     SummarizedExperiment(assays=assaysList[[key]],
20 |                          rowRanges=rowRangesList[[key]],
21 |                          colData=colData))
22 |
23 |
24 | ## Generics dispatch on "x" only; RSE methods keep the generic's formals.
25 | test_interfaces <- function()
26 | {
27 |     for (fname in c("isDisjoint", "disjointBins")) {
28 |         gen <- getGeneric(fname)
29 |         meth <- getMethod(fname, "RangedSummarizedExperiment")
30 |         checkIdentical("x", gen@signature)
31 |         checkIdentical(formals(gen@.Data), formals(meth@.Data))
32 |     }
33 | }
34 |
35 | ## isDisjoint() and disjointBins() on an RSE must match the rowRanges result.
36 | test_inter_range_methods <- function()
37 | {
38 |     inter_range_funs <- list(isDisjoint, disjointBins)
39 |
40 |     ## Only rseList[[1]] is exercised; the loop over both elements is
41 |     ## kept below, commented out.
42 |     #for (i in 1:2) {
43 |     for (i in 1L) {
44 |         rse <- rseList[[i]]
45 |         for (FUN in inter_range_funs) {
46 |             expected <- FUN(rowRanges(rse))
47 |             observed <- FUN(rse)
48 |             checkIdentical(expected, observed)
49 |         }
50 |     }
51 | }
52 |
53 |
--------------------------------------------------------------------------------
/inst/unitTests/test_intra-range-methods.R:
--------------------------------------------------------------------------------
1 | ###
2 |
3 | ## Toy assays: a 5 x 3 matrix (GRanges rows), a 3 x 3 matrix (GRangesList rows).
4 | M1 <- matrix(1, nrow=5, ncol=3)
5 | M2 <- matrix(1, nrow=3, ncol=3)
6 | assaysList <- list(gr=SimpleList(m=M1), grl=SimpleList(m=M2))
7 |
8 | ## Row ranges: a stranded GRanges, and an unnamed GRangesList of 3 groups.
9 | rowRangesList <- list(
10 |     gr=GRanges("chr1", IRanges(start=1:5, end=10),
11 |                strand=Rle(c("+", "-"), 3:2)),
12 |     grl=unname(split(GRanges("chr1", IRanges(start=1:5, end=10)),
13 |                      c(1,1,2,2,3)))
14 | )
15 | colData <- DataFrame(x=letters[1:3])
16 |
17 | ## a list of one SE with GRanges and one with GRangesList
18 | rseList <- lapply(c("gr", "grl"), function(key)
19 |     SummarizedExperiment(assays=assaysList[[key]],
20 |                          rowRanges=rowRangesList[[key]],
21 |                          colData=colData))
22 |
23 |
24 | ## Generics dispatch on "x" only; RSE methods keep the generic's formals.
25 | test_interfaces <- function()
26 | {
27 |     intra_range_generics <- c("shift", "narrow", "resize", "flank",
28 |                               "promoters", "restrict", "trim")
29 |     for (fname in intra_range_generics) {
30 |         gen <- getGeneric(fname)
31 |         meth <- getMethod(fname, "RangedSummarizedExperiment")
32 |         checkIdentical("x", gen@signature)
33 |         checkIdentical(formals(gen@.Data), formals(meth@.Data))
34 |     }
35 | }
36 |
37 | ## Intra-range transformations applied to an RSE must give the same object as
38 | ## applying them to its rowRanges and storing the result back.
39 | test_intra_range_methods <- function()
40 | {
41 |     same_se <- function(a, b) {
42 |         a@assays <- as(assays(a), "SimpleAssays")
43 |         b@assays <- as(assays(b), "SimpleAssays")
44 |         identical(a, b)
45 |     }
46 |     ## Transform 'rse' both ways with 'op' (extra arguments in '...') and
47 |     ## check that the two results are identical.
48 |     check_op <- function(rse, op, ...) {
49 |         target <- rse
50 |         rowRanges(target) <- op(rowRanges(target), ...)
51 |         current <- op(rse, ...)
52 |         checkTrue(same_se(target, current))
53 |     }
54 |     ## Only rseList[[1]] is exercised; the loop over 1:2 is kept below,
55 |     ## commented out.
56 |     #for (i in 1:2) {
57 |     for (i in 1L) {
58 |         check_op(rseList[[i]], shift, 50)
59 |         check_op(rseList[[i]], narrow, 2, -2)
60 |         check_op(rseList[[i]], resize, 8)
61 |         check_op(rseList[[i]], flank, 5, both=TRUE)
62 |         check_op(rseList[[i]], promoters, upstream=20, downstream=5)
63 |         check_op(rseList[[i]], restrict, start=2, end=3,
64 |                  keep.all.ranges=TRUE)
65 |
66 |         ## trim: seqlengths are set to 8 beforehand and reset to NA afterwards
67 |         suppressWarnings(seqlengths(rseList[[i]]) <- 8)
68 |         check_op(rseList[[i]], trim)
69 |         seqlengths(rseList[[i]]) <- NA
70 |     }
71 | }
94 |
95 |
--------------------------------------------------------------------------------
/inst/unitTests/test_makeSummarizedExperimentFromDataFrame.R:
--------------------------------------------------------------------------------
1 | ##
2 |
3 | ## Row names in reverse alphabetical order (GENEe ... GENEa).
4 | rowNames <- paste0("GENE", rev(letters[1:5]))
5 | ## Genomic range columns, followed by three integer expression columns.
6 | range_info <- list(chr = "chr2", start = 11:15, end = 12:16,
7 |                    strand = c("+", "-", "+", "*", "."))
8 | expr_info <- list(expr0 = 3:7, expr1 = 8:12, expr2 = 12:16)
9 | df <- as.data.frame(c(range_info, expr_info), row.names = rowNames)
10 | DF <- DataFrame(c(range_info, expr_info), row.names = rowNames)
11 |
12 | ## makeSummarizedExperimentFromDataFrame() must give valid, equivalent objects
13 | ## for data.frame and DataFrame input, and must raise an error when a
14 | ## character column is appended to the input.
15 | test_makeSummarizedExperimentFromDataFrame <- function()
16 | {
17 |     make_se <- makeSummarizedExperimentFromDataFrame
18 |     inputs <- list(df, DF)
19 |
20 |     for (input in inputs)
21 |         validObject(make_se(input))
22 |
23 |     # Check rowRanges to be identical
24 |     expected_ranges <- GRanges(as.data.frame(range_info, row.names = rowNames))
25 |     checkIdentical(expected_ranges, rowRanges(make_se(df)))
26 |
27 |     # Check assay matrix and expr_info matrix are identical
28 |     expected_assay <- as.matrix(as.data.frame(expr_info, row.names = rowNames))
29 |     for (input in inputs)
30 |         checkIdentical(assay(make_se(input)), expected_assay)
31 |
32 |     checkEquals(make_se(df), make_se(DF))
33 |
34 |     # A character column cannot be turned into an assay
35 |     checkException(make_se(cbind(df, expr3 = letters[seq_len(nrow(df))])))
36 |     checkException(
37 |         make_se(cbind(DF, DataFrame(expr3 = letters[seq_len(nrow(df))]))))
38 |
39 |     # One range per input row
40 |     for (input in inputs)
41 |         checkIdentical(nrow(input), length(rowRanges(make_se(input))))
42 |
43 |     # Dimnames come from the expression column names and the row names
44 |     for (input in inputs) {
45 |         checkIdentical(colnames(make_se(input)), names(expr_info))
46 |         checkIdentical(rownames(make_se(input)), rowNames)
47 |     }
48 | }
56 |
57 |
--------------------------------------------------------------------------------
/inst/unitTests/test_makeSummarizedExperimentFromExpressionSet.R:
--------------------------------------------------------------------------------
1 | ## Toy assays: a 5 x 3 matrix (GRanges rows), a 3 x 3 matrix (GRangesList rows).
2 | M1 <- matrix(1, nrow=5, ncol=3)
3 | M2 <- matrix(1, nrow=3, ncol=3)
4 | assaysList <- list(gr=SimpleList(m=M1), grl=SimpleList(m=M2))
5 |
6 | ## Row ranges: 5 ranges as a GRanges, and the same 5 ranges split into an
7 | ## unnamed GRangesList of 3 groups.
8 | rowRangesList <- list(
9 |     gr=GRanges("chr1", IRanges(start=1:5, end=10)),
10 |     grl=unname(split(GRanges("chr1", IRanges(start=1:5, end=10)),
11 |                      c(1,1,2,2,3)))
12 | )
13 | colData <- DataFrame(x=letters[1:3])
14 |
15 | ## a list of one SE with GRanges and one with GRangesList
16 | rseList <- lapply(c("gr", "grl"), function(key)
17 |     SummarizedExperiment(assays=assaysList[[key]],
18 |                          rowRanges=rowRangesList[[key]],
19 |                          colData=colData))
20 |
21 |
22 | ## Coercion from ExpressionSet to RangedSummarizedExperiment.
23 | test_SummarizedExperiment_GenomicRanges_coercion <- function()
24 | {
25 |     ## empty ExpressionSet
26 |     empty_eset <- ExpressionSet()
27 |     checkTrue(validObject(empty_eset))
28 |     empty_rse <- as(empty_eset, "RangedSummarizedExperiment")
29 |     checkTrue(validObject(empty_rse))
30 |
31 |     ## sample ExpressionSet from Biobase
32 |     data("sample.ExpressionSet", package = "Biobase")
33 |     eset <- sample.ExpressionSet
34 |     checkTrue(validObject(eset))
35 |     rse <- as(eset, "RangedSummarizedExperiment")
36 |     checkTrue(validObject(rse))
37 |
38 |     ## experimentData, annotation and protocolData are stored in metadata()
39 |     rse_metadata <- metadata(rse)
40 |     checkIdentical(experimentData(eset), rse_metadata$experimentData)
41 |     checkIdentical(annotation(eset), rse_metadata$annotation)
42 |     checkIdentical(protocolData(eset), rse_metadata$protocolData)
43 |
44 |     ## assays
45 |     eset_assays <- SimpleList(as.list(assayData(eset)))
46 |     rse_assays <- assays(rse)
47 |     checkIdentical(eset_assays$exprs, rse_assays$exprs)
48 |     checkIdentical(eset_assays$se.exprs, rse_assays$se.exprs)
49 |
50 |     ## dimnames
51 |     checkIdentical(featureNames(eset), rownames(rse))
52 |     checkIdentical(sampleNames(eset), colnames(rse))
53 | }
59 |
60 | ## Coercion from (Ranged)SummarizedExperiment to ExpressionSet, including
61 | ## ExpressionSet -> RangedSummarizedExperiment -> ExpressionSet round trips.
62 | test_GenomicRanges_SummarizedExperiment_coercion <- function()
63 | {
64 |     ## empty SE
65 |     simpleSE <- SummarizedExperiment()
66 |     eset1 <- as(simpleSE, "ExpressionSet")
67 |     checkTrue(validObject(eset1))
68 |
69 |     ## Back and forth empty ES
70 |     simpleES <- ExpressionSet()
71 |     simpleES2 <- as(as(simpleES, "RangedSummarizedExperiment"),
72 |                     "ExpressionSet")
73 |     checkTrue(validObject(simpleES2))
74 |     checkEquals(as.list(assayData(simpleES)),
75 |                 as.list(assayData(simpleES2)))
76 |
77 |     ## Simple SE
78 |     simpleSE <- rseList[[1]]
79 |     assayNames(simpleSE) <- "exprs" # No warning 'No assay named exprs..."
80 |     eset2 <- as(simpleSE, "ExpressionSet")
81 |     checkTrue(validObject(eset2))
82 |
83 |     ## The ExpressionSet features should have the data from the
84 |     ## SummarizedExperiment rows if they are from GRanges.
85 |     checkIdentical(pData(featureData(eset2)),
86 |                    as.data.frame(rowRanges(rseList[[1]])))
87 |
88 |     # the rowRanges are retained if the object has them to begin with.
89 |     se2_2 <- as(eset2, "RangedSummarizedExperiment")
90 |     rr_se2_2 <- unname(rowRanges(se2_2))
91 |     rr_eset2 <- rowRanges(rseList[[1]])
92 |     checkEquals(rr_se2_2, rr_eset2)
93 |
94 |     simpleSE <- rseList[[2]]
95 |     assayNames(simpleSE) <- "exprs" # No warning 'No assay named exprs..."
96 |     eset3 <- as(simpleSE, "ExpressionSet")
97 |     checkTrue(validObject(eset3))
98 |
99 |     ## The ExpressionSet features should not have the data from the
100 |     ## SummarizedExperiment rows if they are from GRangesList, but they
101 |     ## should be empty and the same length as the number of ranges.
102 |     checkEquals(unname(NROW(featureData(eset3))),
103 |                 unname(length(rowRanges(rseList[[2]]))))
104 |
105 |     data("sample.ExpressionSet", package = "Biobase")
106 |     eset4 <- sample.ExpressionSet
107 |
108 |     eset5 <- as(as(eset4, "RangedSummarizedExperiment"), "ExpressionSet")
109 |
110 |     checkTrue(validObject(eset5))
111 |
112 |     ## this is necessary because the order in environments is undefined.
113 |     ## 'y' is reordered to follow names(x): match(names(x), names(y)) gives,
114 |     ## for each name of 'x', its position in 'y'. (The reverse call,
115 |     ## match(names(y), names(x)), yields the inverse permutation and only
116 |     ## happens to work when that permutation is its own inverse.)
117 |     compareLists <- function(x, y) {
118 |         reorderY <- match(names(x), names(y))
119 |         checkIdentical(x, y[reorderY])
120 |     }
121 |
122 |     compareLists(as.list(assayData(eset4)),
123 |                  as.list(assayData(eset5)))
124 |
125 |     checkIdentical(experimentData(eset4),
126 |                    experimentData(eset5))
127 |
128 |     checkIdentical(annotation(eset4),
129 |                    annotation(eset5))
130 |
131 |     checkIdentical(protocolData(eset4),
132 |                    protocolData(eset5))
133 |
134 |     checkIdentical(featureNames(eset4),
135 |                    featureNames(eset5))
136 |
137 |     checkIdentical(sampleNames(eset4),
138 |                    sampleNames(eset5))
139 | }
143 |
144 | ## https://github.com/Bioconductor/SummarizedExperiment/issues/43
145 | test_GenomicRanges_SummarizedExperiment_coercion_lockedEnvironment <- function()
146 | {
147 |     se <- SummarizedExperiment(list(exprs = matrix(1:10, nrow = 5)))
148 |     es1 <- es2 <- as(se, "ExpressionSet")
149 |     original <- exprs(es2)
150 |     checkIdentical(original, exprs(es2))
151 |     exprs(es1)[1, 1] <- 2                        # modify es1 only
152 |     checkTrue(!identical(original, exprs(es1)))
153 |     checkIdentical(original, exprs(es2))         # es2 must be unaffected
154 | }
155 |
156 | ## makeSummarizedExperimentFromExpressionSet() called without a mapper, with
157 | ## probeRangeMapper, and with geneRangeMapper().
158 | test_GenomicRanges_SummarizedExperiment_coercion_mappingFunctions <- function()
159 | {
160 |     ## naiveRangeMapper
161 |     ## valid object from empty object
162 |     checkTrue(validObject(makeSummarizedExperimentFromExpressionSet(ExpressionSet())))
163 |     ## valid object from sample ExpressionSet
164 |     data("sample.ExpressionSet", package = "Biobase")
165 |     eset1 <- sample.ExpressionSet
166 |     checkTrue(validObject(makeSummarizedExperimentFromExpressionSet(eset1)))
167 |     ## makeSummarizedExperimentFromExpressionSet should be the same as `as`
168 |     ## with default args
169 |     checkEquals(makeSummarizedExperimentFromExpressionSet(eset1),
170 |                 as(eset1, "RangedSummarizedExperiment"))
171 |
172 |     ## probeRangeMapper
173 |     ## valid object from empty object
174 |     checkTrue(validObject(
175 |         makeSummarizedExperimentFromExpressionSet(ExpressionSet(),
176 |                                                   probeRangeMapper)))
177 |
178 |     ## valid object from sample ExpressionSet
179 |     se1 <- makeSummarizedExperimentFromExpressionSet(eset1, probeRangeMapper)
180 |     checkTrue(validObject(se1))
181 |
182 |     ## Ranges returned have names that were from the featureNames. names()
183 |     ## is used because rownames() of a GRanges/GRangesList is NULL, which
184 |     ## made the check vacuously TRUE.
185 |     checkTrue(all(names(rowRanges(se1)) %in% featureNames(eset1)))
186 |
187 |     ## geneRangeMapper
188 |     ## valid object from empty object
189 |     checkTrue(validObject(
190 |         makeSummarizedExperimentFromExpressionSet(ExpressionSet(),
191 |                                                   geneRangeMapper(NULL))))
192 |
193 |     ## valid object from sample ExpressionSet
194 |     se2 <- makeSummarizedExperimentFromExpressionSet(eset1,
195 |         geneRangeMapper("TxDb.Hsapiens.UCSC.hg19.knownGene"))
196 |     checkTrue(validObject(se2))
197 |     ## Ranges returned have names that were from the featureNames
198 |     checkTrue(all(names(rowRanges(se2)) %in% featureNames(eset1)))
199 | }
199 |
200 |
--------------------------------------------------------------------------------
/inst/unitTests/test_nearest-methods.R:
--------------------------------------------------------------------------------
1 | ###
2 |
3 | ## Toy assays: a 5 x 3 matrix (GRanges rows), a 3 x 3 matrix (GRangesList rows).
4 | M1 <- matrix(1, nrow=5, ncol=3)
5 | M2 <- matrix(1, nrow=3, ncol=3)
6 | assaysList <- list(gr=SimpleList(m=M1), grl=SimpleList(m=M2))
7 |
8 | ## Row ranges: 5 ranges as a GRanges, and the same 5 ranges split into an
9 | ## unnamed GRangesList of 3 groups.
10 | rowRangesList <- list(
11 |     gr=GRanges("chr1", IRanges(start=1:5, end=10)),
12 |     grl=unname(split(GRanges("chr1", IRanges(start=1:5, end=10)),
13 |                      c(1,1,2,2,3)))
14 | )
15 | colData <- DataFrame(x=letters[1:3])
16 |
17 | ## a list of one SE with GRanges and one with GRangesList
18 | rseList <- lapply(c("gr", "grl"), function(key)
19 |     SummarizedExperiment(assays=assaysList[[key]],
20 |                          rowRanges=rowRangesList[[key]],
21 |                          colData=colData))
22 |
23 | ## Expected dispatch signature of each generic, keyed by generic name.
24 | .GENERIC_SIGNATURES <- list(
25 | precede=c("x", "subject"),
26 | follow=c("x", "subject"),
27 | nearest=c("x", "subject"),
28 | distance=c("x", "y"),
29 | distanceToNearest=c("x", "subject")
30 | )
31 |
32 | ## Generic signatures must match .GENERIC_SIGNATURES, and every RSE method
33 | ## must keep the formals of its generic.
34 | test_interfaces <- function()
35 | {
36 |     rse <- "RangedSummarizedExperiment"
37 |     sigs <- list(c(rse, "ANY"), c("ANY", rse), c(rse, rse))
38 |     for (fname in names(.GENERIC_SIGNATURES)) {
39 |         gen <- getGeneric(fname)
40 |         checkIdentical(.GENERIC_SIGNATURES[[fname]], gen@signature)
41 |         for (sig in sigs) {
42 |             meth <- getMethod(fname, sig)
43 |             checkIdentical(formals(gen@.Data), formals(meth@.Data))
44 |         }
45 |     }
46 | }
48 |
49 | ## Each generic listed in .GENERIC_SIGNATURES must give the same answer
50 | ## whether either argument is an RSE or its rowRanges.
51 | test_nearest_methods <- function()
52 | {
53 |     ## Only rseList[[1]] is used on both sides; the 1:2 loops are kept
54 |     ## below, commented out.
55 |     #for (i in 1:2) {
56 |     for (i in 1L) {
57 |         x <- rseList[[i]]
58 |         #for (j in 1:2) {
59 |         for (j in 1L) {
60 |             y <- rseList[[j]]
61 |             for (fname in names(.GENERIC_SIGNATURES)) {
62 |                 FUN <- get(fname)
63 |                 expected <- FUN(rowRanges(x), rowRanges(y))
64 |                 checkIdentical(expected, FUN(x, rowRanges(y)))
65 |                 checkIdentical(expected, FUN(rowRanges(x), y))
66 |                 checkIdentical(expected, FUN(x, y))
67 |             }
68 |         }
69 |     }
70 | }
70 |
71 |
--------------------------------------------------------------------------------
/man/Assays-class.Rd:
--------------------------------------------------------------------------------
1 | \name{Assays-class}
2 | \docType{class}
3 |
4 | \alias{class:Assays}
5 | \alias{Assays-class}
6 | \alias{Assays}
7 | \alias{updateObject,Assays-method}
8 | \alias{length,Assays-method}
9 | \alias{names,Assays-method}
10 | \alias{names<-,Assays-method}
11 | \alias{getListElement,Assays-method}
12 | \alias{setListElement,Assays-method}
13 | \alias{dim,Assays-method}
14 | \alias{[,Assays,ANY-method}
15 | \alias{[,Assays,ANY,ANY,ANY-method}
16 | \alias{[<-,Assays,ANY,ANY,ANY-method}
17 | \alias{rbind,Assays-method}
18 | \alias{cbind,Assays-method}
19 | \alias{arbind,Matrix-method}
20 | \alias{acbind,Matrix-method}
21 |
22 | \alias{class:SimpleAssays}
23 | \alias{SimpleAssays-class}
24 | \alias{SimpleAssays}
25 | \alias{coerce,SimpleList,SimpleAssays-method}
26 | \alias{coerce,SimpleAssays,SimpleList-method}
27 |
28 | \alias{class:ShallowData}
29 | \alias{ShallowData-class}
30 | \alias{ShallowData}
31 |
32 | \alias{class:ShallowSimpleListAssays}
33 | \alias{ShallowSimpleListAssays-class}
34 | \alias{ShallowSimpleListAssays}
35 | \alias{coerce,SimpleList,ShallowSimpleListAssays-method}
36 | \alias{coerce,ShallowSimpleListAssays,SimpleList-method}
37 |
38 | \alias{class:AssaysInEnv}
39 | \alias{AssaysInEnv-class}
40 | \alias{AssaysInEnv}
41 | \alias{length,AssaysInEnv-method}
42 | \alias{names,AssaysInEnv-method}
43 | \alias{names<-,AssaysInEnv-method}
44 | \alias{getListElement,AssaysInEnv-method}
45 | \alias{setListElement,AssaysInEnv-method}
46 | \alias{coerce,SimpleList,AssaysInEnv-method}
47 | \alias{coerce,AssaysInEnv,SimpleList-method}
48 |
49 | \title{Assays objects}
50 |
51 | \description{
52 | The Assays virtual class and its methods provide a formal abstraction
53 | of the assays slot of \link{SummarizedExperiment} objects.
54 |
55 | SimpleAssays and ShallowSimpleListAssays are concrete subclasses of
56 | Assays with the former being currently the default implementation of Assays
57 | objects. Other implementations (e.g. disk-based) could easily be added.
58 |
59 | Note that these classes are not meant to be used directly by the end user
60 | and the material in this man page is aimed at package developers.
61 | }
62 |
63 | \details{
64 | Assays objects have list-like semantics with elements having matrix- or
65 | array-like semantics (e.g., \code{dim}, \code{dimnames}).
66 |
67 | The Assays API consists of:
68 | \itemize{
69 | \item (a) The \code{Assays()} constructor function.
70 | \item (b) Lossless back and forth coercion from/to
71 | \link[S4Vectors]{SimpleList}. The coercion method from
72 | \link[S4Vectors]{SimpleList} doesn't need (and should not)
73 | validate the returned object.
74 | \item (c) \code{length}, \code{names}, \code{`names<-`},
75 | \code{getListElement}, \code{setListElement},
76 | \code{dim}, \code{[}, \code{`[<-`}, \code{rbind}, \code{cbind}.
77 | }
78 | An Assays concrete subclass needs to implement (b) (required) plus,
79 | optionally any of the methods in (c).
80 |
81 | IMPORTANT:
82 | \enumerate{
83 | \item Nobody in the Assays hierarchy is allowed to inherit from
84 | \link[S4Vectors]{SimpleList} because of the conflicting semantics
85 | of \code{[}.
86 |
87 | \item Methods that return a modified Assays object (a.k.a.
88 | endomorphisms), that is, \code{[} as well as replacement methods
89 | \code{names<-}, \code{setListElement}, and \code{[<-}, must respect
90 | the \emph{copy-on-change contract}.
91 | With objects that don't make use of references internally, the
92 | developer doesn't need to take any special action for that because
93 | it's automatically taken care of by R itself. However, for objects
94 | that do make use of references internally (e.g. environments,
95 | external pointers, pointer to a file on disk, etc...), the developer
96 | needs to be careful to implement endomorphisms with copy-on-change
97 | semantics. This can easily be achieved (and is what the default
98 | methods for Assays objects do) by performing a full (deep) copy of
99 | the object before modifying it instead of trying to modify it
100 | in-place. However note that this full (deep) copy can be very
101 | expensive and is actually not necessary in order to achieve
102 | copy-on-change semantics: it's enough (and often preferable for
103 | performance reasons) to copy only the parts of the object that need
104 | to be modified.
105 | }
106 |
107 | Assays has currently 3 implementations which are formalized by concrete
108 | subclasses SimpleAssays, ShallowSimpleListAssays, and AssaysInEnv.
109 | SimpleAssays is the default (prior to SummarizedExperiment 1.15.4,
110 | ShallowSimpleListAssays was the default). AssaysInEnv is a \emph{broken}
111 | alternative to ShallowSimpleListAssays that does NOT respect the
112 | \emph{copy-on-change contract}. It is only provided for illustration
113 | purposes (see source file Assays-class.R for the details).
114 |
115 | A little more detail about ShallowSimpleListAssays: a small reference
116 | class hierarchy (not exported from the \pkg{SummarizedExperiment} name space)
117 | defines a reference class ShallowData with a single field \code{data}
118 | of type \code{ANY}, and a derived class ShallowSimpleListAssays
119 | that specializes the type of \code{data} as \link[S4Vectors]{SimpleList},
120 | and \code{contains=c("ShallowData", "Assays")}. Prior to SummarizedExperiment
121 | 1.15.4, the assays slot of a \link{SummarizedExperiment} object contained an
122 | instance of ShallowSimpleListAssays.
123 | }
124 |
125 | \author{Martin Morgan and Hervé Pagès}
126 |
127 | \seealso{
128 | \itemize{
129 | \item \link{SummarizedExperiment} objects.
130 |
131 | \item \link[S4Vectors]{SimpleList} objects in the \pkg{S4Vectors} package.
132 | }
133 | }
134 |
135 | \examples{
136 | ## ---------------------------------------------------------------------
137 | ## DIRECT MANIPULATION OF Assays OBJECTS
138 | ## ---------------------------------------------------------------------
139 | m1 <- matrix(runif(24), ncol=3)
140 | m2 <- matrix(runif(24), ncol=3)
141 | a <- Assays(SimpleList(m1, m2))
142 | a
143 |
144 | as(a, "SimpleList")
145 |
146 | length(a)
147 | getListElement(a, 2)
148 | dim(a)
149 |
150 | b <- a[-4, 2]
151 | b
152 | length(b)
153 | getListElement(b, 2)
154 | dim(b)
155 |
156 | names(a)
157 | names(a) <- c("a1", "a2")
158 | names(a)
159 | getListElement(a, "a2")
160 |
161 | rbind(a, a)
162 | cbind(a, a)
163 |
164 | ## ---------------------------------------------------------------------
165 | ## COPY-ON-CHANGE CONTRACT
166 | ## ---------------------------------------------------------------------
167 |
168 | ## ShallowSimpleListAssays objects have copy-on-change semantics but not
169 | ## AssaysInEnv objects. For example:
170 | ssla <- as(SimpleList(m1, m2), "ShallowSimpleListAssays")
171 | aie <- as(SimpleList(m1, m2), "AssaysInEnv")
172 |
173 | ## No names on 'ssla' and 'aie':
174 | names(ssla)
175 | names(aie)
176 |
177 | ssla2 <- ssla
178 | aie2 <- aie
179 | names(ssla2) <- names(aie2) <- c("A1", "A2")
180 |
181 | names(ssla) # still NULL (as expected)
182 |
183 | names(aie) # changed! (because the names<-,AssaysInEnv method is not
184 | # implemented in a way that respects the copy-on-change
185 | # contract)
186 | }
187 |
--------------------------------------------------------------------------------
/man/RangedSummarizedExperiment-class.Rd:
--------------------------------------------------------------------------------
1 | \name{RangedSummarizedExperiment-class}
2 | \docType{class}
3 |
4 | % Class
5 | \alias{class:RangedSummarizedExperiment}
6 | \alias{RangedSummarizedExperiment-class}
7 | \alias{RangedSummarizedExperiment}
8 |
9 | \alias{parallel_slot_names,RangedSummarizedExperiment-method}
10 |
11 | % Coercion
12 | \alias{coerce,RangedSummarizedExperiment,SummarizedExperiment-method}
13 | \alias{coerce,SummarizedExperiment,RangedSummarizedExperiment-method}
14 |
15 | % Accessors
16 | \alias{rowRanges}
17 | \alias{rowRanges,SummarizedExperiment-method}
18 | \alias{rowRanges,RangedSummarizedExperiment-method}
19 | \alias{rowRanges<-}
20 | \alias{rowRanges<-,SummarizedExperiment,NULL-method}
21 | \alias{rowRanges<-,RangedSummarizedExperiment,NULL-method}
22 | \alias{rowRanges<-,SummarizedExperiment,GenomicRanges-method}
23 | \alias{rowRanges<-,SummarizedExperiment,GRangesList-method}
24 | \alias{names,RangedSummarizedExperiment-method}
25 | \alias{names<-,RangedSummarizedExperiment-method}
26 | \alias{dimnames<-,RangedSummarizedExperiment,list-method}
27 |
28 | % GenomicRanges compatibility methods
29 | \alias{Compare,ANY,RangedSummarizedExperiment-method}
30 | \alias{Compare,RangedSummarizedExperiment,ANY-method}
31 | \alias{Compare,RangedSummarizedExperiment,RangedSummarizedExperiment-method}
32 | \alias{pcompare,ANY,RangedSummarizedExperiment-method}
33 | \alias{pcompare,RangedSummarizedExperiment,ANY-method}
34 | \alias{pcompare,RangedSummarizedExperiment,RangedSummarizedExperiment-method}
35 | \alias{duplicated,RangedSummarizedExperiment-method}
36 | \alias{elementMetadata,RangedSummarizedExperiment-method}
37 | \alias{elementMetadata<-,RangedSummarizedExperiment-method}
38 | \alias{end,RangedSummarizedExperiment-method}
39 | \alias{end<-,RangedSummarizedExperiment-method}
40 | \alias{granges,RangedSummarizedExperiment-method}
41 | \alias{is.unsorted,RangedSummarizedExperiment-method}
42 | \alias{mcols,RangedSummarizedExperiment-method}
43 | \alias{mcols<-,RangedSummarizedExperiment-method}
44 | \alias{order,RangedSummarizedExperiment-method}
45 | \alias{ranges,RangedSummarizedExperiment-method}
46 | \alias{ranges<-,RangedSummarizedExperiment-method}
47 | \alias{rank,RangedSummarizedExperiment-method}
48 | \alias{seqinfo,RangedSummarizedExperiment-method}
49 | \alias{seqinfo<-,RangedSummarizedExperiment-method}
50 | \alias{seqnames,RangedSummarizedExperiment-method}
51 | \alias{sort,RangedSummarizedExperiment-method}
52 | \alias{split,RangedSummarizedExperiment-method}
53 | \alias{split,RangedSummarizedExperiment,ANY-method}
54 | \alias{start,RangedSummarizedExperiment-method}
55 | \alias{start<-,RangedSummarizedExperiment-method}
56 | \alias{strand,RangedSummarizedExperiment-method}
57 | \alias{strand<-,RangedSummarizedExperiment,ANY-method}
58 | \alias{subset,RangedSummarizedExperiment-method}
59 | \alias{width,RangedSummarizedExperiment-method}
60 | \alias{width<-,RangedSummarizedExperiment-method}
61 |
62 | % updateObject
63 | \alias{updateObject,RangedSummarizedExperiment-method}
64 |
65 | \title{RangedSummarizedExperiment objects}
66 |
67 | \description{
68 |
69 | The RangedSummarizedExperiment class is a matrix-like container where rows
70 | represent ranges of interest (as a \link{GRanges} or \link{GRangesList}
71 | object) and columns represent samples (with sample data summarized as a
72 | \link{DataFrame}). A RangedSummarizedExperiment object contains one or more
73 | assays, each represented by a matrix-like object of numeric or other mode.
74 |
75 | RangedSummarizedExperiment is a subclass of \link{SummarizedExperiment} and,
76 | as such, all the methods documented in \code{?SummarizedExperiment}
77 | also work on a RangedSummarizedExperiment object or any
78 | \link{SummarizedExperiment} derivative. The methods documented below
79 | are additional methods that are specific to RangedSummarizedExperiment
80 | objects.
81 | }
82 |
83 | \usage{
84 |
85 | ## Constructor
86 |
87 | # See ?SummarizedExperiment for the constructor function.
88 |
89 | ## Accessors
90 |
91 | rowRanges(x, ...)
92 | rowRanges(x, ...) <- value
93 |
94 | ## Subsetting
95 |
96 | \S4method{subset}{RangedSummarizedExperiment}(x, subset, select, ...)
97 |
98 | ## rowRanges access
99 | ## see 'GRanges compatibility', below
100 | }
101 |
102 | \arguments{
103 |
104 | \item{x}{A RangedSummarizedExperiment object or derivative. The
105 | \code{rowRanges} setter will also accept a \link{SummarizedExperiment}
106 | \emph{instance} and will first coerce it to RangedSummarizedExperiment
107 | before it sets \code{value} on it.}
108 |
109 | \item{...}{Further arguments to be passed to or from other methods.}
110 |
111 | \item{value}{A \link[GenomicRanges]{GRanges} or
112 | \link[GenomicRanges]{GRangesList} object.}
113 |
114 | \item{subset}{An expression which, when evaluated in the
115 | context of \code{rowRanges(x)}, is a logical vector indicating
116 | elements or rows to keep: missing values are taken as false.}
117 |
118 | \item{select}{An expression which, when evaluated in the
119 | context of \code{colData(x)}, is a logical vector indicating
120 | columns to keep: missing values are taken as false.}
121 |
122 | }
123 |
124 | \details{
125 |
126 | The rows of a RangedSummarizedExperiment object represent ranges
127 | (in genomic coordinates) of interest. The ranges of interest are
128 | described by a \link{GRanges} or a \link{GRangesList} object, accessible
129 | using the \code{rowRanges} function, described below. The \link{GRanges}
130 | and \link{GRangesList} classes contain sequence (e.g., chromosome) name,
131 | genomic coordinates, and strand information. Each range can be
132 | annotated with additional data; this data might be used to describe
133 | the range or to summarize results (e.g., statistics of differential
134 | abundance) relevant to the range. Rows may or may not have row names;
135 | they often will not.
136 |
137 | }
138 |
139 | \section{Constructor}{
140 |
141 | RangedSummarizedExperiment instances are constructed using the
142 | \code{SummarizedExperiment()} function documented in
143 | \code{?\link{SummarizedExperiment}}.
144 |
145 | }
146 |
147 | \section{Accessors}{
148 |
149 | In the code snippets below, \code{x} is a RangedSummarizedExperiment object
150 | or derivative (e.g. a \link[SingleCellExperiment]{SingleCellExperiment}
151 | object).
152 |
153 | \describe{
154 |
155 | \item{\code{rowRanges(x)}, \code{rowRanges(x) <- value}:}{Get or set the
156 | row ranges. \code{value} is a \code{GenomicRanges} or \code{GRangesList}
157 | object. Row names of \code{value} must be NULL or consistent with the
158 | existing row names of \code{x}.}
159 |
160 | }
161 | }
162 |
163 | \section{GRanges compatibility (rowRanges access)}{
164 |
165 | Many \link{GRanges} and \link{GRangesList} operations are supported on
166 | RangedSummarizedExperiment objects, using \code{rowRanges}.
167 |
168 | Supported operations include: \code{\link{pcompare}},
169 | \code{\link{duplicated}}, \code{\link{end}}, \code{\link{end<-}},
170 | \code{\link{granges}}, \code{\link{is.unsorted}}, \code{\link{match}},
171 | \code{\link{mcols}}, \code{\link{mcols<-}}, \code{\link{order}},
172 | \code{\link{ranges}}, \code{\link{ranges<-}}, \code{\link{rank}},
173 | \code{\link{seqinfo}}, \code{\link{seqinfo<-}}, \code{\link{seqnames}},
174 | \code{\link{sort}}, \code{\link{start}}, \code{\link{start<-}},
175 | \code{\link{strand}}, \code{\link{strand<-}},
176 | \code{\link{width}}, \code{\link{width<-}}.
177 |
178 | See also \code{?\link[SummarizedExperiment]{shift}},
179 | \code{?\link[SummarizedExperiment]{isDisjoint}},
180 | \code{?\link[SummarizedExperiment]{coverage}},
181 | \code{?\link[SummarizedExperiment]{findOverlaps}}, and
182 | \code{?\link[SummarizedExperiment]{nearest}} for more
183 | \emph{GRanges compatibility methods}.
184 |
185 | Not all \link{GRanges} operations are supported, because
186 | they do not make sense for RangedSummarizedExperiment objects
187 | (e.g., length, name, as.data.frame, c, splitAsList), involve
188 | non-trivial combination or splitting of rows (e.g., disjoin, gaps,
189 | reduce, unique), or have not yet been implemented (Ops, map, window,
190 | window<-).
191 |
192 | }
193 |
194 | \section{Subsetting}{
195 |
196 | In the code snippets below, \code{x} is a RangedSummarizedExperiment object
197 | or derivative (e.g. a \link[SingleCellExperiment]{SingleCellExperiment}
198 | object).
199 |
200 | \describe{
201 |
202 | \item{\code{subset(x, subset, select)}:}{Create a subset of \code{x}
203 | using an expression \code{subset} referring to columns of
204 | \code{rowRanges(x)} (including \sQuote{seqnames}, \sQuote{start},
205 | \sQuote{end}, \sQuote{width}, \sQuote{strand}, and
206 | \code{names(rowData(x))}) and / or \code{select} referring to
207 | column names of \code{colData(x)}.}
208 |
209 | }
210 |
211 | }
212 |
213 | \section{Extension}{
214 |
215 | RangedSummarizedExperiment is implemented as an S4 class, and can be
216 | extended in the usual way, using \code{contains="RangedSummarizedExperiment"}
217 | in the new class definition.
218 |
219 | See the \link[SingleCellExperiment]{SingleCellExperiment} class defined
220 | in the \pkg{SingleCellExperiment} package for an example of such extension.
221 |
222 | }
223 |
224 | \author{Martin Morgan, \email{mtmorgan@fhcrc.org}}
225 |
226 | \seealso{
227 | \itemize{
228 | \item \link{SummarizedExperiment} for the parent class of
229 | RangedSummarizedExperiment and the
230 | RangedSummarizedExperiment/SummarizedExperiment constructor function.
231 |
232 | \item \link[SummarizedExperiment]{shift},
233 | \link[SummarizedExperiment]{isDisjoint},
234 | \link[SummarizedExperiment]{coverage},
235 | \link[SummarizedExperiment]{findOverlaps}, and
236 | \link[SummarizedExperiment]{nearest} for more
237 | \emph{GRanges compatibility methods}.
238 |
239 | \item \link[GenomicRanges]{GRanges} objects in the \pkg{GenomicRanges}
240 | package.
241 |
242 | \item The \link[SingleCellExperiment]{SingleCellExperiment} class defined
243 | in the \pkg{SingleCellExperiment} package, a subclass of
244 | RangedSummarizedExperiment specifically designed to represent
245 | single-cell sequencing data.
246 |
247 | }
248 | }
249 |
250 | \examples{
251 | nrows <- 200; ncols <- 6
252 | counts <- matrix(runif(nrows * ncols, 1, 1e4), nrows)
253 | rowRanges <- GRanges(rep(c("chr1", "chr2"), c(50, 150)),
254 | IRanges(floor(runif(200, 1e5, 1e6)), width=100),
255 | strand=sample(c("+", "-"), 200, TRUE),
256 | feature_id=sprintf("ID\%03d", 1:200))
257 | colData <- DataFrame(Treatment=rep(c("ChIP", "Input"), 3),
258 | row.names=LETTERS[1:6])
259 | rse <- SummarizedExperiment(assays=SimpleList(counts=counts),
260 | rowRanges=rowRanges, colData=colData)
261 | rse
262 | dim(rse)
263 | dimnames(rse)
264 | assayNames(rse)
265 | head(assay(rse))
266 | assays(rse) <- endoapply(assays(rse), asinh)
267 | head(assay(rse))
268 |
269 | rowRanges(rse)
270 | rowData(rse) # same as 'mcols(rowRanges(rse))'
271 | colData(rse)
272 |
273 | rse[ , rse$Treatment == "ChIP"]
274 |
275 | ## cbind() combines objects with the same ranges but different samples:
276 | rse1 <- rse
277 | rse2 <- rse1[ , 1:3]
278 | colnames(rse2) <- letters[1:ncol(rse2)]
279 | cmb1 <- cbind(rse1, rse2)
280 | dim(cmb1)
281 | dimnames(cmb1)
282 |
283 | ## rbind() combines objects with the same samples but different ranges:
284 | rse1 <- rse
285 | rse2 <- rse1[1:50, ]
286 | rownames(rse2) <- letters[1:nrow(rse2)]
287 | cmb2 <- rbind(rse1, rse2)
288 | dim(cmb2)
289 | dimnames(cmb2)
290 |
291 | ## Coercion to/from SummarizedExperiment:
292 | se0 <- as(rse, "SummarizedExperiment")
293 | se0
294 |
295 | as(se0, "RangedSummarizedExperiment")
296 |
297 | ## Setting rowRanges on a SummarizedExperiment object turns it into a
298 | ## RangedSummarizedExperiment object:
299 | se <- se0
300 | rowRanges(se) <- rowRanges
301 | se # RangedSummarizedExperiment
302 |
303 | ## Sanity checks:
304 | stopifnot(identical(assays(se0), assays(rse)))
305 | stopifnot(identical(dim(se0), dim(rse)))
306 | stopifnot(identical(dimnames(se0), dimnames(rse)))
307 | stopifnot(identical(rowData(se0), rowData(rse)))
308 | stopifnot(identical(colData(se0), colData(rse)))
309 | }
310 |
--------------------------------------------------------------------------------
/man/coverage-methods.Rd:
--------------------------------------------------------------------------------
1 | \name{coverage-methods}
2 |
3 | \alias{coverage-methods}
4 |
5 | \alias{coverage}
6 | \alias{coverage,RangedSummarizedExperiment-method}
7 |
8 |
9 | \title{Coverage of a RangedSummarizedExperiment object}
10 |
11 | \description{
12 | This man page documents the \code{coverage} method for
13 | \link{RangedSummarizedExperiment} objects.
14 | }
15 |
16 | \usage{
17 | \S4method{coverage}{RangedSummarizedExperiment}(x, shift=0L, width=NULL, weight=1L,
18 | method=c("auto", "sort", "hash"))
19 | }
20 |
21 | \arguments{
22 | \item{x}{
23 | A \link{RangedSummarizedExperiment} object.
24 | }
25 | \item{shift, width, weight, method}{
26 | See \code{?\link[GenomicRanges]{coverage}} in the \pkg{GenomicRanges}
27 | package.
28 | }
29 | }
30 |
31 | \details{
32 | This method operates on the \code{rowRanges} component of the
33 | \link{RangedSummarizedExperiment} object, which can be a
34 | \link[GenomicRanges]{GenomicRanges} or \link[GenomicRanges]{GRangesList}
35 | object.
36 |
37 | More precisely, on \link{RangedSummarizedExperiment} object \code{x},
38 | \code{coverage(x, ...)} is equivalent to \code{coverage(rowRanges(x), ...)}.
39 |
40 | See \code{?\link[GenomicRanges]{coverage}} in the \pkg{GenomicRanges}
41 | package for the details of how \code{coverage} operates on a
42 | \link[GenomicRanges]{GenomicRanges} or \link[GenomicRanges]{GRangesList}
43 | object.
44 | }
45 |
46 | \value{
47 | See \code{?\link[GenomicRanges]{coverage}} in the \pkg{GenomicRanges}
48 | package.
49 | }
50 |
51 | \seealso{
52 | \itemize{
53 | \item \link{RangedSummarizedExperiment} objects.
54 |
55 | \item The \link[GenomicRanges]{coverage} man page in the
56 | \pkg{GenomicRanges} package where the \code{coverage} methods
57 | for \link[GenomicRanges]{GenomicRanges} and
58 | \link[GenomicRanges]{GRangesList} objects are documented.
59 | }
60 | }
61 |
62 | \examples{
63 | nrows <- 20; ncols <- 6
64 | counts <- matrix(runif(nrows * ncols, 1, 1e4), nrows)
65 | rowRanges <- GRanges(rep(c("chr1", "chr2"), c(5, 15)),
66 | IRanges(sample(1000L, 20), width=100),
67 | strand=Rle(c("+", "-"), c(12, 8)),
68 | seqlengths=c(chr1=1800, chr2=1300))
69 | colData <- DataFrame(Treatment=rep(c("ChIP", "Input"), 3),
70 | row.names=LETTERS[1:6])
71 | rse <- SummarizedExperiment(assays=SimpleList(counts=counts),
72 | rowRanges=rowRanges, colData=colData)
73 |
74 | cvg <- coverage(rse)
75 | cvg
76 | stopifnot(identical(cvg, coverage(rowRanges(rse))))
77 | }
78 |
79 | \keyword{methods}
80 | \keyword{utilities}
81 |
--------------------------------------------------------------------------------
/man/findOverlaps-methods.Rd:
--------------------------------------------------------------------------------
1 | \name{findOverlaps-methods}
2 |
3 | \alias{findOverlaps-methods}
4 |
5 | \alias{findOverlaps}
6 | \alias{findOverlaps,RangedSummarizedExperiment,Vector-method}
7 | \alias{findOverlaps,Vector,RangedSummarizedExperiment-method}
8 | \alias{findOverlaps,RangedSummarizedExperiment,RangedSummarizedExperiment-method}
9 |
10 |
11 | \title{Finding overlapping ranges in RangedSummarizedExperiment objects}
12 |
13 | \description{
14 | This man page documents the \code{findOverlaps} methods for
15 | \link{RangedSummarizedExperiment} objects.
16 |
17 | \link{RangedSummarizedExperiment} objects also support
18 | \code{countOverlaps}, \code{overlapsAny}, and \code{subsetByOverlaps}
19 | thanks to the default methods defined in the \pkg{IRanges} package and
20 | to the \code{findOverlaps} methods defined in this package and documented
21 | below.
22 | }
23 |
24 | \usage{
25 | \S4method{findOverlaps}{RangedSummarizedExperiment,Vector}(query, subject,
26 | maxgap=-1L, minoverlap=0L,
27 | type=c("any", "start", "end", "within", "equal"),
28 | select=c("all", "first", "last", "arbitrary"),
29 | ignore.strand=FALSE)
30 | \S4method{findOverlaps}{Vector,RangedSummarizedExperiment}(query, subject,
31 | maxgap=-1L, minoverlap=0L,
32 | type=c("any", "start", "end", "within", "equal"),
33 | select=c("all", "first", "last", "arbitrary"),
34 | ignore.strand=FALSE)
35 | }
36 |
37 | \arguments{
38 | \item{query, subject}{
39 | One of these two arguments must be a \link{RangedSummarizedExperiment}
40 | object.
41 | }
42 | \item{maxgap, minoverlap, type}{
43 | See \code{?\link[GenomicRanges]{findOverlaps}} in the \pkg{GenomicRanges}
44 | package.
45 | }
46 | \item{select, ignore.strand}{
47 | See \code{?\link[GenomicRanges]{findOverlaps}} in the \pkg{GenomicRanges}
48 | package.
49 | }
50 | }
51 |
52 | \details{
53 | These methods operate on the \code{rowRanges} component of the
54 | \link{RangedSummarizedExperiment} object, which can be a
55 | \link[GenomicRanges]{GenomicRanges} or \link[GenomicRanges]{GRangesList}
56 | object.
57 |
58 | More precisely, if any of the above functions is passed a
59 | \link{RangedSummarizedExperiment} object through the \code{query} and/or
60 | \code{subject} argument, then it behaves as if \code{rowRanges(query)}
61 | and/or \code{rowRanges(subject)} had been passed instead.
62 |
63 | See \code{?\link[GenomicRanges]{findOverlaps}} in the \pkg{GenomicRanges}
64 | package for the details of how \code{findOverlaps} and family operate on
65 | \link[GenomicRanges]{GenomicRanges} and \link[GenomicRanges]{GRangesList}
66 | objects.
67 | }
68 |
69 | \value{
70 | See \code{?\link[GenomicRanges]{findOverlaps}} in the \pkg{GenomicRanges}
71 | package.
72 | }
73 |
74 | \seealso{
75 | \itemize{
76 | \item \link{RangedSummarizedExperiment} objects.
77 |
78 | \item The \link[GenomicRanges]{findOverlaps} man page in the
79 | \pkg{GenomicRanges} package where the \code{findOverlaps} family
80 | of methods for \link[GenomicRanges]{GenomicRanges} and
81 | \link[GenomicRanges]{GRangesList} objects is documented.
82 | }
83 | }
84 |
85 | \examples{
86 | nrows <- 20; ncols <- 6
87 | counts <- matrix(runif(nrows * ncols, 1, 1e4), nrows)
88 | rowRanges <- GRanges(rep(c("chr1", "chr2"), c(5, 15)),
89 | IRanges(sample(1000L, 20), width=100),
90 | strand=Rle(c("+", "-"), c(12, 8)))
91 | colData <- DataFrame(Treatment=rep(c("ChIP", "Input"), 3),
92 | row.names=LETTERS[1:6])
93 | rse0 <- SummarizedExperiment(assays=SimpleList(counts=counts),
94 | rowRanges=rowRanges, colData=colData)
95 | rse1 <- shift(rse0, 100)
96 |
97 | hits <- findOverlaps(rse0, rse1)
98 | hits
99 | stopifnot(identical(hits, findOverlaps(rowRanges(rse0), rowRanges(rse1))))
100 | stopifnot(identical(hits, findOverlaps(rse0, rowRanges(rse1))))
101 | stopifnot(identical(hits, findOverlaps(rowRanges(rse0), rse1)))
102 | }
103 |
104 | \keyword{methods}
105 | \keyword{utilities}
106 |
--------------------------------------------------------------------------------
/man/inter-range-methods.Rd:
--------------------------------------------------------------------------------
1 | \name{inter-range-methods}
2 |
3 | \alias{inter-range-methods}
4 |
5 | \alias{isDisjoint}
6 | \alias{isDisjoint,RangedSummarizedExperiment-method}
7 |
8 | \alias{disjointBins}
9 | \alias{disjointBins,RangedSummarizedExperiment-method}
10 |
11 |
12 | \title{Inter range transformations of a RangedSummarizedExperiment object}
13 |
14 | \description{
15 | This man page documents the \emph{inter range transformations} that are
16 | supported on \link{RangedSummarizedExperiment} objects.
17 | }
18 |
19 | \usage{
20 | \S4method{isDisjoint}{RangedSummarizedExperiment}(x, ignore.strand=FALSE)
21 |
22 | \S4method{disjointBins}{RangedSummarizedExperiment}(x, ignore.strand=FALSE)
23 | }
24 |
25 | \arguments{
26 | \item{x}{
27 | A \link{RangedSummarizedExperiment} object.
28 | }
29 | \item{ignore.strand}{
30 | See \code{?\link[GenomicRanges]{isDisjoint}} in the
31 | \pkg{GenomicRanges} package.
32 | }
33 | }
34 |
35 | \details{
36 | These transformations operate on the \code{rowRanges} component of the
37 | \link{RangedSummarizedExperiment} object, which can be a
38 | \link[GenomicRanges]{GenomicRanges} or \link[GenomicRanges]{GRangesList}
39 | object.
40 |
41 | More precisely, any of the above functions performs the following
42 | transformation on \link{RangedSummarizedExperiment} object \code{x}:
43 | \preformatted{ f(rowRanges(x), ...)
44 | }
45 | where \code{f} is the name of the function and \code{...} any additional
46 | arguments passed to it.
47 |
48 | See \code{?\link[GenomicRanges]{isDisjoint}} in the \pkg{GenomicRanges}
49 | package for the details of how these transformations operate on a
50 | \link[GenomicRanges]{GenomicRanges} or \link[GenomicRanges]{GRangesList}
51 | object.
52 | }
53 |
54 | \value{
55 | See \code{?\link[GenomicRanges]{isDisjoint}} in the
56 | \pkg{GenomicRanges} package.
57 | }
58 |
59 | \seealso{
60 | \itemize{
61 | \item \link{RangedSummarizedExperiment} objects.
62 |
63 | \item The \link[GenomicRanges]{isDisjoint} man page in the
64 | \pkg{GenomicRanges} package where \emph{inter range transformations}
65 | of a \link[GenomicRanges]{GenomicRanges} or
66 | \link[GenomicRanges]{GRangesList} object are documented.
67 | }
68 | }
69 |
70 | \examples{
71 | nrows <- 20; ncols <- 6
72 | counts <- matrix(runif(nrows * ncols, 1, 1e4), nrows)
73 | rowRanges <- GRanges(rep(c("chr1", "chr2"), c(5, 15)),
74 | IRanges(sample(1000L, 20), width=100),
75 | strand=Rle(c("+", "-"), c(12, 8)))
76 | colData <- DataFrame(Treatment=rep(c("ChIP", "Input"), 3),
77 | row.names=LETTERS[1:6])
78 | rse0 <- SummarizedExperiment(assays=SimpleList(counts=counts),
79 | rowRanges=rowRanges, colData=colData)
80 | rse1 <- shift(rse0, 99*start(rse0))
81 |
82 | isDisjoint(rse0) # FALSE
83 | isDisjoint(rse1) # TRUE
84 |
85 | bins0 <- disjointBins(rse0)
86 | bins0
87 | stopifnot(identical(bins0, disjointBins(rowRanges(rse0))))
88 |
89 | bins1 <- disjointBins(rse1)
90 | bins1
91 | stopifnot(all(bins1 == bins1[1]))
92 | }
93 |
94 | \keyword{methods}
95 | \keyword{utilities}
96 |
--------------------------------------------------------------------------------
/man/intra-range-methods.Rd:
--------------------------------------------------------------------------------
1 | \name{intra-range-methods}
2 |
3 | \alias{intra-range-methods}
4 |
5 | \alias{shift}
6 | \alias{shift,RangedSummarizedExperiment-method}
7 |
8 | \alias{narrow}
9 | \alias{narrow,RangedSummarizedExperiment-method}
10 |
11 | \alias{resize}
12 | \alias{resize,RangedSummarizedExperiment-method}
13 |
14 | \alias{flank}
15 | \alias{flank,RangedSummarizedExperiment-method}
16 |
17 | \alias{promoters}
18 | \alias{terminators}
19 | \alias{promoters,RangedSummarizedExperiment-method}
20 | \alias{terminators,RangedSummarizedExperiment-method}
21 |
22 | \alias{restrict}
23 | \alias{restrict,RangedSummarizedExperiment-method}
24 |
25 | \alias{trim,RangedSummarizedExperiment-method}
26 |
27 |
28 | \title{Intra range transformations of a RangedSummarizedExperiment object}
29 |
30 | \description{
31 | This man page documents the \emph{intra range transformations} that are
32 | supported on \link{RangedSummarizedExperiment} objects.
33 | }
34 |
35 | \usage{
36 | \S4method{shift}{RangedSummarizedExperiment}(x, shift=0L, use.names=TRUE)
37 |
38 | \S4method{narrow}{RangedSummarizedExperiment}(x, start=NA, end=NA, width=NA, use.names=TRUE)
39 |
40 | \S4method{resize}{RangedSummarizedExperiment}(x, width, fix="start", use.names=TRUE,
41 | ignore.strand=FALSE)
42 |
43 | \S4method{flank}{RangedSummarizedExperiment}(x, width, start=TRUE, both=FALSE,
44 | use.names=TRUE, ignore.strand=FALSE)
45 |
46 | \S4method{promoters}{RangedSummarizedExperiment}(x, upstream=2000, downstream=200)
47 | \S4method{terminators}{RangedSummarizedExperiment}(x, upstream=2000, downstream=200)
48 |
49 | \S4method{restrict}{RangedSummarizedExperiment}(x, start=NA, end=NA, keep.all.ranges=FALSE,
50 | use.names=TRUE)
51 |
52 | \S4method{trim}{RangedSummarizedExperiment}(x, use.names=TRUE)
53 | }
54 |
55 | \arguments{
56 | \item{x}{
57 | A \link{RangedSummarizedExperiment} object.
58 | }
59 | \item{shift, use.names, start, end, width, fix, ignore.strand, both,
60 | upstream, downstream, keep.all.ranges}{
61 | See \code{?GenomicRanges::\link[GenomicRanges]{shift}} in the
62 | \pkg{GenomicRanges} package.
63 | }
64 | }
65 |
66 | \details{
67 | These transformations operate on the \code{rowRanges} component of the
68 | \link{RangedSummarizedExperiment} object, which can be a
69 | \link[GenomicRanges]{GenomicRanges} or \link[GenomicRanges]{GRangesList}
70 | object.
71 |
72 | More precisely, any of the above functions performs the following
73 | transformation on \link{RangedSummarizedExperiment} object \code{x}:
74 | \preformatted{ rowRanges(x) <- f(rowRanges(x), ...)
75 | }
76 | where \code{f} is the name of the function and \code{...} any additional
77 | arguments passed to it.
78 |
79 | See \code{?GenomicRanges::\link[GenomicRanges]{shift}} in the
80 | \pkg{GenomicRanges} package for the details of how these transformations
81 | operate on a \link[GenomicRanges]{GenomicRanges} or
82 | \link[GenomicRanges]{GRangesList}
83 | object.
84 | }
85 |
86 | \seealso{
87 | \itemize{
88 | \item \link{RangedSummarizedExperiment} objects.
89 |
90 | \item The \code{\link[GenomicRanges]{shift}} man page in the
91 | \pkg{GenomicRanges} package where \emph{intra range transformations}
92 | of a \link[GenomicRanges]{GenomicRanges} or
93 | \link[GenomicRanges]{GRangesList} object are documented.
94 | }
95 | }
96 |
97 | \examples{
98 | nrows <- 20; ncols <- 6
99 | counts <- matrix(runif(nrows * ncols, 1, 1e4), nrows)
100 | rowRanges <- GRanges(rep(c("chr1", "chr2"), c(5, 15)),
101 | IRanges(sample(1000L, 20), width=100),
102 | strand=Rle(c("+", "-"), c(12, 8)))
103 | colData <- DataFrame(Treatment=rep(c("ChIP", "Input"), 3),
104 | row.names=LETTERS[1:6])
105 | rse0 <- SummarizedExperiment(assays=SimpleList(counts=counts),
106 | rowRanges=rowRanges, colData=colData)
107 |
108 | rse1 <- shift(rse0, 1)
109 | stopifnot(identical(
110 | rowRanges(rse1),
111 | shift(rowRanges(rse0), 1)
112 | ))
113 |
114 | se2 <- narrow(rse0, start=10, end=-15)
115 | stopifnot(identical(
116 | rowRanges(se2),
117 | narrow(rowRanges(rse0), start=10, end=-15)
118 | ))
119 |
120 | se3 <- resize(rse0, width=75)
121 | stopifnot(identical(
122 | rowRanges(se3),
123 | resize(rowRanges(rse0), width=75)
124 | ))
125 |
126 | se4 <- flank(rse0, width=20)
127 | stopifnot(identical(
128 | rowRanges(se4),
129 | flank(rowRanges(rse0), width=20)
130 | ))
131 |
132 | se5 <- promoters(rse0, upstream=85, downstream=50)
133 | stopifnot(identical(
134 | rowRanges(se5),
135 | promoters(rowRanges(rse0), upstream=85, downstream=50)
136 | ))
137 |
138 | se6 <- terminators(rse0, upstream=85, downstream=50)
139 | stopifnot(identical(
140 | rowRanges(se6),
141 | terminators(rowRanges(rse0), upstream=85, downstream=50)
142 | ))
143 |
144 | se7 <- restrict(rse0, start=200, end=700, keep.all.ranges=TRUE)
145 | stopifnot(identical(
146 | rowRanges(se7),
147 | restrict(rowRanges(rse0), start=200, end=700, keep.all.ranges=TRUE)
148 | ))
149 | }
150 |
151 | \keyword{methods}
152 | \keyword{utilities}
153 |
--------------------------------------------------------------------------------
/man/makeSummarizedExperimentFromDataFrame.Rd:
--------------------------------------------------------------------------------
1 | \name{makeSummarizedExperimentFromDataFrame}
2 |
3 | \alias{makeSummarizedExperimentFromDataFrame}
4 |
5 | \title{Make a RangedSummarizedExperiment from a data.frame or DataFrame}
6 |
7 | \description{
8 | \code{makeSummarizedExperimentFromDataFrame} uses \code{data.frame}
9 | or \code{DataFrame} column names to create a \link{GRanges} object for the
10 | \code{rowRanges} of the resulting \link{SummarizedExperiment} object.
11 | It requires that non-range data columns be coercible into a \code{numeric}
12 | \code{matrix} for the \link{SummarizedExperiment} constructor. All columns
13 | that are not part of the row ranges attribute are assumed to be experiment
14 | data; thus, keeping metadata columns is not supported. Note that this
15 | function only returns \link{SummarizedExperiment} objects with a single
16 | assay.
17 |
18 | If metadata columns are to be kept, one can first construct the row ranges
19 | attribute by using the \link{makeGRangesFromDataFrame} function and
20 | subsequently creating the \link{SummarizedExperiment}.
21 | }
22 |
23 | \usage{
24 | makeSummarizedExperimentFromDataFrame(df,
25 | ...,
26 | seqinfo = NULL,
27 | starts.in.df.are.0based = FALSE)
28 | }
29 | \arguments{
30 | \item{df}{
31 | A data.frame or \link[S4Vectors]{DataFrame} object. If not, then
32 | the function first tries to turn \code{df} into a data frame with
33 | \code{as.data.frame(df)}.
34 | }
35 | \item{...}{
36 | Additional arguments passed on to \link{makeGRangesFromDataFrame}.
37 | }
38 | \item{seqinfo}{
39 | Either \code{NULL}, or a \link{Seqinfo} object, or a character vector
40 | of seqlevels, or a named numeric vector of sequence lengths.
41 | When not \code{NULL}, it must be compatible with the genomic ranges
42 | in \code{df} i.e. it must include at least the sequence levels
43 | represented in \code{df}.
44 | }
45 | \item{starts.in.df.are.0based}{
46 | \code{TRUE} or \code{FALSE} (the default).
47 | If \code{TRUE}, then the start positions of the genomic ranges in
48 | \code{df} are considered to be \emph{0-based} and are converted to
49 | \emph{1-based} in the returned \link{GRanges} object.
50 | This feature is intended to make it more convenient to handle input
51 | that contains data obtained from resources using the "0-based
52 | start" convention. A notorious example of such a resource is the UCSC
53 | Table Browser (\url{http://genome.ucsc.edu/cgi-bin/hgTables}).
54 | }
55 | }
56 | \value{
57 | A \link{RangedSummarizedExperiment} object with rowRanges and a single assay
58 | }
59 | \author{
60 | M. Ramos
61 | }
62 | \seealso{
63 | \itemize{
64 | \item \link{makeGRangesFromDataFrame}
65 | }
66 | }
67 |
68 | \examples{
69 | ## ---------------------------------------------------------------------
70 | ## BASIC EXAMPLES
71 | ## ---------------------------------------------------------------------
72 |
73 | # Note that rownames of the data.frame are also rownames of the result
74 | df <- data.frame(chr="chr2", start = 11:15, end = 12:16,
75 | strand = c("+", "-", "+", "*", "."), expr0 = 3:7,
76 | expr1 = 8:12, expr2 = 12:16,
77 | row.names = paste0("GENE", letters[5:1]))
78 | df
79 |
80 | exRSE <- makeSummarizedExperimentFromDataFrame(df)
81 |
82 | exRSE
83 |
84 | assay(exRSE)
85 |
86 | rowRanges(exRSE)
87 | }
88 |
--------------------------------------------------------------------------------
/man/makeSummarizedExperimentFromExpressionSet.Rd:
--------------------------------------------------------------------------------
1 | \name{makeSummarizedExperimentFromExpressionSet}
2 |
3 | \alias{makeSummarizedExperimentFromExpressionSet}
4 | \alias{naiveRangeMapper}
5 | \alias{probeRangeMapper}
6 | \alias{geneRangeMapper}
7 | \alias{coerce,ExpressionSet,RangedSummarizedExperiment-method}
8 | \alias{coerce,ExpressionSet,SummarizedExperiment-method}
9 | \alias{coerce,RangedSummarizedExperiment,ExpressionSet-method}
10 | \alias{coerce,SummarizedExperiment,ExpressionSet-method}
11 |
12 |
13 | \title{Make a RangedSummarizedExperiment object from an ExpressionSet and
14 | vice-versa}
15 |
16 | \description{
17 | Coercion between \link{RangedSummarizedExperiment} and
18 | \link[Biobase]{ExpressionSet} is supported in both directions.
19 |
20 | For going from \link[Biobase]{ExpressionSet} to
21 | \link{RangedSummarizedExperiment}, the
22 | \code{makeSummarizedExperimentFromExpressionSet} function is also
23 | provided to let the user control how to map features to ranges.
24 | }
25 |
26 | \usage{
27 | makeSummarizedExperimentFromExpressionSet(from,
28 | mapFun=naiveRangeMapper,
29 | ...)
30 |
31 | ## range mapping functions
32 | naiveRangeMapper(from)
33 | probeRangeMapper(from)
34 | geneRangeMapper(txDbPackage, key = "ENTREZID")
35 | }
36 |
37 | \arguments{
38 | \item{from}{
39 | An \link[Biobase]{ExpressionSet} object.
40 | }
41 | \item{mapFun}{
42 | A function which takes an \link[Biobase]{ExpressionSet} object and
43 | returns a \link{GRanges} or \link{GRangesList} object which
44 | corresponds to the genomic ranges used in the ExpressionSet. The
45 | \link[base]{rownames} of the returned \link[GenomicRanges]{GRanges}
46 | are used to match the \link[Biobase]{featureNames} of the
47 | \link[Biobase]{ExpressionSet}.
48 |
49 | The \code{naiveRangeMapper} function is used by default.
50 | }
51 | \item{...}{
52 | Additional arguments passed to \code{mapFun}.
53 | }
54 | \item{txDbPackage}{
55 | A character string with the Transcript Database to use for the mapping.
56 | }
57 | \item{key}{
58 | A character string with the Gene key to use for the mapping.
59 | }
60 | }
61 |
62 | \value{
63 | \code{makeSummarizedExperimentFromExpressionSet} takes an
64 | \link[Biobase]{ExpressionSet} object as input and a \emph{range mapping
65 | function} that maps the features to ranges. It then returns a
66 | \link{RangedSummarizedExperiment} object that corresponds to the input.
67 |
68 | The range mapping functions return a \link{GRanges} object, with the
69 | \code{rownames} corresponding to the \link[Biobase]{featureNames} of
70 | the \link[Biobase]{ExpressionSet} object.
71 | }
72 |
73 | \author{Jim Hester, \email{james.f.hester@gmail.com}}
74 |
75 | \seealso{
76 | \itemize{
77 | \item \link{RangedSummarizedExperiment} objects.
78 |
79 | \item \link[Biobase]{ExpressionSet} objects in the \pkg{Biobase} package.
80 |
81 | \item \link[GenomicFeatures]{TxDb} objects in the \pkg{GenomicFeatures}
82 | package.
83 | }
84 | }
85 |
86 | \examples{
87 | ## ---------------------------------------------------------------------
88 | ## GOING FROM ExpressionSet TO SummarizedExperiment
89 | ## ---------------------------------------------------------------------
90 |
91 | data(sample.ExpressionSet, package="Biobase")
92 |
93 | # naive coercion
94 | makeSummarizedExperimentFromExpressionSet(sample.ExpressionSet)
95 | as(sample.ExpressionSet, "RangedSummarizedExperiment")
96 | as(sample.ExpressionSet, "SummarizedExperiment")
97 |
98 | # using probe range mapper
99 | makeSummarizedExperimentFromExpressionSet(sample.ExpressionSet, probeRangeMapper)
100 |
101 | # using the gene range mapper
102 | se <- makeSummarizedExperimentFromExpressionSet(
103 | sample.ExpressionSet,
104 | geneRangeMapper("TxDb.Hsapiens.UCSC.hg19.knownGene")
105 | )
106 | se
107 | rowData(se) # duplicate row names
108 |
109 | ## ---------------------------------------------------------------------
110 | ## GOING FROM SummarizedExperiment TO ExpressionSet
111 | ## ---------------------------------------------------------------------
112 |
113 | example(RangedSummarizedExperiment) # to create 'rse'
114 | rse
115 | as(rse, "ExpressionSet")
116 | }
117 |
118 | \keyword{manip}
119 |
--------------------------------------------------------------------------------
/man/makeSummarizedExperimentFromLoom.Rd:
--------------------------------------------------------------------------------
1 | \name{makeSummarizedExperimentFromLoom}
2 |
3 | \alias{makeSummarizedExperimentFromLoom}
4 |
5 | \title{Make a SummarizedExperiment from a '.loom' hdf5 file}
6 |
7 | \description{
8 | \code{makeSummarizedExperimentFromLoom} represents a '.loom' file as
9 | a \code{SummarizedExperiment}. The \code{'/matrix'} and
10 | \code{'/layers'} are represented as \code{HDF5Array} objects; row
11 | and column attributes are parsed to \code{DataFrame}. Optionally,
12 | row or column attributes can be specified as row and column
13 | names.
14 | }
15 |
16 | \usage{
17 | makeSummarizedExperimentFromLoom(file,
18 | rownames_attr = NULL,
19 | colnames_attr = NULL)
20 | }
21 | \arguments{
22 | \item{file}{
23 | The path (as a single character string) to the HDF5 file where
24 | the dataset is located.
25 | }
26 | \item{rownames_attr}{
27 | The name of the row attribute to be used as row names.
28 | }
29 | \item{colnames_attr}{
30 | The name of the column attribute to be used as column names.
31 | }
32 | }
33 | \value{
34 | A \link{SummarizedExperiment} object with row and column data and
35 | one or more assays.
36 | }
37 | \author{
38 | Martin Morgan
39 | }
40 | \seealso{
41 | \url{http://loompy.org/loompy-docs/format/index.html} for a
42 | specification of the .loom format.
43 | }
44 | \examples{
45 | ## ---------------------------------------------------------------------
46 | ## BASIC EXAMPLE
47 | ## ---------------------------------------------------------------------
48 |
49 | file <- system.file(
50 | package="SummarizedExperiment", "extdata", "example.loom"
51 | )
52 | se <- makeSummarizedExperimentFromLoom(file)
53 | se
54 | assay(se)
55 | metadata(se)
56 | }
57 |
--------------------------------------------------------------------------------
/man/nearest-methods.Rd:
--------------------------------------------------------------------------------
1 | \name{nearest-methods}
2 |
3 | \alias{nearest-methods}
4 |
5 | \alias{precede}
6 | \alias{precede,RangedSummarizedExperiment,ANY-method}
7 | \alias{precede,ANY,RangedSummarizedExperiment-method}
8 | \alias{precede,RangedSummarizedExperiment,RangedSummarizedExperiment-method}
9 |
10 | \alias{follow}
11 | \alias{follow,RangedSummarizedExperiment,ANY-method}
12 | \alias{follow,ANY,RangedSummarizedExperiment-method}
13 | \alias{follow,RangedSummarizedExperiment,RangedSummarizedExperiment-method}
14 |
15 | \alias{nearest}
16 | \alias{nearest,RangedSummarizedExperiment,ANY-method}
17 | \alias{nearest,ANY,RangedSummarizedExperiment-method}
18 | \alias{nearest,RangedSummarizedExperiment,RangedSummarizedExperiment-method}
19 |
20 | \alias{distance}
21 | \alias{distance,RangedSummarizedExperiment,ANY-method}
22 | \alias{distance,ANY,RangedSummarizedExperiment-method}
23 | \alias{distance,RangedSummarizedExperiment,RangedSummarizedExperiment-method}
24 |
25 | \alias{distanceToNearest}
26 | \alias{distanceToNearest,RangedSummarizedExperiment,ANY-method}
27 | \alias{distanceToNearest,ANY,RangedSummarizedExperiment-method}
28 | \alias{distanceToNearest,RangedSummarizedExperiment,RangedSummarizedExperiment-method}
29 |
30 |
31 | \title{Finding the nearest range neighbor in RangedSummarizedExperiment objects}
32 |
33 | \description{
34 | This man page documents the \code{nearest} methods and family (i.e.
35 | \code{precede}, \code{follow}, \code{distance}, and \code{distanceToNearest}
36 | methods) for \link{RangedSummarizedExperiment} objects.
37 | }
38 |
39 | \usage{
40 | \S4method{precede}{RangedSummarizedExperiment,ANY}(x, subject, select=c("arbitrary", "all"),
41 | ignore.strand=FALSE)
42 | \S4method{precede}{ANY,RangedSummarizedExperiment}(x, subject, select=c("arbitrary", "all"),
43 | ignore.strand=FALSE)
44 |
45 | \S4method{follow}{RangedSummarizedExperiment,ANY}(x, subject, select=c("arbitrary", "all"),
46 | ignore.strand=FALSE)
47 | \S4method{follow}{ANY,RangedSummarizedExperiment}(x, subject, select=c("arbitrary", "all"),
48 | ignore.strand=FALSE)
49 |
50 | \S4method{nearest}{RangedSummarizedExperiment,ANY}(x, subject, select=c("arbitrary", "all"), ignore.strand=FALSE)
51 | \S4method{nearest}{ANY,RangedSummarizedExperiment}(x, subject, select=c("arbitrary", "all"), ignore.strand=FALSE)
52 |
53 | \S4method{distance}{RangedSummarizedExperiment,ANY}(x, y, ignore.strand=FALSE, ...)
54 | \S4method{distance}{ANY,RangedSummarizedExperiment}(x, y, ignore.strand=FALSE, ...)
55 |
56 | \S4method{distanceToNearest}{RangedSummarizedExperiment,ANY}(x, subject, ignore.strand=FALSE, ...)
57 | \S4method{distanceToNearest}{ANY,RangedSummarizedExperiment}(x, subject, ignore.strand=FALSE, ...)
58 | }
59 |
60 | \arguments{
61 | \item{x, subject}{
62 | One of these two arguments must be a \link{RangedSummarizedExperiment}
63 | object.
64 | }
65 | \item{select, ignore.strand}{
66 | See \code{?\link[GenomicRanges]{nearest}} in the \pkg{GenomicRanges}
67 | package.
68 | }
69 | \item{y}{
70 | For the \code{distance} methods, one of \code{x} or \code{y} must be a
71 | \link{RangedSummarizedExperiment} object.
72 | }
73 | \item{...}{Additional arguments for methods.}
74 | }
75 |
76 | \details{
77 | These methods operate on the \code{rowRanges} component of the
78 | \link{RangedSummarizedExperiment} object, which can be a
79 | \link[GenomicRanges]{GenomicRanges} or \link[GenomicRanges]{GRangesList}
80 | object.
81 |
82 | More precisely, if any of the above functions is passed a
83 | \link{RangedSummarizedExperiment} object through the \code{x}, \code{subject},
84 | and/or \code{y} argument, then it behaves as if \code{rowRanges(x)},
85 | \code{rowRanges(subject)}, and/or \code{rowRanges(y)} had been passed
86 | instead.
87 |
88 | See \code{?\link[GenomicRanges]{nearest}} in the \pkg{GenomicRanges}
89 | package for the details of how \code{nearest} and family operate on
90 | \link[GenomicRanges]{GenomicRanges} and \link[GenomicRanges]{GRangesList}
91 | objects.
92 | }
93 |
94 | \value{
95 | See \code{?\link[GenomicRanges]{nearest}} in the \pkg{GenomicRanges}
96 | package.
97 | }
98 |
99 | \seealso{
100 | \itemize{
101 | \item \link{RangedSummarizedExperiment} objects.
102 |
103 | \item The \link[GenomicRanges]{nearest} man page in the
104 | \pkg{GenomicRanges} package where the \code{nearest} family
105 | of methods for \link[GenomicRanges]{GenomicRanges} and
106 | \link[GenomicRanges]{GRangesList} objects is documented.
107 | }
108 | }
109 |
110 | \examples{
111 | nrows <- 20; ncols <- 6
112 | counts <- matrix(runif(nrows * ncols, 1, 1e4), nrows)
113 | rowRanges <- GRanges(rep(c("chr1", "chr2"), c(5, 15)),
114 | IRanges(sample(1000L, 20), width=100),
115 | strand=Rle(c("+", "-"), c(12, 8)))
116 | colData <- DataFrame(Treatment=rep(c("ChIP", "Input"), 3),
117 | row.names=LETTERS[1:6])
118 | rse0 <- SummarizedExperiment(assays=SimpleList(counts=counts),
119 | rowRanges=rowRanges, colData=colData)
120 | rse1 <- shift(rse0, 100)
121 |
122 | res <- nearest(rse0, rse1)
123 | res
124 | stopifnot(identical(res, nearest(rowRanges(rse0), rowRanges(rse1))))
125 | stopifnot(identical(res, nearest(rse0, rowRanges(rse1))))
126 | stopifnot(identical(res, nearest(rowRanges(rse0), rse1)))
127 |
128 | res <- nearest(rse0) # missing subject
129 | res
130 | stopifnot(identical(res, nearest(rowRanges(rse0))))
131 |
132 | hits <- nearest(rse0, rse1, select="all")
133 | hits
134 | stopifnot(identical(
135 | hits,
136 | nearest(rowRanges(rse0), rowRanges(rse1), select="all")
137 | ))
138 | stopifnot(identical(
139 | hits,
140 | nearest(rse0, rowRanges(rse1), select="all")
141 | ))
142 | stopifnot(identical(
143 | hits,
144 | nearest(rowRanges(rse0), rse1, select="all")
145 | ))
146 | }
147 |
148 | \keyword{methods}
149 | \keyword{utilities}
150 |
--------------------------------------------------------------------------------
/tests/run_unitTests.R:
--------------------------------------------------------------------------------
1 | require("SummarizedExperiment") || stop("unable to load SummarizedExperiment package")
2 | SummarizedExperiment:::.test()
3 |
--------------------------------------------------------------------------------
/vignettes/.install_extras:
--------------------------------------------------------------------------------
1 | SE.svg
2 |
--------------------------------------------------------------------------------
/vignettes/SummarizedExperiment.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "_SummarizedExperiment_ for Coordinating Experimental Assays, Samples, and Regions of Interest"
3 | author: "Martin Morgan, Valerie Obenchain, Jim Hester, Hervé Pagès"
4 | date: "Revised: 5 Jan, 2023"
5 | output:
6 | BiocStyle::html_document:
7 | toc: true
8 | vignette: >
9 | %\VignetteIndexEntry{1. SummarizedExperiment for Coordinating Experimental Assays, Samples, and Regions of Interest}
10 | %\VignetteEngine{knitr::rmarkdown}
11 | \usepackage[utf8]{inputenc}
12 | ---
13 |
14 | ```{r style, echo=FALSE, results='asis'}
15 | BiocStyle::markdown()
16 | ```
17 |
18 |
19 | # Introduction
20 |
21 | The `SummarizedExperiment` class is used to store rectangular matrices of
22 | experimental results, which are commonly produced by sequencing and microarray
23 | experiments. Note that `SummarizedExperiment` can simultaneously manage several
24 | experimental results or `assays` as long as they are of the same dimensions.
25 |
26 | Each object stores observations of one or more samples, along
27 | with additional meta-data describing both the observations (features) and
28 | samples (phenotypes).
29 |
30 | A key aspect of the `SummarizedExperiment` class is the coordination of the
31 | meta-data and assays when subsetting. For example, if you want to exclude a
32 | given sample you can do so for both the meta-data and assay in one operation,
33 | which ensures the meta-data and observed data will remain in sync. Improperly
34 | accounting for meta and observational data has resulted in a number of
35 | incorrect results and retractions so this is a very desirable
36 | property.
37 |
38 | `SummarizedExperiment` is in many ways similar to the historical
39 | `ExpressionSet`, the main distinction being that `SummarizedExperiment` is more
40 | flexible in its row information, allowing both `GRanges`-based rows as well as
41 | those described by arbitrary `DataFrame`s. This makes it ideally suited to a
42 | variety of experiments, particularly sequencing-based experiments such as
43 | RNA-Seq and ChIP-Seq.
44 |
45 | # Anatomy of a `SummarizedExperiment`
46 |
47 | The _SummarizedExperiment_ package contains two classes:
48 | `SummarizedExperiment` and `RangedSummarizedExperiment`.
49 |
50 | `SummarizedExperiment` is a matrix-like container where rows represent features
51 | of interest (e.g. genes, transcripts, exons, etc.) and columns represent
52 | samples. The objects contain one or more assays, each represented by a
53 | matrix-like object of numeric or other mode. The rows of a
54 | `SummarizedExperiment` object represent features of interest. Information
55 | about these features is stored in a `DataFrame` object, accessible using the
56 | function `rowData()`. Each row of the `DataFrame` provides information on the
57 | feature in the corresponding row of the `SummarizedExperiment` object. Columns
58 | of the DataFrame represent different attributes of the features of interest,
59 | e.g., gene or transcript IDs, etc.
60 |
61 | `RangedSummarizedExperiment` is the child of the `SummarizedExperiment` class
62 | which means that all the methods on `SummarizedExperiment` also work on a
63 | `RangedSummarizedExperiment`.
64 |
65 | The fundamental difference between the two classes is that the rows of a
66 | `RangedSummarizedExperiment` object represent genomic ranges of interest
67 | instead of a `DataFrame` of features. The `RangedSummarizedExperiment` ranges
68 | are described by a `GRanges` or a `GRangesList` object, accessible using the
69 | `rowRanges()` function.
70 |
71 | The following graphic displays the class geometry and highlights the
72 | vertical (column) and horizontal (row) relationships.
73 |
74 | 
75 |
76 | ## Assays
77 |
78 | The `airway` package contains an example dataset from an RNA-Seq experiment of
79 | read counts per gene for airway smooth muscles. These data are stored
80 | in a `RangedSummarizedExperiment` object which contains 8 different
81 | experimental samples and assays 64,102 gene transcripts.
82 |
83 | ```{r, echo=FALSE}
84 | suppressPackageStartupMessages(library(SummarizedExperiment))
85 | suppressPackageStartupMessages(data(airway, package="airway"))
86 | ```
87 |
88 | ```{r}
89 | library(SummarizedExperiment)
90 | data(airway, package="airway")
91 | se <- airway
92 | se
93 | ```
94 |
95 | To retrieve the experiment data from a `SummarizedExperiment` object one can
96 | use the `assays()` accessor. An object can have multiple assay datasets
97 | each of which can be accessed using the `$` operator.
98 | The `airway` dataset contains only one assay (`counts`). Here each row
99 | represents a gene transcript and each column one of the samples.
100 |
101 | ```{r assays, eval = FALSE}
102 | assays(se)$counts
103 | ```
104 |
105 | ```{r assays_table, echo = FALSE}
106 | knitr::kable(assays(se)$counts[1:10,])
107 | ```
108 |
109 | ## 'Row' (regions-of-interest) data
110 | The `rowRanges()` accessor is used to view the range information for a
111 | `RangedSummarizedExperiment`. (Note if this were the parent
112 | `SummarizedExperiment` class we'd use `rowData()`). The data are stored in a
113 | `GRangesList` object, where each list element corresponds to one gene
114 | transcript and the ranges in each `GRanges` correspond to the exons in the
115 | transcript.
116 |
117 | ```{r rowRanges}
118 | rowRanges(se)
119 | ```
120 |
121 | ## 'Column' (sample) data
122 |
123 | Sample meta-data describing the samples can be accessed using `colData()`, and
124 | is a `DataFrame` that can store any number of descriptive columns for each
125 | sample row.
126 |
127 | ```{r colData}
128 | colData(se)
129 | ```
130 |
131 | This sample metadata can be accessed using the `$` accessor which makes it
132 | easy to subset the entire object by a given phenotype.
133 |
134 | ```{r columnSubset}
135 | # subset for only those samples treated with dexamethasone
136 | se[, se$dex == "trt"]
137 | ```
138 |
139 | ## Experiment-wide metadata
140 |
141 | Meta-data describing the experimental methods and publication references can be
142 | accessed using `metadata()`.
143 |
144 | ```{r metadata}
145 | metadata(se)
146 | ```
147 |
148 | Note that `metadata()` is just a simple list, so it is appropriate for _any_
149 | experiment wide metadata the user wishes to save, such as storing model
150 | formulas.
151 |
152 | ```{r metadata-formula}
153 | metadata(se)$formula <- counts ~ dex + albut
154 |
155 | metadata(se)
156 | ```
157 |
158 | # Constructing a `SummarizedExperiment`
159 |
160 | Often, `SummarizedExperiment` or `RangedSummarizedExperiment` objects are
161 | returned by functions written by other packages. However it is possible to
162 | create them by hand with a call to the `SummarizedExperiment()` constructor.
163 |
164 | Constructing a `RangedSummarizedExperiment` with a `GRanges` as the
165 | _rowRanges_ argument:
166 |
167 | ```{r constructRSE}
168 | nrows <- 200
169 | ncols <- 6
170 | counts <- matrix(runif(nrows * ncols, 1, 1e4), nrows)
171 | rowRanges <- GRanges(rep(c("chr1", "chr2"), c(50, 150)),
172 | IRanges(floor(runif(200, 1e5, 1e6)), width=100),
173 | strand=sample(c("+", "-"), 200, TRUE),
174 | feature_id=sprintf("ID%03d", 1:200))
175 | colData <- DataFrame(Treatment=rep(c("ChIP", "Input"), 3),
176 | row.names=LETTERS[1:6])
177 |
178 | SummarizedExperiment(assays=list(counts=counts),
179 | rowRanges=rowRanges, colData=colData)
180 | ```
181 |
182 | A `SummarizedExperiment` can be constructed with or without supplying
183 | a `DataFrame` for the _rowData_ argument:
184 |
185 | ```{r constructSE}
186 | SummarizedExperiment(assays=list(counts=counts), colData=colData)
187 | ```
188 |
189 | # Top-level dimnames vs assay-level dimnames
190 |
191 | In addition to the dimnames that are set on a `SummarizedExperiment` object
192 | itself, the individual assays that are stored in the object can have their
193 | own dimnames or not:
194 |
195 | ```{r construct_se3}
196 | a1 <- matrix(runif(24), ncol=6, dimnames=list(letters[1:4], LETTERS[1:6]))
197 | a2 <- matrix(rpois(24, 0.8), ncol=6)
198 | a3 <- matrix(101:124, ncol=6, dimnames=list(NULL, LETTERS[1:6]))
199 | se3 <- SummarizedExperiment(SimpleList(a1, a2, a3))
200 | ```
201 |
202 | The dimnames of the `SummarizedExperiment` object (top-level dimnames):
203 |
204 | ```{r top_level_dimnames}
205 | dimnames(se3)
206 | ```
207 |
208 | When extracting assays from the object, the top-level dimnames are put on
209 | them by default:
210 |
211 | ```{r top_level_dimnames_are_propagated}
212 | assay(se3, 2) # this is 'a2', but with the top-level dimnames on it
213 |
214 | assay(se3, 3) # this is 'a3', but with the top-level dimnames on it
215 | ```
216 |
217 | However if using `withDimnames=FALSE` then the assays are returned
218 | _as-is_, i.e. with their original dimnames (this is how they are stored
219 | in the `SummarizedExperiment` object):
220 |
221 | ```{r assay_level_dimnames}
222 | assay(se3, 2, withDimnames=FALSE) # identical to 'a2'
223 |
224 | assay(se3, 3, withDimnames=FALSE) # identical to 'a3'
225 |
226 | rownames(se3) <- strrep(letters[1:4], 3)
227 |
228 | dimnames(se3)
229 |
230 | assay(se3, 1) # this is 'a1', but with the top-level dimnames on it
231 |
232 | assay(se3, 1, withDimnames=FALSE) # identical to 'a1'
233 | ```
234 |
235 | # Common operations on `SummarizedExperiment`
236 |
237 | ## Subsetting
238 |
239 | - `[` Performs two dimensional subsetting, just like subsetting a matrix
240 | or data frame.
241 | ```{r 2d}
242 | # subset the first five transcripts and first three samples
243 | se[1:5, 1:3]
244 | ```
245 | - `$` operates on `colData()` columns, for easy sample extraction.
246 | ```{r colDataExtraction}
247 | se[, se$cell == "N61311"]
248 | ```
249 |
250 | ## Getters and setters
251 |
252 | - `rowRanges()` / (`rowData()`), `colData()`, `metadata()`
253 | ```{r getSet}
254 | counts <- matrix(1:15, 5, 3, dimnames=list(LETTERS[1:5], LETTERS[1:3]))
255 |
256 | dates <- SummarizedExperiment(assays=list(counts=counts),
257 | rowData=DataFrame(month=month.name[1:5], day=1:5))
258 |
259 | # Subset all January assays
260 | dates[rowData(dates)$month == "January", ]
261 | ```
262 |
263 | - `assay()` versus `assays()`
264 | There are two accessor functions for extracting the assay data from a
265 | `SummarizedExperiment` object. `assays()` operates on the entire list of assay
266 | data as a whole, while `assay()` operates on only one assay at a time.
267 | `assay(x, i)` is simply a convenience function which is equivalent to
268 | `assays(x)[[i]]`.
269 |
270 | ```{r assay_assays}
271 | assays(se)
272 |
273 | assays(se)[[1]][1:5, 1:5]
274 |
275 | # assay defaults to the first assay if no i is given
276 | assay(se)[1:5, 1:5]
277 |
278 | assay(se, 1)[1:5, 1:5]
279 | ```
280 |
281 | ## Range-based operations
282 |
283 | - `subsetByOverlaps()`
284 | `RangedSummarizedExperiment` objects support all of the `findOverlaps()`
285 | methods and associated functions. This includes `subsetByOverlaps()`, which
286 | makes it easy to subset a `RangedSummarizedExperiment` object by an interval.
287 |
288 | ```{r overlap}
289 | # Subset for only rows which are in the interval 100,000 to 1,100,000 of
290 | # chromosome 1
291 | roi <- GRanges(seqnames="1", ranges=100000:1100000)
292 | subsetByOverlaps(se, roi)
293 | ```
294 |
295 | # Interactive visualization
296 |
297 | The `r BiocStyle::Biocpkg("iSEE")` package provides functions for creating an interactive user interface based on the `r BiocStyle::CRANpkg("shiny")` package for exploring data stored in `SummarizedExperiment` objects.
298 | Information stored in standard components of `SummarizedExperiment` objects -- including assay data, and row and column metadata -- are automatically detected and used to populate the interactive multi-panel user interface.
299 | Particular attention is given to the `r BiocStyle::Biocpkg("SingleCellExperiment")` extension of the `SummarizedExperiment` class, with visualization of dimensionality reduction results.
300 |
301 | Extensions to the `r BiocStyle::Biocpkg("iSEE")` package provide support for more context-dependent functionality:
302 |
303 | - `r BiocStyle::Biocpkg("iSEEde")` provides additional panels that facilitate the interactive visualization of differential expression results, including the `DESeqDataSet` extension of `SummarizedExperiment` implemented in `r BiocStyle::Biocpkg("DESeq2")`.
304 | - `r BiocStyle::Biocpkg("iSEEpathways")` provides additional panels for the interactive visualization of pathway analysis results.
305 | - `r BiocStyle::Biocpkg("iSEEhub")` provides functionality to import data sets stored in the Bioconductor `r BiocStyle::Biocpkg("ExperimentHub")`.
306 | - `r BiocStyle::Biocpkg("iSEEindex")` provides functionality to import data sets from custom sources (local and remote).
307 |
308 | # Session information
309 |
310 | ```{r}
311 | sessionInfo()
312 | ```
313 |
--------------------------------------------------------------------------------