├── .Rbuildignore ├── .github └── workflows │ └── r-cmd-check.yaml ├── .gitignore ├── DESCRIPTION ├── LICENSE ├── NAMESPACE ├── R ├── AllClasses.R ├── TileDBArray.R ├── TileDBRealizationSink.R ├── globals.R ├── package.R └── utils.R ├── README.md ├── inst └── NEWS.Rd ├── man ├── TileDBArray-globals.Rd ├── TileDBArray-pkg.Rd ├── TileDBArray.Rd └── TileDBRealizationSink.Rd ├── tests ├── testthat.R └── testthat │ ├── test-read.R │ └── test-write.R └── vignettes └── userguide.Rmd /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^\.gitignore$ 2 | ^\.github$ 3 | -------------------------------------------------------------------------------- /.github/workflows/r-cmd-check.yaml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: 4 | - master 5 | pull_request: 6 | 7 | name: R CMD check 8 | 9 | jobs: 10 | check: 11 | name: R CMD check 12 | runs-on: ubuntu-latest 13 | container: bioconductor/bioconductor_docker:devel 14 | 15 | steps: 16 | - uses: actions/checkout@v4 17 | 18 | - name: Set directories 19 | run: | 20 | echo "R_PKG_DIR=${R_HOME}/site-library" >> $GITHUB_ENV 21 | 22 | - name: Restore the package directory 23 | uses: actions/cache@v4 24 | with: 25 | path: ${{ env.R_PKG_DIR }} 26 | key: check-packages 27 | 28 | - name: Install dependencies 29 | shell: Rscript {0} 30 | run: | 31 | stuff <- read.dcf("DESCRIPTION") 32 | stuff <- stuff[,intersect(colnames(stuff), c("Imports", "LinkingTo", "Suggests", "Depends"))] 33 | deps <- sub(" .*", "", unlist(strsplit(stuff, ",\\s*"), use.names=FALSE)) 34 | BiocManager::install(deps) 35 | 36 | - name: Build the package 37 | run: R CMD build . 
38 | 39 | - name: Check the package 40 | run: | 41 | tarball=$(ls *.tar.gz) 42 | R CMD check --no-manual $tarball 43 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.so 3 | *.swp 4 | *.html 5 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: TileDBArray 2 | Version: 1.15.4 3 | Date: 2024-10-01 4 | Title: Using TileDB as a DelayedArray Backend 5 | Description: Implements a DelayedArray backend for reading and 6 | writing dense or sparse arrays in the TileDB format. The 7 | resulting TileDBArrays are compatible with all Bioconductor 8 | pipelines that can accept DelayedArray instances. 9 | Authors@R: 10 | c( 11 | person("Aaron", "Lun", role=c("aut", "cre"), email="infinite.monkeys.with.keyboards@gmail.com"), 12 | person("Genentech, Inc.", role="cph") 13 | ) 14 | License: MIT + file LICENSE 15 | Depends: 16 | SparseArray (>= 1.5.20), 17 | DelayedArray (>= 0.31.7) 18 | Imports: 19 | methods, 20 | tiledb, 21 | S4Vectors 22 | Suggests: 23 | knitr, 24 | Matrix, 25 | rmarkdown, 26 | BiocStyle, 27 | BiocParallel, 28 | testthat 29 | biocViews: DataRepresentation, Infrastructure, Software 30 | VignetteBuilder: knitr 31 | BugReports: https://github.com/LTLA/TileDBArray 32 | URL: https://github.com/LTLA/TileDBArray 33 | RoxygenNote: 7.3.2 34 | Encoding: UTF-8 35 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | COPYRIGHT HOLDER: Genentech Inc. 
2 | YEAR: 2020 3 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(TileDBArray) 4 | export(TileDBArraySeed) 5 | export(TileDBRealizationSink) 6 | export(getTileDBAttr) 7 | export(getTileDBCapacity) 8 | export(getTileDBCellOrder) 9 | export(getTileDBContext) 10 | export(getTileDBDimType) 11 | export(getTileDBExtent) 12 | export(getTileDBPath) 13 | export(getTileDBTileOrder) 14 | export(setTileDBAttr) 15 | export(setTileDBCapacity) 16 | export(setTileDBCellOrder) 17 | export(setTileDBContext) 18 | export(setTileDBDimType) 19 | export(setTileDBExtent) 20 | export(setTileDBPath) 21 | export(setTileDBTileOrder) 22 | export(writeTileDBArray) 23 | exportClasses(TileDBArray) 24 | exportClasses(TileDBArraySeed) 25 | exportClasses(TileDBMatrix) 26 | exportClasses(TileDBRealizationSink) 27 | exportMethods(DelayedArray) 28 | exportMethods(chunkdim) 29 | exportMethods(extract_array) 30 | exportMethods(extract_sparse_array) 31 | exportMethods(is_sparse) 32 | exportMethods(matrixClass) 33 | exportMethods(path) 34 | exportMethods(show) 35 | exportMethods(type) 36 | exportMethods(write_block) 37 | import(DelayedArray) 38 | import(methods) 39 | import(tiledb) 40 | importClassesFrom(SparseArray,COO_SparseArray) 41 | importFrom(DelayedArray,path) 42 | importFrom(DelayedArray,start) 43 | importFrom(DelayedArray,width) 44 | importFrom(S4Vectors,isSingleString) 45 | importFrom(S4Vectors,setValidity2) 46 | importFrom(SparseArray,COO_SparseArray) 47 | importFrom(SparseArray,extract_sparse_array) 48 | importFrom(SparseArray,nzcoo) 49 | importFrom(SparseArray,nzdata) 50 | importFrom(SparseArray,nzwhich) 51 | importFrom(methods,show) 52 | -------------------------------------------------------------------------------- /R/AllClasses.R: 
# --------------------------------------------------------------------------------
# Seed class holding everything needed to read one attribute of a TileDB array.
#' @export
setClass(
    "TileDBArraySeed",
    slots = c(
        dim = "integer",        # length of each dimension
        dimnames = "list",      # one entry per dimension
        path = "character",     # URI of the TileDB array
        sparse = "logical",     # whether the TileDB schema is sparse
        attr = "character",     # name of the attribute being represented
        type = "character",     # R type reported by type()
        extent = "integer",     # tile extent per dimension, reported by chunkdim()
        offset = "integer"      # first coordinate of each dimension's domain
    )
)

# DelayedArray wrapper around a TileDBArraySeed.
#' @export
setClass(
    "TileDBArray",
    contains = "DelayedArray",
    slots = c(seed = "TileDBArraySeed")
)

# Two-dimensional counterpart of TileDBArray.
#' @export
setClass(
    "TileDBMatrix",
    contains = "DelayedMatrix",
    slots = c(seed = "TileDBArraySeed")
)

# Sink used by the RealizationSink machinery to write blocks into a TileDB array.
#' @export
setClass(
    "TileDBRealizationSink",
    contains = "RealizationSink",
    slots = c(
        dim = "integer",
        path = "character",
        sparse = "logical",
        attr = "character",
        type = "character",
        offset = "integer"
    )
)

# --------------------------------------------------------------------------------
# /R/TileDBArray.R:
# --------------------------------------------------------------------------------
#' Delayed TileDB arrays
#'
#' The TileDBArray class provides a \linkS4class{DelayedArray} backend for TileDB arrays (sparse and dense).
#'
#' @section Constructing a TileDBArray:
#' \code{TileDBArray(x, attr)} returns a TileDBArray object given:
#' \itemize{
#' \item \code{x}, a string containing a URI to a TileDB backend, most typically a path to a directory.
#' \item \code{attr}, a string specifying the attribute to represent in the array.
#' Defaults to the first attribute.
#' }
#' Alternatively, \code{x} can be a TileDBArraySeed object, in which case \code{attr} is ignored.
#'
#' \code{TileDBArraySeed(x, attr)} returns a TileDBArraySeed
#' with the same arguments as described for \code{TileDBArray}.
#' If \code{x} is already a TileDBArraySeed, it is returned
#' directly without further modification.
#'
#' \code{\link{DelayedArray}(x)} returns a TileDBArray object
#' given \code{x}, a TileDBArraySeed.
21 | #' 22 | #' In all cases, two-dimensional arrays will automatically generate a TileDBMatrix, 23 | #' a subclass of the TileDBArray. 24 | #' 25 | #' @section Available operations: 26 | #' \code{\link{extract_array}(x, index)} will return an ordinary array containing values from the TileDBArraySeed \code{x}, 27 | #' subsetted to the indices specified in \code{index}. 28 | #' The latter should be a list of length equal to the number of dimensions in \code{x}, 29 | #' where each entry is an integer vector or \code{NULL} (in which case the entirety of the dimension is used). 30 | #' 31 | #' \code{\link{extract_sparse_array}(x, index)} will return a \linkS4class{COO_SparseArray} 32 | #' representing the subset of \code{x} corresponding to the indices in \code{index}. 33 | #' The latter should be a list of the same structure as described for \code{extract_array}. 34 | #' 35 | #' \code{\link{type}(x)} will return a string containing the type of the TileDBArraySeed object \code{x}. 36 | #' Currently, only \code{"integer"}, \code{"logical"} and \code{"double"}-precision is supported. 37 | #' 38 | #' \code{\link{is_sparse}(x)} will return a logical scalar indicating 39 | #' whether the TileDBArraySeed \code{x} uses a sparse format in the TileDB backend. 40 | #' 41 | #' \code{\link{path}(x)} will return a string containing the path to the TileDB backend directory. 42 | #' 43 | #' \code{\link{chunkdim}(x)} will return an integer vector containing the tile extent in each dimension. 44 | #' This will be used as the chunk dimensions in methods like \code{\link{chunkGrid}}. 45 | #' 46 | #' All of the operations described above are also equally applicable to TileDBArray objects, 47 | #' as their methods simply delegate to those of the TileDBArraySeed. 48 | #' 49 | #' All operations supported by \linkS4class{DelayedArray} objects are 50 | #' also available for TileDBArray objects. 
#'
#' @aliases
#' TileDBArraySeed
#' TileDBArraySeed-class
#' TileDBArray
#' TileDBArray-class
#' TileDBMatrix
#' TileDBMatrix-class
#'
#' show,TileDBArraySeed-method
#' is_sparse,TileDBArraySeed-method
#' type,TileDBArraySeed-method
#' extract_array,TileDBArraySeed-method
#' extract_sparse_array,TileDBArraySeed-method
#' DelayedArray,TileDBArraySeed-method
#' path,TileDBArraySeed-method
#' chunkdim,TileDBArraySeed-method
#' matrixClass,TileDBArray-method
#'
#' @author Aaron Lun
#'
#' @examples
#' data <- matrix(rpois(10000, 5), nrow=100, ncol=100)
#' B <- as(data, "TileDBArray")
#' B
#'
#' # Apply typical DelayedArray operations:
#' as.matrix(B[1:10,1:10])
#' B %*% runif(ncol(B))
#'
#' # This also works for sparse arrays:
#' sdata <- Matrix::rsparsematrix(nrow=100, ncol=100, density=0.1)
#' C <- as(sdata, "TileDBArray")
#' C
#'
#' @name TileDBArray
NULL

# Construct a TileDBArraySeed from 'x', a URI to a TileDB array, and 'attr',
# the name of the attribute to represent (defaults to the first attribute in
# the schema). A TileDBArraySeed passed as 'x' is returned unchanged.
# Stops if 'attr' is not a single string, is not an attribute of the array,
# or maps to an R type other than logical, double, integer or character.
#' @export
TileDBArraySeed <- function(x, attr) {
    if (is(x, "TileDBArraySeed")) {
        return(x)
    }

    obj <- tiledb_array(x)
    on.exit(tiledb_array_close(obj), add=TRUE, after=FALSE)

    # Geometry comes from the schema: the first element of each dimension's
    # domain is kept as the offset, the span of the domain gives the dimension
    # length, and the tile extent is recorded per dimension.
    s <- schema(obj)
    dims <- dimensions(s)
    doms <- lapply(dims, domain)
    o <- vapply(doms, function(x) x[1L], 0L)
    d <- vapply(doms, function(x) diff(x) + 1L, 0L)
    e <- vapply(dims, tiledb::tile, 0L)

    a <- attrs(s)
    if (missing(attr)) {
        attr <- names(a)[1]
    } else if (!is.character(attr) || length(attr) != 1L || is.na(attr)) {
        # Checked up front so that a vector or NA does not reach the scalar
        # 'if' below and fail with an uninformative condition-length error.
        stop("'attr' should be a single string")
    } else if (!attr %in% names(a)) {
        stop("'attr' not in the TileDB attributes")
    }

    my.type <- tiledb_datatype_R_type(datatype(a[[attr]]))
    if (!(my.type %in% c("logical", "double", "integer", "character"))) {
        stop("'attr' refers to an unsupported type")
    }

    opened <- tiledb_array_open(obj, "READ")
    # do I need to do this if we already close it above? I don't know.
    on.exit(tiledb_array_close(opened), add=TRUE, after=FALSE)

    # An integer attribute whose "type" metadata entry is "logical" is
    # reported as logical.
    rtype <- tiledb_get_metadata(opened, "type")
    if (my.type=="integer" && identical(rtype, "logical")) {
        my.type <- rtype
    }

    # Dimnames are stored packed in the "dimnames" metadata entry; when that
    # entry is absent, use one NULL per dimension.
    dimnames <- tiledb_get_metadata(opened, "dimnames")
    if (!is.null(dimnames)) {
        dimnames <- .unpack_dimnames(dimnames)
    } else {
        dimnames <- vector("list", length(d))
    }

    new("TileDBArraySeed",
        dim=d,
        dimnames=dimnames,
        path=x,
        sparse=is.sparse(s),
        attr=attr,
        type=my.type,
        extent=e,
        offset=o
    )
}

# Validity: the common checks, plus length agreement of 'offset', 'extent' and
# 'dimnames' with 'dim', non-negative extents, and dimnames that are either
# NULL or as long as their dimension.
#' @importFrom S4Vectors setValidity2
setValidity2("TileDBArraySeed", function(object) {
    msg <- .common_checks(object)

    d <- dim(object)
    o <- object@offset
    if (length(o) != length(d)) {
        msg <- c(msg, "'offset' must have the same length as 'dim'")
    }

    e <- object@extent
    if (length(e) != length(d)) {
        msg <- c(msg, "'extent' must have the same length as 'dim'")
    }
    if (!all(e >= 0L)) {
        msg <- c(msg, "'extent' must contain non-negative integers")
    }

    dn <- dimnames(object)
    if (length(dn) != length(d)) {
        msg <- c(msg, "'dimnames' must be the same length as 'dim'")
    } else if (!all(d == lengths(dn) | vapply(dn, is.null, FALSE))) {
        # Only compared when the lengths agree, so that 'd' and 'lengths(dn)'
        # are never recycled against each other.
        msg <- c(msg, "each 'dimnames' must be NULL or the same length as the corresponding dimension")
    }

    if (length(msg)) {
        msg
    } else {
        TRUE
    }
})

# Print a one-line summary listing every dimension, e.g. "100 x 100" for a
# matrix or "10 x 20 x 30" for a three-dimensional array.
#' @export
#' @importFrom methods show
setMethod("show", "TileDBArraySeed", function(object) {
    cat(sprintf("%s TileDBArraySeed object\n", paste(dim(object), collapse=" x ")))
})

#' @export
setMethod("is_sparse", "TileDBArraySeed", function(x) x@sparse)

#' @export
setMethod("type", "TileDBArraySeed", function(x) x@type)

#' @export
setMethod("chunkdim", "TileDBArraySeed", function(x) {
    x@extent
})

#' @export
#' @importFrom DelayedArray path
setMethod("path", "TileDBArraySeed", function(object, ...) {
    object@path
})

# Collapse a sorted, duplicate-free integer vector into a two-column matrix of
# inclusive (start, end) pairs, one row per run of consecutive values.
# 'delta' may be supplied when diff(selected) has already been computed.
.compact_ranges <- function(selected, delta=diff(selected)) {
    is.not.contig <- which(delta != 1L)
    cbind(
        selected[c(1L, is.not.contig + 1L)],
        selected[c(is.not.contig, length(selected))]
    )
}

# Extract an ordinary array for the subset described by 'index', a list with
# one entry per dimension where NULL selects the whole dimension.
#' @export
setMethod("extract_array", "TileDBArraySeed", function(x, index) {
    # A length-one vector of the seed's type gives the fill value (0, 0L,
    # FALSE or ""), so every type accepted by TileDBArraySeed() is covered.
    fill <- vector(type(x), 1L)
    d2 <- .get_block_dims(x, index)

    # Hack to overcome zero-length indices that cause tiledb to throw.
    if (any(d2==0L)) {
        return(array(fill, dim=d2))
    }

    raw.output.type <- if (x@sparse) "data.frame" else "array"
    obj <- tiledb_array(x@path, attrs=x@attr, query_type="READ", return_as=raw.output.type)
    on.exit(tiledb_array_close(obj))

    ndim <- length(index)
    contiguous <- remapping <- expanders <- vector("list", ndim)
    any.modified <- FALSE

    for (i in seq_len(ndim)) {
        cur.index <- index[[i]]

        if (is.null(cur.index)) {
            # Whole dimension: a single range spanning the domain.
            curd <- x@dim[i]
            contiguous[[i]] <- cbind(1L, curd) + x@offset[i] - 1L

        } else {
            # Shift the 1-based indices into the dimension's domain.
            original <- cur.index + x@offset[i] - 1L
            selected <- original

            # Need to account for unsorted or duplicate indices.
            modified <- FALSE
            if (is.unsorted(selected)) {
                modified <- TRUE
                selected <- sort(selected)
            }
            delta <- diff(selected)
            is.dup <- delta == 0L
            if (any(is.dup)) {
                modified <- TRUE
                selected <- selected[c(TRUE, !is.dup)]
                delta <- diff(selected)
            }

            # Remember how to map the sorted, unique query back onto the
            # requested order and multiplicity.
            if (modified) {
                any.modified <- TRUE
                expanders[[i]] <- match(original, selected)
            }
            if (raw.output.type == "data.frame") {
                remapping[[i]] <- selected
            }

            contiguous[[i]] <- .compact_ranges(selected, delta)
        }
    }

    selected_ranges(obj) <- contiguous
    output <- obj[]

    if (raw.output.type == "data.frame") {
        # Sparse read: scatter the returned values into a dense array,
        # translating domain coordinates into positions within the query.
        indices <- matrix(0L, nrow(output), ndim)
        quick.dim <- integer(ndim)
        for (i in seq_len(ndim)) {
            curremap <- remapping[[i]]
            if (!is.null(curremap)) {
                indices[,i] <- match(output[[i]], curremap)
                quick.dim[i] <- length(curremap)
            } else {
                indices[,i] <- output[[i]] - x@offset[i] + 1L
                quick.dim[i] <- x@dim[i]
            }
        }
        extracted <- array(fill, dim=quick.dim)
        extracted[indices] <- as(output[[ndim + 1L]], x@type)
    } else {
        extracted <- output[[x@attr]]
        storage.mode(extracted) <- x@type
    }

    if (any.modified) {
        # Dimensions that were not modified get an empty argument, so the
        # call below is equivalent to extracted[e1, , e3, drop=FALSE].
        for (i in seq_along(expanders)) {
            if (is.null(expanders[[i]])) {
                expanders[[i]] <- substitute()
            }
        }
        expanders$drop <- FALSE
        extracted <- do.call(`[`, c(list(extracted), expanders))
    }
    extracted
})

# Dimensions of the block selected by 'index': dim(x), with the length of each
# non-NULL index substituted for the corresponding dimension.
.get_block_dims <- function(x, index) {
    d <- dim(x)
    for (i in seq_along(index)) {
        if (!is.null(index[[i]])) {
            d[i] <- length(index[[i]])
        }
    }
    d
}

# Extract a COO_SparseArray for the subset described by 'index', which has the
# same structure as in extract_array().
#' @export
#' @importFrom SparseArray extract_sparse_array COO_SparseArray
setMethod("extract_sparse_array", "TileDBArraySeed", function(x, index) {
    d2 <- .get_block_dims(x, index)
    if (any(d2==0L)) {
        return(COO_SparseArray(d2, nzdata=vector(type(x))))
    }

    obj <- tiledb_array(x@path, attrs=x@attr, query_type="READ", return_as="data.frame")
    on.exit(tiledb_array_close(obj))

    ndim <- length(index)
    contiguous <- remapping <- vector("list", ndim)

    for (i in seq_len(ndim)) {
        cur.index <- index[[i]]

        if (is.null(cur.index)) {
            curd <- x@dim[i]
            contiguous[[i]] <- cbind(1L, curd) + x@offset[i] - 1L

        } else {
            selected <- cur.index + x@offset[i] - 1L
            # Kept in the requested order, so that match() below yields
            # positions within the requested subset.
            remapping[[i]] <- selected

            # No need to worry about duplicates here.
            if (is.unsorted(selected)) {
                selected <- sort(selected)
            }

            contiguous[[i]] <- .compact_ranges(selected)
        }
    }

    selected_ranges(obj) <- contiguous
    extracted <- obj[]
    indices <- matrix(0L, nrow(extracted), ndim)
    for (i in seq_len(ndim)) {
        curremap <- remapping[[i]]
        if (!is.null(curremap)) {
            indices[,i] <- match(extracted[[i]], curremap)
        } else {
            indices[,i] <- extracted[[i]] - x@offset[i] + 1L
        }
    }

    COO_SparseArray(d2, nzcoo=indices, nzdata=as(extracted[,ndim + 1L], x@type))
})

# Convenience constructor: build the seed from 'x' (and any arguments passed
# through '...') and wrap it in a DelayedArray.
#' @export
TileDBArray <- function(x, ...) {
    DelayedArray(TileDBArraySeed(x, ...))
}

#' @export
setMethod("DelayedArray", "TileDBArraySeed",
    function(seed) new_DelayedArray(seed, Class="TileDBArray")
)

#' @export
setMethod("matrixClass", "TileDBArray", function(x) "TileDBMatrix")

# --------------------------------------------------------------------------------
# /R/TileDBRealizationSink.R:
# --------------------------------------------------------------------------------
#' Write arrays to TileDB
#'
#' Write array data to a TileDB backend via \pkg{DelayedArray}'s \linkS4class{RealizationSink} machinery.
4 | #' 5 | #' @section Writing a TileDBArray: 6 | #' \preformatted{TileDBRealizationSink( 7 | #' dim, 8 | #' dimnames=NULL, 9 | #' type="double", 10 | #' path=getTileDBPath(), 11 | #' attr=getTileDBAttr(), 12 | #' storagetype=NULL, 13 | #' dimtype=getTileDBDimType(), 14 | #' sparse=FALSE, 15 | #' extent=getTileDBExtent(), 16 | #' offset=1L, 17 | #' cellorder=getTileDBCellOrder(), 18 | #' tileorder=getTileDBTileOrder(), 19 | #' capacity=getTileDBCapacity(), 20 | #' context=getTileDBContext() 21 | #' )} 22 | #' returns a TileDBRealizationSink object that can be used to write content to a TileDB backend. 23 | #' It accepts the following arguments: 24 | #' \itemize{ 25 | #' \item \code{dim}, an integer vector (usually of length 2) to specify the array dimensions. 26 | #' \item \code{dimnames}, a list of length equal to \code{dim}, containing character vectors with names for each dimension. 27 | #' Defaults to \code{NULL}, i.e., no dimnames. 28 | #' \item \code{type}, a string specifying the R data type for the newly written array. 29 | #' Currently only \code{"double"}, \code{"integer"} and \code{"logical"} arrays are supported. 30 | #' \item \code{path}, a string containing the location of the new TileDB backend. 31 | #' \item \code{attr}, a string specifying the name of the attribute to store. 32 | #' \item \code{storagetype}, a string specifying the TileDB data type for the attribute, e.g., \code{"UINT8"}, \code{"FLOAT32"}. 33 | #' If \code{NULL}, this is automatically determined from \code{type} using \code{\link{r_to_tiledb_type}}. 34 | #' \item \code{dimtype}, a string specifying the TileDB data type for the dimension. 35 | #' \item \code{sparse}, a logical scalar indicating whether the array should be stored in sparse form. 36 | #' \item \code{extent}, an integer scalar (or vector of length equal to \code{dim}) specifying the tile extent for each dimension. 37 | #' Larger values improve compression at the cost of unnecessary data extraction during reads. 
38 | #' \item \code{offset}, an integer scalar (or vector of length equal to \code{dim}) specifying the starting offset for each dimension's domain. 39 | #' \item \code{cellorder}, a string specifying the ordering of cells within each tile. 40 | #' \item \code{tileorder}, a string specifying the ordering of tiles across the array. 41 | #' \item \code{capacity}, an integer scalar specifying the size of each data tile in the sparse case. 42 | #' \item \code{context}, the TileDB context, defaulting to the output of \code{\link{tiledb_ctx}()}. 43 | #' } 44 | #' 45 | #' \code{writeTileDBArray(x, sparse=is_sparse(x), ...)} writes the matrix-like object \code{x} to a TileDB backend, 46 | #' returning a \linkS4class{TileDBArray} object referring to that backend. 47 | #' Appropriate values for \code{dim}, \code{dimnames} and \code{type} are determined automatically from \code{x} itself. 48 | #' All other arguments described for \code{TileDBRealizationSink} can be passed into \code{...} to configure the representation. 49 | #' 50 | #' @section Coercing to a TileDBArray: 51 | #' \code{as(x, "TileDBArray")} will coerce a matrix-like object \code{x} to a TileDBArray object. 52 | #' 53 | #' \code{as(x, "TileDBArraySeed")} will coerce a matrix-like object \code{x} to a TileDBArraySeed object. 54 | #' 55 | #' \code{as(x, "TileDBMatrix")} will coerce a matrix-like object \code{x} to a TileDBMatrix object. 56 | #' 57 | #' \code{as(x, "TileDBArray")} will coerce a TileDBRealizationSink \code{x} to a TileDBArray object. 58 | #' 59 | #' \code{as(x, "TileDBArraySeed")} will coerce a TileDBRealizationSink \code{x} to a TileDBArraySeed object. 60 | #' 61 | #' \code{as(x, "DelayedArray")} will coerce a TileDBRealizationSink \code{x} to a TileDBArray object. 
62 | #' 63 | #' @section Sink internals: 64 | #' \code{write_block(sink, viewport, block)} will write the subarray \code{block} to the TileDBRealizationSink \code{sink} 65 | #' at the specified \code{viewport}, returning \code{sink} upon completion. 66 | #' See \code{\link{write_block}} in \pkg{DelayedArray} for more details. 67 | #' 68 | #' \code{type(x)} will return a string specifying the type of the TileDBRealizationSink \code{x}. 69 | #' 70 | #' @examples 71 | #' X <- matrix(rnorm(100000), ncol=200) 72 | #' path <- tempfile() 73 | #' out <- writeTileDBArray(X, path=path) 74 | #' 75 | #' # Works for integer matrices. 76 | #' Xi <- matrix(rpois(100000, 2), ncol=200) 77 | #' pathi <- tempfile() 78 | #' outi <- writeTileDBArray(Xi, path=pathi) 79 | #' 80 | #' # Works for logical matrices. 81 | #' Xl <- matrix(rpois(100000, 0.5) > 0, ncol=200) 82 | #' pathl <- tempfile() 83 | #' outl <- writeTileDBArray(Xl, path=pathl) 84 | #' 85 | #' # Works for sparse numeric matrices. 86 | #' Y <- Matrix::rsparsematrix(1000, 1000, density=0.01) 87 | #' path2 <- tempfile() 88 | #' out2 <- writeTileDBArray(Y, path=path2) 89 | #' 90 | #' # And for sparse logical matrices. 91 | #' path2l <- tempfile() 92 | #' out2l <- writeTileDBArray(Y > 0, path=path2l) 93 | #' 94 | #' # Works for dimnames. 
#' rownames(X) <- sprintf("GENE_%i", seq_len(nrow(X)))
#' path3 <- tempfile()
#' out3 <- writeTileDBArray(X, path=path3)
#'
#' @aliases
#' writeTileDBArray
#' TileDBRealizationSink
#' TileDBRealizationSink-class
#' write_block,TileDBRealizationSink-method
#' type,TileDBRealizationSink-method
#' coerce,TileDBRealizationSink,TileDBMatrix-method
#' coerce,TileDBRealizationSink,TileDBArray-method
#' coerce,TileDBRealizationSink,DelayedArray-method
#' coerce,ANY,TileDBArray-method
#' coerce,ANY,TileDBMatrix-method
#' coerce,ANY,TileDBRealizationSink-method
#'
#' @name TileDBRealizationSink
NULL

# Create a new TileDB array at 'path' with one attribute and return a sink
# that write_block() can fill. Dimensions are named "d1", "d2", ...; the
# i-th domain is [offset[i], offset[i] + dim[i] - 1].
#' @export
TileDBRealizationSink <- function(
    dim,
    dimnames=NULL,
    type="double",
    path=getTileDBPath(),
    attr=getTileDBAttr(),
    storagetype=NULL,
    dimtype=getTileDBDimType(),
    sparse=FALSE,
    extent=getTileDBExtent(),
    offset=1L,
    cellorder=getTileDBCellOrder(),
    tileorder=getTileDBTileOrder(),
    capacity=getTileDBCapacity(),
    context=getTileDBContext())
{
    # The 'dim' slot is integer, so coerce up front rather than failing in
    # new() after the array has already been created on disk.
    dim <- as.integer(dim)
    ndim <- length(dim)
    collected <- vector("list", ndim)

    # Recycle scalars to one value per dimension. 'length.out' keeps a
    # per-dimension vector at length 'ndim' instead of repeating it 'ndim'
    # times, so the stored 'offset' slot matches the number of dimensions.
    extent <- rep(as.integer(extent), length.out=ndim)
    offset <- rep(as.integer(offset), length.out=ndim)

    for (i in seq_along(dim)) {
        curdim <- dim[i]
        # The tile extent is capped at the dimension length.
        ex <- min(extent[i], curdim)
        collected[[i]] <- tiledb_dim(
            ctx=context,
            paste0("d", i),
            offset[i] - 1L + c(1L, curdim),
            tile=ex,
            type=dimtype
        )
    }
    dom <- tiledb_domain(ctx=context, dims=collected)

    if (is.null(storagetype)) {
        storagetype <- r_to_tiledb_type(vector(type))
    }

    schema <- tiledb_array_schema(
        ctx=context,
        domain=dom,
        sparse=sparse,
        attrs=list(tiledb_attr(ctx=context, attr, type=storagetype)),
        cell_order = cellorder,
        tile_order = tileorder,
        capacity = capacity
    )

    # getTileDBPath() returns NULL when no global path has been set.
    if (is.null(path)) {
        path <- tempfile()
    }

    tiledb_array_create(path, schema)
    .edit_metadata(path, sparse=sparse, type=type, dimnames=dimnames)

    new("TileDBRealizationSink", dim=dim, type=type, path=path, sparse=sparse, attr=attr, offset=offset)
}

# Record in the array metadata what TileDBArraySeed() reads back from it: a
# "type" entry for logical arrays and a packed "dimnames" entry when at least
# one dimension has names. Nothing is opened if neither applies.
# Always returns NULL.
.edit_metadata <- function(path, sparse, type, dimnames) {
    has.logical <- type=="logical"
    has.dimnames <- !is.null(dimnames) && !all(vapply(dimnames, is.null, FALSE))
    if (!has.logical && !has.dimnames) {
        return(NULL)
    }

    obj <- tiledb_array(path)
    # 'obj' is looked up when the handler runs, so this closes the handle
    # re-opened for writing on the next line.
    on.exit(tiledb_array_close(obj))
    obj <- tiledb_array_open(obj, "WRITE") # not sure why it doesn't work with query_type="WRITE".

    # Need to keep track of the differences between INTs and LGLs.
    if (has.logical) {
        tiledb_put_metadata(obj, "type", type)
    }

    # Adding dimnames by packing them into base-64 encoding.
    if (has.dimnames) {
        tiledb_put_metadata(obj, "dimnames", .pack_dimnames(dimnames))
    }

    NULL
}

#' @importFrom S4Vectors setValidity2
setValidity2("TileDBRealizationSink", function(object) {
    msg <- .common_checks(object)
    if (length(msg)) {
        msg
    } else {
        TRUE
    }
})

# Write 'block' into the region of the sink described by 'viewport' and
# return the sink.
#' @export
#' @importClassesFrom SparseArray COO_SparseArray
#' @importFrom SparseArray nzcoo nzdata nzwhich
#' @importFrom DelayedArray start width
setMethod("write_block", "TileDBRealizationSink", function(sink, viewport, block) {
    # Zero-based shift of the viewport within the full array.
    starts <- start(viewport) - 1L
    obj <- tiledb_array(sink@path, attrs=sink@attr, query_type="WRITE")
    on.exit(tiledb_array_close(obj))

    if (sink@sparse) {
        # Need this because COO_SparseArray doesn't support [.
        if (is(block, "COO_SparseArray")) {
            idx <- nzcoo(block)
            vals <- nzdata(block)
        } else {
            idx <- nzwhich(block, arr.ind=TRUE)
            vals <- block[idx]
        }

        # One coordinate column per dimension (shifted into the domain),
        # followed by the values.
        ndim <- ncol(idx)
        store <- vector("list", ndim + 1L)
        for (i in seq_len(ndim)) {
            store[[i]] <- idx[,i] + starts[i] + sink@offset[i] - 1L
        }
        store[[ndim + 1]] <- vals

        names(store) <- c(sprintf("d%i", seq_len(ndim)), sink@attr)
        # check.names=FALSE keeps the attribute column named exactly as in
        # the schema, even when the name is not a syntactic R name.
        obj[] <- data.frame(store, check.names=FALSE)

    } else {
        args <- lapply(width(viewport), seq_len)
        for (i in seq_along(args)) {
            args[[i]] <- args[[i]] + starts[i] + sink@offset[i] - 1L
        }

        # Need to coerce the block, because it could be a SparseArray
        # derivative. The array is passed positionally because `[<-` has no
        # formal argument called 'sink'.
        args <- c(list(obj), args, list(value=as.array(block)))
        do.call("[<-", args)
    }

    sink
})

#' @export
setMethod("type", "TileDBRealizationSink", function(x) x@type)

# Write the array-like 'x' block by block into a new TileDB array and return
# a TileDBArray pointing at it. '...' is forwarded to TileDBRealizationSink().
#' @export
writeTileDBArray <- function(x, sparse=is_sparse(x), ...) {
    sink <- TileDBRealizationSink(dim(x), dimnames=dimnames(x), type=type(x), sparse=sparse, ...)
    sink <- BLOCK_write_to_sink(sink, x)
    as(sink, "TileDBArray")
}

setAs("TileDBRealizationSink", "TileDBArraySeed",
    function(from) TileDBArraySeed(from@path)
)

setAs("TileDBRealizationSink", "TileDBArray",
    function(from) DelayedArray(as(from, "TileDBArraySeed"))
)

setAs("TileDBRealizationSink", "DelayedArray",
    function(from) DelayedArray(as(from, "TileDBArraySeed"))
)

.as_TileDBArray <- function(from) writeTileDBArray(from)

setAs("ANY", "TileDBArray", .as_TileDBArray)

setAs("DelayedArray", "TileDBArray", .as_TileDBArray)

setAs("DelayedMatrix", "TileDBMatrix", .as_TileDBArray)

# --------------------------------------------------------------------------------
# /R/globals.R:
# --------------------------------------------------------------------------------
# Closure-backed store for the package-wide defaults. The keys are the ones
# used by the get*/set* functions in this file; an unset key reads as NULL.
.globals <- (function () {
    current <- list(
        path=NULL,
        attr=NULL,
        dimtype=NULL,
        extent=NULL,
        cellorder=NULL,
        tileorder=NULL,
        capacity=NULL,
        context=NULL
    )
    list(
        get=function(x) current[[x]],
        set=function(x, value) current[[x]] <<- value
    )
})()

#' TileDBArray global options
#'
#' Global options for writing TileDBArray backends,
#' intended for parameters that cannot be automatically derived from the data.
#'
#' @param path String containing a path to a TileDB backend.
#' @param attr String containing the name of a TileDB attribute.
#' @param dimtype String specifying the TileDB datatype to use for the dimensions.
#' @param extent Integer scalar specifying the tile extent for all dimensions.
#' Alternatively, an integer vector of length equal to the number of dimensions,
#' specifying a different extent for each dimension in the array to be created.
#' @param cellorder String specifying the desired cell order.
#' @param tileorder String specifying the desired tile order.
22 | #' @param capacity Integer scalar specifying the data tile capacity for sparse arrays. 23 | #' @param context A TileDB context object, see \code{\link{tiledb_ctx}} for an example. 24 | #' 25 | #' @return 26 | #' All of the getter functions return the current global value, 27 | #' or a default value if the former is \code{NULL}: 28 | #' \itemize{ 29 | #' \item \code{path} defaults to a temporary file in \code{\link{tempdir}}. 30 | #' \item \code{attr} defaults to \code{"x"}. 31 | #' \item \code{dimtype} defaults to \code{"INT32"}. 32 | #' \item \code{extent} defaults to \code{100L}. 33 | #' \item \code{cellorder} defaults to \code{"COL_MAJOR"}. 34 | #' \item \code{tileorder} defaults to \code{"COL_MAJOR"}. 35 | #' \item \code{capacity} defaults to \code{10000L}. 36 | #' \item \code{context} defaults to the value of \code{\link{tiledb_ctx}()}. 37 | #' } 38 | #' 39 | #' All setter functions change the global value and return \code{NULL} invisibly. 40 | #' 41 | #' @author Aaron Lun 42 | #' @examples 43 | #' \dontshow{old <- getTileDBPath()} 44 | #' 45 | #' setTileDBPath("my_local_dir") 46 | #' getTileDBPath() 47 | #' 48 | #' \dontshow{setTileDBPath(old)} 49 | #' 50 | #' @seealso \code{\link{writeTileDBArray}}, where these functions are most often used. 
51 | #' 52 | #' @name TileDBArray-globals 53 | NULL 54 | 55 | #' @export 56 | #' @rdname TileDBArray-globals 57 | getTileDBPath <- function() { 58 | .globals$get("path") 59 | } 60 | 61 | #' @export 62 | #' @rdname TileDBArray-globals 63 | setTileDBPath <- function(path=NULL) { 64 | .globals$set("path", path) 65 | invisible(NULL) 66 | } 67 | 68 | #' @export 69 | #' @rdname TileDBArray-globals 70 | getTileDBAttr <- function() { 71 | if (is.null(attr <- .globals$get("attr"))) { 72 | "x" 73 | } else { 74 | attr 75 | } 76 | } 77 | 78 | #' @export 79 | #' @rdname TileDBArray-globals 80 | setTileDBAttr <- function(attr=NULL) { 81 | .globals$set("attr", attr) 82 | invisible(NULL) 83 | } 84 | 85 | #' @export 86 | #' @rdname TileDBArray-globals 87 | getTileDBDimType <- function() { 88 | if (is.null(dimtype <- .globals$get("dimtype"))) { 89 | "INT32" 90 | } else { 91 | dimtype 92 | } 93 | } 94 | 95 | #' @export 96 | #' @rdname TileDBArray-globals 97 | setTileDBDimType <- function(dimtype=NULL) { 98 | .globals$set("dimtype", dimtype) 99 | invisible(NULL) 100 | } 101 | 102 | #' @export 103 | #' @rdname TileDBArray-globals 104 | getTileDBExtent <- function() { 105 | if (is.null(extent <- .globals$get("extent"))) { 106 | 100L 107 | } else { 108 | extent 109 | } 110 | } 111 | 112 | #' @export 113 | #' @rdname TileDBArray-globals 114 | setTileDBExtent <- function(extent=NULL) { 115 | .globals$set("extent", extent) 116 | invisible(NULL) 117 | } 118 | 119 | #' @export 120 | #' @rdname TileDBArray-globals 121 | getTileDBContext <- function() { 122 | if (is.null(context <- .globals$get("context"))) { 123 | tiledb_ctx() 124 | } else { 125 | context 126 | } 127 | } 128 | 129 | #' @export 130 | #' @rdname TileDBArray-globals 131 | setTileDBContext <- function(context=NULL) { 132 | .globals$set("context", context) 133 | invisible(NULL) 134 | } 135 | 136 | #' @export 137 | #' @rdname TileDBArray-globals 138 | getTileDBCellOrder <- function() { 139 | if (is.null(cellorder <- 
.globals$get("cellorder"))) { 140 | "COL_MAJOR" 141 | } else { 142 | cellorder 143 | } 144 | } 145 | 146 | #' @export 147 | #' @rdname TileDBArray-globals 148 | setTileDBCellOrder <- function(cellorder=NULL) { 149 | .globals$set("cellorder", cellorder) 150 | invisible(NULL) 151 | } 152 | 153 | #' @export 154 | #' @rdname TileDBArray-globals 155 | getTileDBTileOrder <- function() { 156 | if (is.null(tileorder <- .globals$get("tileorder"))) { 157 | "COL_MAJOR" 158 | } else { 159 | tileorder 160 | } 161 | } 162 | 163 | #' @export 164 | #' @rdname TileDBArray-globals 165 | setTileDBTileOrder <- function(tileorder=NULL) { 166 | .globals$set("tileorder", tileorder) 167 | invisible(NULL) 168 | } 169 | 170 | #' @export 171 | #' @rdname TileDBArray-globals 172 | getTileDBCapacity <- function() { 173 | if (is.null(capacity <- .globals$get("capacity"))) { 174 | 10000L 175 | } else { 176 | capacity 177 | } 178 | } 179 | 180 | #' @export 181 | #' @rdname TileDBArray-globals 182 | setTileDBCapacity <- function(capacity=NULL) { 183 | .globals$set("capacity", capacity) 184 | invisible(NULL) 185 | } 186 | -------------------------------------------------------------------------------- /R/package.R: -------------------------------------------------------------------------------- 1 | #' The \pkg{TileDBArray} package 2 | #' 3 | #' Implements the TileDB framework as a \linkS4class{DelayedArray} backend, 4 | #' with read and write functionality for both dense and sparse arrays. 5 | #' Currently only integer, logical and double-precision values are supported. 
6 | #' 7 | #' @author Aaron Lun 8 | #' @name TileDBArray-pkg 9 | #' @import tiledb 10 | #' @import DelayedArray 11 | #' @import methods 12 | NULL 13 | -------------------------------------------------------------------------------- /R/utils.R: -------------------------------------------------------------------------------- 1 | #' @importFrom S4Vectors isSingleString 2 | .common_checks <- function(object) { 3 | msg <- character(0) 4 | 5 | d <- dim(object) 6 | if (!all(d >= 0L)) { 7 | msg <- c(msg, "'dim' must contain non-negative integers") 8 | } 9 | 10 | if (!isSingleString(object@path)) { 11 | msg <- c(msg, "'path' must be a non-NA string") 12 | } 13 | 14 | if (!isSingleString(object@attr)) { 15 | msg <- c(msg, "'attr' must be a non-NA string") 16 | } 17 | 18 | if (!isSingleString(object@type)) { 19 | msg <- c(msg, "'type' must be a non-NA string") 20 | } 21 | 22 | s <- object@sparse 23 | if (length(s)!=1L || is.na(s)) { 24 | msg <- c(msg, "'sparse' must be a non-NA logical scalar") 25 | } 26 | 27 | msg 28 | } 29 | 30 | .pack_dimnames <- function(x) { 31 | as.integer(memCompress(serialize(x, NULL), "gzip")) 32 | } 33 | 34 | .unpack_dimnames <- function(x) { 35 | unserialize(memDecompress(as.raw(x), "gzip")) 36 | } 37 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DelayedArray backends for TileDB 2 | 3 | |Build|Status| 4 | |-----|----| 5 | | Bioc-release | [![](http://bioconductor.org/shields/build/release/bioc/TileDBArray.svg)](http://bioconductor.org/checkResults/release/bioc-LATEST/TileDBArray) | 6 | | Bioc-devel | [![](http://bioconductor.org/shields/build/devel/bioc/TileDBArray.svg)](http://bioconductor.org/checkResults/devel/bioc-LATEST/TileDBArray) | 7 | 8 | This package implements [**DelayedArray**](https://github.com/Bioconductor/DelayedArray) backend 9 | for [TileDB](https://tiledb.com/) to read, write and store dense and 
sparse arrays. 10 | The resulting `TileDBArray` objects are directly compatible with any Bioconductor package that accepts `DelayedArray` objects, 11 | serving as a swap-in replacement for the predominant [`HDF5Array`](https://github.com/Bioconductor/HDF5Array) 12 | that is currently used throughout the Bioconductor ecosystem for representing large datasets. 13 | See the [official Bioconductor landing page](https://bioconductor.org/packages/devel/bioc/html/TileDBArray.html) for more details. 14 | -------------------------------------------------------------------------------- /inst/NEWS.Rd: -------------------------------------------------------------------------------- 1 | \name{TileDBArrayNews} 2 | \title{TileDBArray News} 3 | \encoding{UTF-8} 4 | 5 | \section{Version 1.16.0}{\itemize{ 6 | \item Minor fix for \code{as.data.frame=} deprecation in \code{tiledb_array()}. 7 | 8 | \item Support other datatypes for the dimensions and storage when configuring a \code{TileDBRealizationSink}. 9 | This is achieved via the new \code{storagetype=} and \code{dimtype=} arguments. 10 | Also added \code{getTileDBDimType()} and \code{setTileDBDimType()} to globally define the choice of dimension datatype. 11 | 12 | \item Bugfix to \code{TileDBArraySeed} to correctly handle dimension domains that do not start at 1. 13 | This requires a modification to the class to record the domain offset. 14 | 15 | \item Added an \code{offset=} option to \code{TileDBRealizationSink()} to create arrays with dimension domains that do not start at 1. 16 | This requires a modification to the associated class to record the domain offset. 17 | }} 18 | 19 | \section{Version 1.2.0}{\itemize{ 20 | \item 21 | Add more schema options during TileDBArray construction. 22 | 23 | \item 24 | Use \code{tiledb_array()} for both sparse and dense arrays. 25 | }} 26 | 27 | \section{Version 1.0.0}{\itemize{ 28 | \item 29 | New package \pkg{TileDBArray} implementing a DelayedArray backend for TileDB.
30 | }} 31 | 32 | -------------------------------------------------------------------------------- /man/TileDBArray-globals.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/globals.R 3 | \name{TileDBArray-globals} 4 | \alias{TileDBArray-globals} 5 | \alias{getTileDBPath} 6 | \alias{setTileDBPath} 7 | \alias{getTileDBAttr} 8 | \alias{setTileDBAttr} 9 | \alias{getTileDBDimType} 10 | \alias{setTileDBDimType} 11 | \alias{getTileDBExtent} 12 | \alias{setTileDBExtent} 13 | \alias{getTileDBContext} 14 | \alias{setTileDBContext} 15 | \alias{getTileDBCellOrder} 16 | \alias{setTileDBCellOrder} 17 | \alias{getTileDBTileOrder} 18 | \alias{setTileDBTileOrder} 19 | \alias{getTileDBCapacity} 20 | \alias{setTileDBCapacity} 21 | \title{TileDBArray global options} 22 | \usage{ 23 | getTileDBPath() 24 | 25 | setTileDBPath(path = NULL) 26 | 27 | getTileDBAttr() 28 | 29 | setTileDBAttr(attr = NULL) 30 | 31 | getTileDBDimType() 32 | 33 | setTileDBDimType(dimtype = NULL) 34 | 35 | getTileDBExtent() 36 | 37 | setTileDBExtent(extent = NULL) 38 | 39 | getTileDBContext() 40 | 41 | setTileDBContext(context = NULL) 42 | 43 | getTileDBCellOrder() 44 | 45 | setTileDBCellOrder(cellorder = NULL) 46 | 47 | getTileDBTileOrder() 48 | 49 | setTileDBTileOrder(tileorder = NULL) 50 | 51 | getTileDBCapacity() 52 | 53 | setTileDBCapacity(capacity = NULL) 54 | } 55 | \arguments{ 56 | \item{path}{String containing a path to a TileDB backend.} 57 | 58 | \item{attr}{String containing the name of a TileDB attribute.} 59 | 60 | \item{dimtype}{String specifying the TileDB datatype to use for the dimensions.} 61 | 62 | \item{extent}{Integer scalar specifying the tile extent for all dimensions. 
63 | Alternatively, an integer vector of length equal to the number of dimensions, 64 | specifying a different extent for each dimension in the array to be created.} 65 | 66 | \item{context}{A TileDB context object, see \code{\link{tiledb_ctx}} for an example.} 67 | 68 | \item{cellorder}{String specifying the desired cell order.} 69 | 70 | \item{tileorder}{String specifying the desired tile order.} 71 | 72 | \item{capacity}{Integer scalar specifying the data tile capacity for sparse arrays.} 73 | } 74 | \value{ 75 | All of the getter functions return the current global value, 76 | or a default value if the former is \code{NULL}: 77 | \itemize{ 78 | \item \code{path} defaults to a temporary file in \code{\link{tempdir}}. 79 | \item \code{attr} defaults to \code{"x"}. 80 | \item \code{dimtype} defaults to \code{"INT32"}. 81 | \item \code{extent} defaults to \code{100L}. 82 | \item \code{cellorder} defaults to \code{"COL_MAJOR"}. 83 | \item \code{tileorder} defaults to \code{"COL_MAJOR"}. 84 | \item \code{capacity} defaults to \code{10000L}. 85 | \item \code{context} defaults to the value of \code{\link{tiledb_ctx}()}. 86 | } 87 | 88 | All setter functions change the global value and return \code{NULL} invisibly. 89 | } 90 | \description{ 91 | Global options for writing TileDBArray backends, 92 | intended for parameters that cannot be automatically derived from the data. 93 | } 94 | \examples{ 95 | \dontshow{old <- getTileDBPath()} 96 | 97 | setTileDBPath("my_local_dir") 98 | getTileDBPath() 99 | 100 | \dontshow{setTileDBPath(old)} 101 | 102 | } 103 | \seealso{ 104 | \code{\link{writeTileDBArray}}, where these functions are most often used. 
105 | } 106 | \author{ 107 | Aaron Lun 108 | } 109 | -------------------------------------------------------------------------------- /man/TileDBArray-pkg.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/package.R 3 | \name{TileDBArray-pkg} 4 | \alias{TileDBArray-pkg} 5 | \title{The \pkg{TileDBArray} package} 6 | \description{ 7 | Implements the TileDB framework as a \linkS4class{DelayedArray} backend, 8 | with read and write functionality for both dense and sparse arrays. 9 | Currently only integer, logical and double-precision values are supported. 10 | } 11 | \author{ 12 | Aaron Lun 13 | } 14 | -------------------------------------------------------------------------------- /man/TileDBArray.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/TileDBArray.R 3 | \name{TileDBArray} 4 | \alias{TileDBArray} 5 | \alias{TileDBArraySeed} 6 | \alias{TileDBArraySeed-class} 7 | \alias{TileDBArray-class} 8 | \alias{TileDBMatrix} 9 | \alias{TileDBMatrix-class} 10 | \alias{show,TileDBArraySeed-method} 11 | \alias{is_sparse,TileDBArraySeed-method} 12 | \alias{type,TileDBArraySeed-method} 13 | \alias{extract_array,TileDBArraySeed-method} 14 | \alias{extract_sparse_array,TileDBArraySeed-method} 15 | \alias{DelayedArray,TileDBArraySeed-method} 16 | \alias{path,TileDBArraySeed-method} 17 | \alias{chunkdim,TileDBArraySeed-method} 18 | \alias{matrixClass,TileDBArray-method} 19 | \title{Delayed TileDB arrays} 20 | \description{ 21 | The TileDBArray class provides a \linkS4class{DelayedArray} backend for TileDB arrays (sparse and dense). 
22 | } 23 | \section{Constructing a TileDBArray}{ 24 | 25 | \code{TileDBArray(x, attr)} returns a TileDBArray object given: 26 | \itemize{ 27 | \item \code{x}, a string containing a URI to a TileDB backend, most typically a path to a directory. 28 | \item \code{attr}, a string specifying the attribute to represent in the array. 29 | Defaults to the first attribute. 30 | } 31 | Alternatively, \code{x} can be a TileDBArraySeed object, in which case \code{attr} is ignored. 32 | 33 | \code{TileDBArraySeed(x, attr)} returns a TileDBArraySeed 34 | with the same arguments as described for \code{TileDBArray}. 35 | If \code{x} is already a TileDBArraySeed, it is returned 36 | directly without further modification. 37 | 38 | \code{\link{DelayedArray}(x)} returns a TileDBArray object 39 | given \code{x}, a TileDBArraySeed. 40 | 41 | In all cases, two-dimensional arrays will automatically generate a TileDBMatrix, 42 | a subclass of the TileDBArray. 43 | } 44 | 45 | \section{Available operations}{ 46 | 47 | \code{\link{extract_array}(x, index)} will return an ordinary array containing values from the TileDBArraySeed \code{x}, 48 | subsetted to the indices specified in \code{index}. 49 | The latter should be a list of length equal to the number of dimensions in \code{x}, 50 | where each entry is an integer vector or \code{NULL} (in which case the entirety of the dimension is used). 51 | 52 | \code{\link{extract_sparse_array}(x, index)} will return a \linkS4class{COO_SparseArray} 53 | representing the subset of \code{x} corresponding to the indices in \code{index}. 54 | The latter should be a list of the same structure as described for \code{extract_array}. 55 | 56 | \code{\link{type}(x)} will return a string containing the type of the TileDBArraySeed object \code{x}. 57 | Currently, only the \code{"integer"}, \code{"logical"} and \code{"double"} types are supported.
58 | 59 | \code{\link{is_sparse}(x)} will return a logical scalar indicating 60 | whether the TileDBArraySeed \code{x} uses a sparse format in the TileDB backend. 61 | 62 | \code{\link{path}(x)} will return a string containing the path to the TileDB backend directory. 63 | 64 | \code{\link{chunkdim}(x)} will return an integer vector containing the tile extent in each dimension. 65 | This will be used as the chunk dimensions in methods like \code{\link{chunkGrid}}. 66 | 67 | All of the operations described above are also equally applicable to TileDBArray objects, 68 | as their methods simply delegate to those of the TileDBArraySeed. 69 | 70 | All operations supported by \linkS4class{DelayedArray} objects are 71 | also available for TileDBArray objects. 72 | } 73 | 74 | \examples{ 75 | data <- matrix(rpois(10000, 5), nrow=100, ncol=100) 76 | B <- as(data, "TileDBArray") 77 | B 78 | 79 | # Apply typical DelayedArray operations: 80 | as.matrix(B[1:10,1:10]) 81 | B \%*\% runif(ncol(B)) 82 | 83 | # This also works for sparse arrays: 84 | sdata <- Matrix::rsparsematrix(nrow=100, ncol=100, density=0.1) 85 | C <- as(sdata, "TileDBArray") 86 | C 87 | 88 | } 89 | \author{ 90 | Aaron Lun 91 | } 92 | -------------------------------------------------------------------------------- /man/TileDBRealizationSink.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/TileDBRealizationSink.R 3 | \name{TileDBRealizationSink} 4 | \alias{TileDBRealizationSink} 5 | \alias{writeTileDBArray} 6 | \alias{TileDBRealizationSink-class} 7 | \alias{write_block,TileDBRealizationSink-method} 8 | \alias{type,TileDBRealizationSink-method} 9 | \alias{coerce,TileDBRealizationSink,TileDBMatrix-method} 10 | \alias{coerce,TileDBRealizationSink,TileDBArray-method} 11 | \alias{coerce,TileDBRealizationSink,DelayedArray-method} 12 | \alias{coerce,ANY,TileDBArray-method} 13 | 
\alias{coerce,ANY,TileDBMatrix-method} 14 | \alias{coerce,ANY,TileDBRealizationSink-method} 15 | \title{Write arrays to TileDB} 16 | \description{ 17 | Write array data to a TileDB backend via \pkg{DelayedArray}'s \linkS4class{RealizationSink} machinery. 18 | } 19 | \section{Writing a TileDBArray}{ 20 | 21 | \preformatted{TileDBRealizationSink( 22 | dim, 23 | dimnames=NULL, 24 | type="double", 25 | path=getTileDBPath(), 26 | attr=getTileDBAttr(), 27 | storagetype=NULL, 28 | dimtype=getTileDBDimType(), 29 | sparse=FALSE, 30 | extent=getTileDBExtent(), 31 | offset=1L, 32 | cellorder=getTileDBCellOrder(), 33 | tileorder=getTileDBTileOrder(), 34 | capacity=getTileDBCapacity(), 35 | context=getTileDBContext() 36 | )} 37 | returns a TileDBRealizationSink object that can be used to write content to a TileDB backend. 38 | It accepts the following arguments: 39 | \itemize{ 40 | \item \code{dim}, an integer vector (usually of length 2) to specify the array dimensions. 41 | \item \code{dimnames}, a list of length equal to \code{dim}, containing character vectors with names for each dimension. 42 | Defaults to \code{NULL}, i.e., no dimnames. 43 | \item \code{type}, a string specifying the R data type for the newly written array. 44 | Currently only \code{"double"}, \code{"integer"} and \code{"logical"} arrays are supported. 45 | \item \code{path}, a string containing the location of the new TileDB backend. 46 | \item \code{attr}, a string specifying the name of the attribute to store. 47 | \item \code{storagetype}, a string specifying the TileDB data type for the attribute, e.g., \code{"UINT8"}, \code{"FLOAT32"}. 48 | If \code{NULL}, this is automatically determined from \code{type} using \code{\link{r_to_tiledb_type}}. 49 | \item \code{dimtype}, a string specifying the TileDB data type for the dimension. 50 | \item \code{sparse}, a logical scalar indicating whether the array should be stored in sparse form. 
51 | \item \code{extent}, an integer scalar (or vector of the same length as \code{dim}) specifying the tile extent for each dimension. 52 | Larger values improve compression at the cost of unnecessary data extraction during reads. 53 | \item \code{offset}, an integer scalar (or vector of the same length as \code{dim}) specifying the starting offset for each dimension's domain. 54 | \item \code{cellorder}, a string specifying the ordering of cells within each tile. 55 | \item \code{tileorder}, a string specifying the ordering of tiles across the array. 56 | \item \code{capacity}, an integer scalar specifying the size of each data tile in the sparse case. 57 | \item \code{context} is the TileDB context, defaulting to the output of \code{\link{tiledb_ctx}()}. 58 | } 59 | 60 | \code{writeTileDBArray(x, sparse=is_sparse(x), ...)} writes the matrix-like object \code{x} to a TileDB backend, 61 | returning a \linkS4class{TileDBArray} object referring to that backend. 62 | Appropriate values for \code{dim}, \code{dimnames} and \code{type} are determined automatically from \code{x} itself. 63 | All other arguments described for \code{TileDBRealizationSink} can be passed into \code{...} to configure the representation. 64 | } 65 | 66 | \section{Coercing to a TileDBArray}{ 67 | 68 | \code{as(x, "TileDBArray")} will coerce a matrix-like object \code{x} to a TileDBArray object. 69 | 70 | \code{as(x, "TileDBArraySeed")} will coerce a matrix-like object \code{x} to a TileDBArraySeed object. 71 | 72 | \code{as(x, "TileDBMatrix")} will coerce a matrix-like object \code{x} to a TileDBMatrix object. 73 | 74 | \code{as(x, "TileDBArray")} will coerce a TileDBRealizationSink \code{x} to a TileDBArray object. 75 | 76 | \code{as(x, "TileDBArraySeed")} will coerce a TileDBRealizationSink \code{x} to a TileDBArraySeed object. 77 | 78 | \code{as(x, "DelayedArray")} will coerce a TileDBRealizationSink \code{x} to a TileDBArray object.
79 | } 80 | 81 | \section{Sink internals}{ 82 | 83 | \code{write_block(sink, viewport, block)} will write the subarray \code{block} to the TileDBRealizationSink \code{sink} 84 | at the specified \code{viewport}, returning \code{sink} upon completion. 85 | See \code{\link{write_block}} in \pkg{DelayedArray} for more details. 86 | 87 | \code{type(x)} will return a string specifying the type of the TileDBRealizationSink \code{x}. 88 | } 89 | 90 | \examples{ 91 | X <- matrix(rnorm(100000), ncol=200) 92 | path <- tempfile() 93 | out <- writeTileDBArray(X, path=path) 94 | 95 | # Works for integer matrices. 96 | Xi <- matrix(rpois(100000, 2), ncol=200) 97 | pathi <- tempfile() 98 | outi <- writeTileDBArray(Xi, path=pathi) 99 | 100 | # Works for logical matrices. 101 | Xl <- matrix(rpois(100000, 0.5) > 0, ncol=200) 102 | pathl <- tempfile() 103 | outl <- writeTileDBArray(Xl, path=pathl) 104 | 105 | # Works for sparse numeric matrices. 106 | Y <- Matrix::rsparsematrix(1000, 1000, density=0.01) 107 | path2 <- tempfile() 108 | out2 <- writeTileDBArray(Y, path=path2) 109 | 110 | # And for sparse logical matrices. 111 | path2l <- tempfile() 112 | out2l <- writeTileDBArray(Y > 0, path=path2l) 113 | 114 | # Works for dimnames. 115 | rownames(X) <- sprintf("GENE_\%i", seq_len(nrow(X))) 116 | path3 <- tempfile() 117 | out3 <- writeTileDBArray(X, path=path3) 118 | 119 | } 120 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(TileDBArray) 3 | test_check("TileDBArray") 4 | -------------------------------------------------------------------------------- /tests/testthat/test-read.R: -------------------------------------------------------------------------------- 1 | # This tests the reading capability of our beloved TileDBArray. 
2 | # library(testthat); library(TileDBArray); source("test-read.R") 3 | 4 | DI <- matrix(rpois(10000, 5), nrow=200) 5 | XI <- as(DI, "TileDBArray") 6 | 7 | DD <- matrix(rnorm(10000), nrow=200) 8 | XD <- as(DD, "TileDBArray") 9 | 10 | DL <- DD > 0 11 | XL <- as(DL, "TileDBArray") 12 | 13 | SD <- Matrix::rsparsematrix(50, 50, density=0.1) 14 | YD <- as(SD, "TileDBArray") 15 | 16 | SL <- SD > 0 17 | YL <- as(SL, "TileDBArray") 18 | 19 | tdb <- list(XI, XD, XL, YD, YL) 20 | ref <- list(DI, DD, DL, SD, SL) 21 | 22 | test_that("basic read operations work correctly ", { 23 | for (x in seq_along(tdb)) { 24 | r <- ref[[x]] 25 | t <- tdb[[x]] 26 | 27 | expect_identical(dim(r), dim(t)) 28 | expect_identical(type(r), type(t)) 29 | } 30 | }) 31 | 32 | test_that("subsetting works correctly ", { 33 | for (x in seq_along(tdb)) { 34 | r <- ref[[x]] 35 | t <- tdb[[x]] 36 | 37 | for (i in sample(nrow(r), 10)) { 38 | expect_identical(r[i,] , t[i,]) 39 | } 40 | 41 | for (j in sample(ncol(r), 10)) { 42 | expect_identical(r[,j] , t[,j]) 43 | } 44 | } 45 | }) 46 | 47 | test_that("matrix subset extraction works correctly ", { 48 | for (x in seq_along(tdb)) { 49 | r <- ref[[x]] 50 | t <- tdb[[x]] 51 | 52 | i <- sample(nrow(r), 20, replace=TRUE) 53 | j <- sample(ncol(r), 20, replace=TRUE) 54 | 55 | expect_equivalent(as.matrix(r[i,]), as.matrix(t[i,])) 56 | expect_equivalent(as.matrix(r[,j]), as.matrix(t[,j])) 57 | expect_equivalent(as.matrix(r[i,j]), as.matrix(t[i,j])) 58 | } 59 | }) 60 | 61 | test_that("more complex matrix operations work correctly", { 62 | for (x in seq_along(tdb)) { 63 | r <- ref[[x]] 64 | t <- tdb[[x]] 65 | 66 | expect_equal(colSums(r), colSums(t)) 67 | expect_equal(rowSums(r), rowSums(t)) 68 | 69 | v <- matrix(rnorm(ncol(r)*2), ncol=2) 70 | expect_equivalent(as.matrix(r %*% v), as.matrix(t %*% v)) 71 | 72 | expect_equivalent(as.matrix(t(r)), as.matrix(t(t))) 73 | } 74 | }) 75 | 76 | test_that("read operations work correctly with a non-zero offset", { 77 | XI <- 
writeTileDBArray(DI, offset=c(5L, -5L)) 78 | expect_equivalent(as.matrix(XI), as.matrix(DI)) 79 | 80 | YD <- writeTileDBArray(SD, offset=c(-10L, 10L)) 81 | expect_equivalent(as.matrix(YD), as.matrix(SD)) 82 | }) 83 | 84 | test_that("extract_array works in a wide variety of scenarios", { 85 | XI <- writeTileDBArray(DI, offset=c(5L, -5L)) 86 | YD <- writeTileDBArray(SD, offset=c(-10L, 10L)) 87 | tests <- list( 88 | list(mat=DI, ref=XI), 89 | list(mat=YD, ref=SD) 90 | ) 91 | 92 | for (x in tests) { 93 | NR <- nrow(x$ref) 94 | NC <- ncol(x$ref) 95 | 96 | for (i in 1:7) { 97 | if (i == 1L) { 98 | indices <- list(NULL, NULL) 99 | } else if (i == 2L) { 100 | indices <- list(NULL, 1:NC) # sorted, unique 101 | } else if (i == 3L) { 102 | indices <- list(sample(NR), NULL) # unsorted, unique 103 | } else if (i == 4L) { 104 | indices <- list(rep(1:NR, each=2L), NULL) # sorted, duplicate 105 | } else if (i == 5L) { 106 | indices <- list(sample(rep(1:NR, each=2)), sample(rep(1:NC, each=3))) # unsorted, duplicate 107 | } else if (i == 6L) { 108 | indices <- list(seq(1, NR, by=2), seq(1, NC, by=3)) # non-consecutive jumps. 109 | } else { 110 | indices <- list(integer(0), integer(0)) 111 | } 112 | 113 | expect_equivalent( 114 | extract_array(x$mat, indices), 115 | extract_array(x$ref, indices) 116 | ) 117 | } 118 | } 119 | }) 120 | 121 | test_that("extract_sparse_array works in a wide variety of scenarios", { 122 | YD <- writeTileDBArray(SD, offset=c(-10L, 10L)) 123 | NR <- nrow(YD) 124 | NC <- ncol(YD) 125 | 126 | for (i in 1:5) { 127 | if (i == 1L) { 128 | indices <- list(NULL, NULL) 129 | } else if (i == 2L) { 130 | indices <- list(NULL, 1:NC) # sorted, unique 131 | } else if (i == 3L) { 132 | indices <- list(sample(NR), NULL) # unsorted, unique 133 | } else if (i == 4L) { 134 | indices <- list(seq(1, NR, by=2), seq(1, NC, by=3)) # non-consecutive jumps. 
135 | } else { 136 | indices <- list(integer(0), integer(0)) 137 | } 138 | 139 | expect_equivalent(extract_sparse_array(YD, indices), extract_sparse_array(SD, indices)) 140 | } 141 | }) 142 | -------------------------------------------------------------------------------- /tests/testthat/test-write.R: -------------------------------------------------------------------------------- 1 | # This tests the writing capability of our beloved TileDBArray. 2 | # library(testthat); library(TileDBArray); source("test-write.R") 3 | 4 | DI <- matrix(rpois(10000, 5), nrow=200) 5 | DD <- matrix(rnorm(10000), nrow=200) 6 | DL <- DD > 0 7 | 8 | SD <- Matrix::rsparsematrix(50, 50, density=0.1) 9 | SL <- SD > 0 10 | 11 | ref <- list(DI, DD, DL, SD, SL) 12 | 13 | test_that("writing works as expected", { 14 | for (x in seq_along(ref)) { 15 | r <- ref[[x]] 16 | t <- as(r, "TileDBArray") 17 | 18 | expect_equivalent(as.matrix(r), as.matrix(t)) 19 | expect_identical(type(r), type(t)) 20 | 21 | # No dimnames yet. 22 | expect_identical(rownames(r), rownames(t)) 23 | expect_identical(colnames(r), colnames(t)) 24 | 25 | # Automatically sparse. 26 | expect_identical(is_sparse(r), is_sparse(t)) 27 | } 28 | }) 29 | 30 | test_that("handles and restores row names correctly", { 31 | Y <- DD 32 | rownames(Y) <- sprintf("GENE_%i", seq_len(nrow(Y))) 33 | colnames(Y) <- sprintf("SAMPLE_%i", seq_len(ncol(Y))) 34 | 35 | Z <- as(Y, "TileDBArray") 36 | expect_identical(dimnames(Y), dimnames(Z)) 37 | 38 | # Still the case when pulling from the path.
39 | AA <- TileDBArray(seed(Z)@path) 40 | expect_identical(dimnames(Y), dimnames(AA)) 41 | }) 42 | 43 | test_that("can shunt between sparse and non-sparse, regardless of the format", { 44 | for (x in seq_along(ref)) { 45 | r <- ref[[x]] 46 | 47 | t <- writeTileDBArray(r, sparse=FALSE) 48 | expect_identical(is_sparse(t), FALSE) 49 | expect_equivalent(as.matrix(r), as.matrix(t)) 50 | 51 | t <- writeTileDBArray(r, sparse=TRUE) 52 | expect_identical(is_sparse(t), TRUE) 53 | expect_equivalent(as.matrix(r), as.matrix(t)) 54 | } 55 | }) 56 | 57 | test_that("responds to the path", { 58 | path <- tempfile() 59 | expect_false(file.exists(path)) 60 | out <- writeTileDBArray(DD, path=path) 61 | expect_true(file.exists(path)) 62 | 63 | # As a global variable. 64 | path <- tempfile() 65 | expect_false(file.exists(path)) 66 | 67 | setTileDBPath(path) 68 | out <- as(DD, "TileDBArray") 69 | expect_true(file.exists(path)) 70 | 71 | # Unsets itself properly. 72 | setTileDBPath() 73 | expect_null(getTileDBPath()) 74 | }) 75 | 76 | test_that("stores non-unity offsets correctly", { 77 | path <- tempfile() 78 | expect_false(file.exists(path)) 79 | out <- writeTileDBArray(DD, path=path, offset=c(-5, 10)) 80 | expect_true(file.exists(path)) 81 | 82 | x <- tiledb::tiledb_array(path) 83 | s <- tiledb::schema(x) 84 | dims <- tiledb::dimensions(s) 85 | doms <- lapply(dims, tiledb::domain) 86 | expect_identical(c(-5L, 10L), vapply(doms, function(x) x[1L], 0L)) 87 | }) 88 | 89 | test_that("other global variables behave as expected", { 90 | expect_identical(getTileDBExtent(), 100L) 91 | setTileDBExtent(50L) 92 | expect_identical(getTileDBExtent(), 50L) 93 | setTileDBExtent(NULL) 94 | expect_identical(getTileDBExtent(), 100L) 95 | 96 | expect_identical(getTileDBAttr(), "x") 97 | setTileDBAttr("blah") 98 | expect_identical(getTileDBAttr(), "blah") 99 | setTileDBAttr(NULL) 100 | expect_identical(getTileDBAttr(), "x") 101 | 102 | default <- getTileDBContext() 103 | setTileDBContext("blah") 104 | 
expect_identical(getTileDBContext(), "blah") 105 | setTileDBContext(NULL) 106 | expect_identical(getTileDBContext(), default) 107 | }) 108 | -------------------------------------------------------------------------------- /vignettes/userguide.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: A DelayedArray backend for TileDB 3 | author: 4 | - name: Aaron Lun 5 | email: infinite.monkeys.with.keyboards@gmail.com 6 | date: "Revised: June 12, 2020" 7 | output: 8 | BiocStyle::html_document: 9 | toc_float: yes 10 | package: TileDBArray 11 | vignette: > 12 | %\VignetteIndexEntry{User guide} 13 | %\VignetteEngine{knitr::rmarkdown} 14 | %\VignetteEncoding{UTF-8} 15 | --- 16 | 17 | ```{r, echo=FALSE, results="hide"} 18 | knitr::opts_chunk$set(error=FALSE, message=FALSE, warning=FALSE) 19 | library(BiocStyle) 20 | ``` 21 | 22 | # Introduction 23 | 24 | TileDB implements a framework for local and remote storage of dense and sparse arrays. 25 | We can use this as a `DelayedArray` backend to provide an array-level abstraction, 26 | thus allowing the data to be used in many places where an ordinary array or matrix might be used. 27 | The `r Biocpkg("TileDBArray")` package implements the necessary wrappers around `r Githubpkg("TileDB-Inc/TileDB-R")` 28 | to support read/write operations on TileDB arrays within the `r Biocpkg("DelayedArray")` framework. 
29 | 30 | # Creating a `TileDBArray` 31 | 32 | Creating a `TileDBArray` is as easy as: 33 | 34 | ```{r} 35 | X <- matrix(rnorm(1000), ncol=10) 36 | library(TileDBArray) 37 | writeTileDBArray(X) 38 | ``` 39 | 40 | Alternatively, we can use coercion methods: 41 | 42 | ```{r} 43 | as(X, "TileDBArray") 44 | ``` 45 | 46 | This process also works for sparse matrices: 47 | 48 | ```{r} 49 | Y <- Matrix::rsparsematrix(1000, 1000, density=0.01) 50 | writeTileDBArray(Y) 51 | ``` 52 | 53 | Logical and integer matrices are supported: 54 | 55 | ```{r} 56 | writeTileDBArray(Y > 0) 57 | ``` 58 | 59 | As are matrices with dimension names: 60 | 61 | ```{r} 62 | rownames(X) <- sprintf("GENE_%i", seq_len(nrow(X))) 63 | colnames(X) <- sprintf("SAMP_%i", seq_len(ncol(X))) 64 | writeTileDBArray(X) 65 | ``` 66 | 67 | # Manipulating `TileDBArray`s 68 | 69 | `TileDBArray`s are simply `DelayedArray` objects and can be manipulated as such. 70 | The usual conventions for extracting data from matrix-like objects work as expected: 71 | 72 | ```{r} 73 | out <- as(X, "TileDBArray") 74 | dim(out) 75 | head(rownames(out)) 76 | head(out[,1]) 77 | ``` 78 | 79 | We can also perform manipulations like subsetting and arithmetic. 80 | Note that these operations do not affect the data in the TileDB backend; 81 | rather, they are delayed until the values are explicitly required, 82 | hence the creation of the `DelayedMatrix` object. 83 | 84 | ```{r} 85 | out[1:5,1:5] 86 | out * 2 87 | ``` 88 | 89 | We can also do more complex matrix operations that are supported by `r Biocpkg("DelayedArray")`: 90 | 91 | ```{r} 92 | colSums(out) 93 | out %*% runif(ncol(out)) 94 | ``` 95 | 96 | # Controlling backend creation 97 | 98 | We can adjust some parameters for creating the backend with appropriate arguments to `writeTileDBArray()`. 99 | For instance, the call below allows us to control the path to the backend 100 | as well as the name of the attribute containing the data.
101 | 102 | ```{r} 103 | X <- matrix(rnorm(1000), ncol=10) 104 | path <- tempfile() 105 | writeTileDBArray(X, path=path, attr="WHEE") 106 | ``` 107 | 108 | As these arguments cannot be passed during coercion, 109 | we instead provide global variables that can be set or unset to affect the outcome. 110 | 111 | ```{r} 112 | path2 <- tempfile() 113 | setTileDBPath(path2) 114 | as(X, "TileDBArray") # uses path2 to store the backend. 115 | ``` 116 | 117 | # Session information 118 | 119 | ```{r} 120 | sessionInfo() 121 | ``` 122 | --------------------------------------------------------------------------------