├── R
├── sysdata.rda
├── AnVIL-package.R
├── zzz.R
├── TDR.R
├── authenticate.R
├── Rawls.R
├── Leonardo.R
├── Dockstore.R
├── Terra.R
├── utilities.R
├── api.R
├── Services.R
├── Service.R
└── gadgets.R
├── tests
├── testthat.R
└── testthat
│ ├── api-Rawls.rds
│ ├── api-Terra.rds
│ ├── api-Leonardo.rds
│ ├── test_api.R
│ └── test_Services.R
├── inst
├── extdata
│ ├── avworkflow_info_1.rds
│ └── avworkflow_info_2.rds
├── service
│ ├── terra
│ │ └── README.md
│ ├── rawls
│ │ └── README.md
│ ├── tdr
│ │ └── README.md
│ ├── leonardo
│ │ └── README.md
│ └── dockstore
│ │ └── README.md
└── scripts
│ └── update_dockstore_api.R
├── vignettes
├── images
│ └── AnVIL-Workspace-Data.png
├── BiocDockstore.Rmd
├── RunningWorkflow.Rmd
└── Introduction.Rmd
├── .Rbuildignore
├── .gitignore
├── man
├── utilities.Rd
├── figures
│ └── lifecycle-deprecated.svg
├── AnVIL-package.Rd
├── gadgets_developer.Rd
├── gadgets.Rd
├── Service.Rd
└── Services.Rd
├── inputs.json
├── README.md
├── .github
└── workflows
│ └── update_api.yml
├── DESCRIPTION
├── NAMESPACE
└── NEWS.md
/R/sysdata.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bioconductor/AnVIL/devel/R/sysdata.rda
--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(AnVIL)
3 | ## run the package's testthat suite (test files live in tests/testthat/)
4 | test_check("AnVIL")
5 |
--------------------------------------------------------------------------------
/tests/testthat/api-Rawls.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bioconductor/AnVIL/devel/tests/testthat/api-Rawls.rds
--------------------------------------------------------------------------------
/tests/testthat/api-Terra.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bioconductor/AnVIL/devel/tests/testthat/api-Terra.rds
--------------------------------------------------------------------------------
/tests/testthat/api-Leonardo.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bioconductor/AnVIL/devel/tests/testthat/api-Leonardo.rds
--------------------------------------------------------------------------------
/inst/extdata/avworkflow_info_1.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bioconductor/AnVIL/devel/inst/extdata/avworkflow_info_1.rds
--------------------------------------------------------------------------------
/inst/extdata/avworkflow_info_2.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bioconductor/AnVIL/devel/inst/extdata/avworkflow_info_2.rds
--------------------------------------------------------------------------------
/R/AnVIL-package.R:
--------------------------------------------------------------------------------
1 | #' @keywords internal
2 | "_PACKAGE"
3 |
4 | ## usethis namespace: start
5 | ## usethis namespace: end
6 | NULL
7 |
--------------------------------------------------------------------------------
/vignettes/images/AnVIL-Workspace-Data.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bioconductor/AnVIL/devel/vignettes/images/AnVIL-Workspace-Data.png
--------------------------------------------------------------------------------
/.Rbuildignore:
--------------------------------------------------------------------------------
1 | \.httr-oauth$
2 | ^.*\.Rproj$
3 | ^\.Rproj\.user$
4 | ^\.httr-oauth$
5 | vignettes/.*R$
6 | vignettes/.*html
7 | vignettes/.*_cache
8 | ^doc$
9 | ^Meta$
10 | ^\.github$
11 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .httr-oauth
2 | auth.json
3 | .Rproj.user
4 | *.Rproj
5 | *.Rcheck
6 | *.tar.gz
7 | /*.R
8 | .Rhistory
9 | vignettes/*.md
10 | vignettes/*ipynb
11 | vignettes/*html
12 | vignettes/*R
13 | vignettes/*log
14 | vignettes/*pdf
15 | vignettes/*_cache
16 | vignettes/*_files
17 | /doc/
18 | /Meta/
19 |
--------------------------------------------------------------------------------
/R/zzz.R:
--------------------------------------------------------------------------------
1 | #' @import futile.logger
2 |
3 | ## Register default OAuth client options when the namespace is loaded;
4 | ## options that are already set are left untouched.
5 | .onLoad <-
6 |     function(...)
7 | {
8 |     client_id <- paste(
9 |         "250475993726-k70p3kf2fe2tpuq5jn39ogafbuj9fb8o",
10 |         "apps", "googleusercontent", "com",
11 |         sep = "."
12 |     )
13 |     defaults <- list(
14 |         anvil_client_id = client_id,
15 |         anvil_client_secret = oauth_secret
16 |     )
17 |     options(defaults[setdiff(names(defaults), names(options()))])
18 | }
19 |
--------------------------------------------------------------------------------
/inst/service/terra/README.md:
--------------------------------------------------------------------------------
1 | The terra service uses openapi 3.0, but our software (currently)
2 | uses rapiclient, with a restriction to Swagger 2.0. The file in this
3 | directory was created using the [LucyBot][] api-spec-converter from
4 | the command line.
5 |
6 | On a macOS and following the README instructions on the git repository, I did
7 |
8 | ```
9 | $ brew install npm
10 | $ npm install -g api-spec-converter
11 | ```
12 |
13 | and then ran the command
14 |
15 | ```
16 | api-spec-converter -f openapi_3 -t swagger_2 \
17 | https://api.firecloud.org/api-docs.yaml > \
18 | api.yaml
19 | ```
20 |
21 | [LucyBot]: https://github.com/LucyBot-Inc/api-spec-converter
22 |
--------------------------------------------------------------------------------
/inst/service/rawls/README.md:
--------------------------------------------------------------------------------
1 | The rawls service uses openapi 3.0, but our software (currently)
2 | uses rapiclient, with a restriction to Swagger 2.0. The file in this
3 | directory was created using the [LucyBot][] api-spec-converter from
4 | the command line.
5 |
6 | On a macOS and following the README instructions on the git repository, I did
7 |
8 | ```
9 | $ brew install npm
10 | $ npm install -g api-spec-converter
11 | ```
12 |
13 | and then ran the command
14 |
15 | ```
16 | api-spec-converter -f openapi_3 -t swagger_2 \
17 | https://rawls.dsde-prod.broadinstitute.org/api-docs.yaml > \
18 | api.yaml
19 | ```
20 |
21 | [LucyBot]: https://github.com/LucyBot-Inc/api-spec-converter
22 |
--------------------------------------------------------------------------------
/inst/service/tdr/README.md:
--------------------------------------------------------------------------------
1 | The Terra Data Repository (TDR) service uses openapi 3.0, but our software
2 | (currently) uses rapiclient, with a restriction to Swagger 2.0. The file in this
3 | directory was created using the [LucyBot][] api-spec-converter from
4 | the command line.
5 |
6 | On a macOS and following the README instructions on the git repository, I did
7 |
8 | ```
9 | $ brew install npm
10 | $ npm install -g api-spec-converter
11 | ```
12 |
13 | and then ran the command
14 |
15 | ```
16 | api-spec-converter -f openapi_3 -t swagger_2 \
17 | https://data.terra.bio/data-repository-openapi.yaml > \
18 | api.yaml
19 | ```
20 |
21 | [LucyBot]: https://github.com/LucyBot-Inc/api-spec-converter
22 |
--------------------------------------------------------------------------------
/inst/service/leonardo/README.md:
--------------------------------------------------------------------------------
1 | The leonardo service uses openapi 3.0, but our software (currently)
2 | uses rapiclient, with a restriction to Swagger 2.0. The file in this
3 | directory was created using the [LucyBot][] api-spec-converter from
4 | the command line.
5 |
6 | On a macOS and following the README instructions on the git repository, I did
7 |
8 | ```
9 | $ brew install npm
10 | $ npm install -g api-spec-converter
11 | ```
12 |
13 | and then ran the command
14 |
15 | ```
16 | api-spec-converter -f openapi_3 -t swagger_2 \
17 | https://raw.githubusercontent.com/DataBiosphere/leonardo/develop/http/src/main/resources/swagger/api-docs.yaml > \
18 | api.yaml
19 | ```
20 |
21 | [LucyBot]: https://github.com/LucyBot-Inc/api-spec-converter
22 |
--------------------------------------------------------------------------------
/inst/service/dockstore/README.md:
--------------------------------------------------------------------------------
1 | The dockstore service uses openapi 3.0, but our software (currently)
2 | uses rapiclient, with a restriction to Swagger 2.0. The file in this
3 | directory was created using the [LucyBot][] api-spec-converter from
4 | the command line.
5 |
6 | On a macOS and following the README instructions on the git repository, I did
7 |
8 | ```
9 | $ brew install npm
10 | $ npm install -g api-spec-converter
11 | ```
12 |
13 | and then ran the command
14 |
15 | ```
16 | wget -O openapi.yaml https://dockstore.org/api/openapi.yaml
17 | ## Forbidden 403 on direct URL
18 | api-spec-converter -f openapi_3 -t swagger_2 \
19 | openapi.yaml > \
20 | api.yaml
21 | ```
22 |
23 | [LucyBot]: https://github.com/LucyBot-Inc/api-spec-converter
24 |
--------------------------------------------------------------------------------
/man/utilities.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/utilities.R
3 | \name{utilities}
4 | \alias{utilities}
5 | \alias{add_libpaths}
6 | \title{Utilities for managing library paths}
7 | \usage{
8 | add_libpaths(paths)
9 | }
10 | \arguments{
11 | \item{paths}{\code{character()}: vector of directories to add to
12 | \code{.libPaths()}. Paths that do not exist will be created.}
13 | }
14 | \value{
15 | \code{add_libpaths()}: updated .libPaths(), invisibly.
16 | }
17 | \description{
18 | \code{add_libpaths()}: Add local library paths to
19 | \code{.libPaths()}.
20 | }
21 | \examples{
22 | \dontshow{if (interactive()) withAutoprint(\{ # examplesIf}
23 | add_libpaths("/tmp/host-site-library")
24 | \dontshow{\}) # examplesIf}
25 | }
26 |
--------------------------------------------------------------------------------
/inputs.json:
--------------------------------------------------------------------------------
1 | {"Optimus.OptimusLoomGeneration.docker":"${}","Optimus.OptimusLoomGeneration.preemptible":"${}","Optimus.RunEmptyDrops.cpu":"${}","Optimus.RunEmptyDrops.disk":"${}","Optimus.RunEmptyDrops.docker":"${}","Optimus.RunEmptyDrops.fdr_cutoff":"${}","Optimus.RunEmptyDrops.machine_mem_mb":"${}","Optimus.RunEmptyDrops.min_molecules":"${}","Optimus.RunEmptyDrops.niters":"${}","Optimus.RunEmptyDrops.preemptible":"${}","Optimus.annotations_gtf":"${workspace.human_annotations_gtf}","Optimus.chemistry":"tenX_v2","Optimus.counting_mode":"${}","Optimus.emptydrops_lower":"${1}","Optimus.force_no_check":"${}","Optimus.i1_fastq":"${}","Optimus.input_id":"pbmc4k_human","Optimus.output_bam_basename":"${}","Optimus.r1_fastq":"${this.samples.r1_fastq}","Optimus.r2_fastq":"${this.samples.r2_fastq}","Optimus.ref_genome_fasta":"${workspace.human_ref_genome_fasta}","Optimus.tar_star_reference":"${workspace.human_tar_star_reference}","Optimus.whitelist":"${workspace.whitelist_v2}"}
--------------------------------------------------------------------------------
/tests/testthat/test_api.R:
--------------------------------------------------------------------------------
1 | ## Check that the API calls used by AnVIL are consistent with the API
2 | ## in the YAML. Requires manual investigation of any removed or
3 | ## updated_args_in_use functions.
4 | ##
5 | ## Use functionality in R/api.R:.api_test_write() to record the
6 | ## current interface; .api_test_check() to compare the current and
7 | ## previously recorded versions.
8 |
9 | test_that("Interfaces are current", {
10 |     skip_if(!GCPtools::gcloud_exists())
11 |
12 |     ## a service passes when `.api_test_check()` reports empty
13 |     ## `removed_in_use` and `updated_in_use` entries
14 |     expect_current <- function(service, name) {
15 |         service_status <- .api_test_check(service, name)
16 |         expect_identical(service_status$removed_in_use, character())
17 |         expect_identical(service_status$updated_in_use, character())
18 |     }
19 |
20 |     expect_current(Terra(), "Terra")
21 |     expect_current(Rawls(), "Rawls")
22 |     expect_current(Leonardo(), "Leonardo")
23 | })
24 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | This archive contains an interface to AnVIL web services. The AnVIL
2 | package has been available since Bioconductor version 3.11. To install
3 | the released version, follow instructions in the package vignette
4 |
5 | if (!requireNamespace("BiocManager", quietly = TRUE))
6 | install.packages("BiocManager")
7 | BiocManager::install("AnVIL")
8 |
9 | To install the development (github devel) version in a recent _R_,
10 | use
11 |
12 | if (!requireNamespace("BiocManager", quietly = TRUE))
13 | install.packages("BiocManager")
14 | if (!requireNamespace("remotes", quietly = TRUE))
15 | install.packages("remotes")
16 | BiocManager::install("Bioconductor/AnVIL")
17 |
18 | View the vignette (on [Bioconductor][bioc-vignette],
19 | [github][github-vignette], or in your R session
20 | `browseVignettes(package = "AnVIL")`) for usage and help pages for
21 | accurate documentation. Visit the Bioconductor package [landing page]
22 | for more information.
23 |
24 | [landing page]: https://bioconductor.org/packages/AnVIL
25 | [bioc-vignette]: https://bioconductor.org/packages/devel/bioc/vignettes/AnVIL/inst/doc/Introduction.html
26 | [github-vignette]: https://github.com/Bioconductor/AnVIL/blob/devel/vignettes/Introduction.Rmd
27 |
--------------------------------------------------------------------------------
/inst/scripts/update_dockstore_api.R:
--------------------------------------------------------------------------------
1 | # setwd("~/bioc/AnVIL")
2 | ## Download the current Dockstore OpenAPI description; if its version
3 | ## differs from the one recorded in R/Dockstore.R, regenerate the
4 | ## Swagger 2.0 'api.yaml' and update the recorded version. Paths are
5 | ## relative to the package root. Exit status: 0 when files were updated,
6 | ## 1 when the version is unchanged; an R error also ends Rscript non-zero.
7 | service_dir <- "inst/service/dockstore"
8 | file_loc <- file.path(service_dir, "openapi.yaml")
9 | api_loc <- file.path(service_dir, "api.yaml")
10 | r_file <- "R/Dockstore.R"
11 |
12 | download.file(
13 |     url = "https://dockstore.org/api/openapi.yaml",
14 |     destfile = file_loc
15 | )
16 |
17 | docklines <- readLines(r_file)
18 |
19 | .DOCKSTORE_LINE <- ".DOCKSTORE_API_REFERENCE_VERSION <-"
20 |
21 | lineIdx <- grep(.DOCKSTORE_LINE, docklines, fixed = TRUE)
22 | if (!length(lineIdx))
23 |     stop("no '", .DOCKSTORE_LINE, "' line found in ", r_file)
24 | oldver <- unlist(strsplit(docklines[lineIdx], "\""))[[2L]]
25 | newver <- yaml::read_yaml(file_loc)[[c("info", "version")]]
26 |
27 | if (identical(oldver, newver)) {
28 |     ## failure -- API the same
29 |     quit(status = 1)
30 | }
31 |
32 | ## Convert into a temporary file first so that a failed conversion leaves
33 | ## both 'api.yaml' and R/Dockstore.R untouched.
34 | converted <- tempfile(fileext = ".yaml")
35 | status <- system2(
36 |     command = "api-spec-converter",
37 |     args = c("-f", "openapi_3", "-t", "swagger_2", shQuote(file_loc)),
38 |     stdout = converted
39 | )
40 | if (!identical(as.integer(status), 0L))
41 |     stop("'api-spec-converter' failed with exit status ", status)
42 | if (!file.copy(converted, api_loc, overwrite = TRUE))
43 |     stop("could not write ", api_loc)
44 |
45 | ## success -- update the version recorded in the R file. Passing the
46 | ## path (rather than an unopened `file()` connection) lets writeLines()
47 | ## open and release the connection itself.
48 | docklines[lineIdx] <- paste0(.DOCKSTORE_LINE, " \"", newver, "\"")
49 | writeLines(docklines, con = r_file)
50 |
51 | quit(status = 0)
52 |
--------------------------------------------------------------------------------
/man/figures/lifecycle-deprecated.svg:
--------------------------------------------------------------------------------
1 |
22 |
--------------------------------------------------------------------------------
/man/AnVIL-package.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/AnVIL-package.R
3 | \docType{package}
4 | \name{AnVIL-package}
5 | \alias{AnVIL}
6 | \alias{AnVIL-package}
7 | \title{AnVIL: Bioconductor on the AnVIL compute environment}
8 | \description{
9 | The AnVIL is a cloud computing resource developed in part by the National Human Genome Research Institute. The AnVIL package provides programmatic access to the Dockstore, Leonardo, Rawls, TDR, and Terra RESTful programming interfaces. For platform-specific user-level functionality, see either the AnVILGCP or AnVILAz package.
10 | }
11 | \seealso{
12 | Useful links:
13 | \itemize{
14 | \item \url{https://github.com/Bioconductor/AnVIL}
15 | \item Report bugs at \url{https://github.com/Bioconductor/AnVIL/issues}
16 | }
17 |
18 | }
19 | \author{
20 | \strong{Maintainer}: Marcel Ramos \email{marcel.ramos@sph.cuny.edu} (\href{https://orcid.org/0000-0002-3242-0582}{ORCID})
21 |
22 | Authors:
23 | \itemize{
24 | \item Martin Morgan (\href{https://orcid.org/0000-0002-5874-8148}{ORCID})
25 | \item Kayla Interdonato
26 | \item Yubo Cheng
27 | \item Nitesh Turaga
28 | }
29 |
30 | Other contributors:
31 | \itemize{
32 | \item BJ Stubbs [contributor]
33 | \item Vincent Carey [contributor]
34 | \item Sehyun Oh [contributor]
35 | \item Sweta Gopaulakrishnan [contributor]
36 | \item Valerie Obenchain [contributor]
37 | }
38 |
39 | }
40 | \keyword{internal}
41 |
--------------------------------------------------------------------------------
/R/TDR.R:
--------------------------------------------------------------------------------
1 | #' @exportClass TDR
2 | .TDR <- setClass(
3 |     "TDR",
4 |     contains = "Service",
5 |     slots = c(api_header = "character")
6 | )
7 | ## accessor for the request headers held in the `api_header` slot
8 | .api_header <- function(x) x@api_header
9 | .TDR_API_REFERENCE_VERSION <- "0.1.0"
10 |
11 | #' @rdname Services
12 | #'
13 | #' @aliases TDR-class operations,TDR-method
14 | #'
15 | #' @return `TDR()` creates the API of the Terra Data Repository to work with
16 | #'     snapshot data in the Terra Data Repository at \url{https://data.terra.bio}.
17 | #'
18 | #' @format NULL
19 | #'
20 | #' @importFrom GCPtools gcloud_access_token
21 | #'
22 | #' @examples
23 | #' library(GCPtools)
24 | #' if (gcloud_exists())
25 | #'     TDR()
26 | #'
27 | #' @export
28 | TDR <-
29 |     function()
30 | {
31 |     access_token <- gcloud_access_token("tdr")
32 |     api_header <- c(
33 |         Authorization = paste("Bearer", access_token)
34 |     )
35 |     .TDR(
36 |         Service(
37 |             "tdr",
38 |             host = "data.terra.bio",
39 |             ## no `config =`: keep TLS verification on; a bearer token is sent
40 |             api_reference_version = .TDR_API_REFERENCE_VERSION,
41 |             authenticate = FALSE,
42 |             api_reference_url =
43 |                 "https://data.terra.bio/data-repository-openapi.yaml"
44 |         ),
45 |         api_header = api_header
46 |     )
47 | }
48 |
49 |
50 | #' @export
51 | setMethod("operations", "TDR", function(x, ..., .deprecated = FALSE) {
52 |     ## delegate to the inherited method, passing the headers stored on
53 |     ## `x` as `.headers`
54 |     headers <- .api_header(x)
55 |     callNextMethod(
56 |         x, .headers = headers, ..., .deprecated = .deprecated
57 |     )
58 | })
59 |
--------------------------------------------------------------------------------
/R/authenticate.R:
--------------------------------------------------------------------------------
1 | authenticate_path <- function(service)
2 |     system.file("service", service, "auth.json", package = "AnVIL")
3 |
4 | ## Warn when 'auth.json' is missing for `service`; invisibly TRUE / FALSE.
5 | authenticate_ok <-
6 |     function(service)
7 | {
8 |     configured <- file.exists(authenticate_path(service))
9 |     if (configured)
10 |         return(invisible(configured))
11 |     warning(
12 |         "'", service, "' requires additional configuration; ",
13 |         "see `?authenticate`", call. = FALSE
14 |     )
15 |     invisible(configured)
16 | }
17 |
18 | #' @importFrom httr oauth_app oauth_endpoints oauth2.0_token
19 | #'
20 | #' @importFrom jsonlite read_json
21 | authenticate <-
22 |     function(service, cache = getOption("httr_oauth_cache"))
23 | {
24 |     if (!interactive())
25 |         return(invisible(NULL))
26 |     stopifnot(isScalarCharacter(service))
27 |
28 |     ## credentials: a service-specific 'auth.json' (optionally nested under
29 |     ## "installed") takes precedence over the package-level options
30 |     path <- authenticate_path(service)
31 |     if (file.exists(path)) {
32 |         credentials <- read_json(path)
33 |         if ("installed" %in% names(credentials))
34 |             credentials <- credentials$installed
35 |     } else {
36 |         credentials <- list(
37 |             client_id = getOption("anvil_client_id"),
38 |             client_secret = getOption("anvil_client_secret")
39 |         )
40 |     }
41 |
42 |     app <- oauth_app(
43 |         "AnVILBiocPackage",
44 |         key = credentials$client_id,
45 |         secret = credentials$client_secret
46 |     )
47 |     token <- oauth2.0_token(
48 |         oauth_endpoints("google"), app,
49 |         scope = "openid email", cache = cache
50 |     )
51 |     invisible(token)
52 | }
53 |
54 | ## httr configuration carrying the token returned by authenticate()
55 | authenticate_config <-
56 |     function(service)
57 | {
58 |     httr::config(token = authenticate(service))
59 | }
60 |
--------------------------------------------------------------------------------
/R/Rawls.R:
--------------------------------------------------------------------------------
1 | #' @export
2 | .Rawls <- setClass(
3 |     "Rawls",
4 |     contains = "Service",
5 |     slots = c(api_header = "character")
6 | )
7 |
8 | .RAWLS_API_REFERENCE_VERSION <- "1.0.0"
9 |
10 | ## construct a singleton instance for this service
11 |
12 | #' @rdname Services
13 | #'
14 | #' @aliases Rawls-class operations,Rawls-method schemas,Rawls-method
15 | #'
16 | #' @return `Rawls()` creates the API of the Rawls cloud computational
17 | #'     environment at \url{https://rawls.dsde-prod.broadinstitute.org}.
18 | #'
19 | #' @format NULL
20 | #'
21 | #' @importFrom GCPtools gcloud_access_token
22 | #'
23 | #' @examples
24 | #' library(GCPtools)
25 | #' if (gcloud_exists()) {
26 | #'     tags(Rawls())
27 | #'     tags(Rawls(), "billing")
28 | #' }
29 | #'
30 | #' @export
31 | Rawls <-
32 |     function()
33 | {
34 |     ## bearer token as returned by gcloud_access_token()
35 |     bearer <- paste("Bearer", gcloud_access_token("rawls"))
36 |
37 |     service <- Service(
38 |         "rawls",
39 |         host = "rawls.dsde-prod.broadinstitute.org",
40 |         authenticate = FALSE,
41 |         api_reference_version = .RAWLS_API_REFERENCE_VERSION,
42 |         api_reference_url =
43 |             "https://rawls.dsde-prod.broadinstitute.org/api-docs.yaml"
44 |     )
45 |
46 |     .Rawls(service, api_header = c(Authorization = bearer))
47 | }
48 |
49 | ## Some operations seem to have a poorly-defined operationId in the json;
50 | ## note that this method returns the inherited result unfiltered.
51 |
52 | #' @export
53 | setMethod("operations", "Rawls", function(x, ..., .deprecated = FALSE) {
54 |     ## pass the headers stored on `x` to the inherited method as `.headers`
55 |     headers <- .api_header(x)
56 |     callNextMethod(
57 |         x, .headers = headers, ..., .deprecated = .deprecated
58 |     )
59 | })
60 |
--------------------------------------------------------------------------------
/R/Leonardo.R:
--------------------------------------------------------------------------------
1 | #' @export
2 | .Leonardo <- setClass(
3 |     "Leonardo",
4 |     contains = "Service",
5 |     slots = c(api_header = "character")
6 | )
7 |
8 | .LEONARDO_API_REFERENCE_VERSION <- "1.3.6"
9 |
10 | ## construct a singleton instance for this service
11 |
12 | #' @rdname Services
13 | #'
14 | #' @aliases Leonardo-class operations,Leonardo-method
15 | #'
16 | #' @return `Leonardo()` creates the API of the Leonardo container
17 | #'     deployment service at
18 | #'     \url{https://leonardo.dsde-prod.broadinstitute.org/api-docs.yaml}.
19 | #'
20 | #' @format NULL
21 | #'
22 | #' @importFrom GCPtools gcloud_access_token
23 | #'
24 | #' @examples
25 | #' library(GCPtools)
26 | #' if (gcloud_exists())
27 | #'     Leonardo()
28 | #'
29 | #' @export
30 | Leonardo <-
31 |     function()
32 | {
33 |     access_token <- gcloud_access_token("leonardo")
34 |     api_header <- c(
35 |         Authorization = paste("Bearer", access_token),
36 |         Referer = "https://leonardo.dsde-prod.broadinstitute.org"
37 |     )
38 |     .Leonardo(
39 |         Service(
40 |             "leonardo",
41 |             host = "leonardo.dsde-prod.broadinstitute.org",
42 |             ## no `config =`: keep TLS verification on; a bearer token is sent
43 |             authenticate = FALSE,
44 |             api_reference_version = .LEONARDO_API_REFERENCE_VERSION,
45 |             api_reference_url =
46 |                 "https://leonardo.dsde-prod.broadinstitute.org/api-docs.yaml"
47 |         ),
48 |         api_header = api_header
49 |     )
50 | }
51 |
52 | #' @export
53 | setMethod("operations", "Leonardo", function(x, ..., .deprecated = FALSE) {
54 |     ## delegate to the inherited method, passing the headers stored on
55 |     ## `x` as `.headers`
56 |     headers <- .api_header(x)
57 |     callNextMethod(x, .headers = headers, ..., .deprecated = .deprecated)
58 | })
59 |
--------------------------------------------------------------------------------
/.github/workflows/update_api.yml:
--------------------------------------------------------------------------------
1 | name: Update to Dockstore API
2 |
3 | on:
4 | workflow_dispatch:
5 | inputs:
6 | branch:
7 | description: 'checkout branch'
8 | required: true
9 | default: 'devel'
10 |
11 | jobs:
12 | apiUpdate:
13 | runs-on: ubuntu-latest
14 | container: bioconductor/bioconductor_docker:${{ github.event.inputs.branch }}
15 |
16 | env:
17 | R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
18 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
19 |
20 | steps:
21 | - name: Checkout Repository
22 | uses: actions/checkout@v4
23 | with:
24 | ref: ${{ github.event.inputs.branch }}
25 |
26 | - name: Install Dependencies
27 | run: Rscript -e "install.packages('yaml')"
28 |
29 | - name: Install Node.js
30 | uses: actions/setup-node@v4
31 | with:
32 | node-version: 20
33 |
34 | - name: Install lucybot converter
35 | run: npm install -g api-spec-converter
36 |
37 | - name: Add safe directory
38 | run: git config --global --add safe.directory /__w/${{ github.event.repository.name }}/${{ github.event.repository.name }}
39 |
40 | - name: Download API and write MD5
41 | run: Rscript -e "source('./inst/scripts/update_dockstore_api.R')"
42 |
43 |       - name: Commit changes
44 |         if: ${{ success() }}
45 |         uses: EndBug/add-and-commit@v9
46 |         with:
47 |           add: '["inst/service/dockstore/openapi.yaml", "inst/service/dockstore/api.yaml", "R/Dockstore.R"]'
48 |           message: 'Update API'
49 |
50 | - name: Push action
51 | uses: ad-m/github-push-action@master
52 | with:
53 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
54 | branch: ${{ github.event.inputs.branch }}
55 |
56 |
57 |
--------------------------------------------------------------------------------
/man/gadgets_developer.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/gadgets.R
3 | \name{.gadget_run}
4 | \alias{.gadget_run}
5 | \title{Functions to implement AnVIL gadget interfaces}
6 | \usage{
7 | .gadget_run(title, tibble, DONE_FUN)
8 | }
9 | \arguments{
10 | \item{title}{character(1) (required) title to appear at the base of
11 | the gadget, e.g., "AnVIL Workspaces".}
12 |
13 | \item{tibble}{a \code{tibble} or \code{data.frame} to be displayed in the
14 | gadget.}
15 |
16 | \item{DONE_FUN}{a function of two arguments, \code{tibble} and
17 | \code{row_selected}. The tibble is the \code{tibble} provided as an
18 | argument to \code{.gadget_run()}. \code{row_selected} is the row
19 | selected in the gadget by the user. The function is only
20 | invoked when the user selects a valid row.}
21 | }
22 | \value{
23 | \code{.gadget_run()} returns the result of \code{DONE_FUN()} if a row
24 | has been selected by the user, or \code{NULL} if no row is selected
25 | (the user presses \code{Cancel}, or \code{Done} prior to selecting any
26 | row).
27 | }
28 | \description{
29 | Functions documented on this page are primarily
30 | intended for package developers wishing to implement gadgets
31 | (graphical interfaces) to navigating AnVIL-generated tables.
32 |
33 | \code{.gadget_run()} presents the user with a
34 | tibble-navigating gadget, returning the value of \code{DONE_FUN} if
35 | a row of the tibble is selected, or NULL.
36 | }
37 | \examples{
38 | \dontshow{if (has_avworkspace(TRUE, platform = AnVILGCP::gcp())) withAutoprint(\{ # examplesIf}
39 | tibble <- avworkspaces(platform = AnVILGCP::gcp())
40 | DONE_FUN <- function(tibble, row_selected) {
41 | selected <- slice(tibble, row_selected)
42 | with(selected, paste0(namespace, "/", name))
43 | }
44 | .gadget_run("AnVIL Example", tibble, DONE_FUN)
45 | \dontshow{\}) # examplesIf}
46 | }
47 |
--------------------------------------------------------------------------------
/R/Dockstore.R:
--------------------------------------------------------------------------------
1 | ## sub-class to allow method dispatch
2 |
3 | #' @export
4 | .Dockstore <- setClass(
5 |     "Dockstore",
6 |     contains = "Service",
7 |     slots = c(api_header = "character")
8 | )
9 |
10 | .api_header <- function(x) x@api_header
11 | .DOCKSTORE_API_REFERENCE_VERSION <- "1.18.2"
12 |
13 | ## construct a singleton instance for this service
14 |
15 | #' @rdname Services
16 | #'
17 | #' @aliases Dockstore-class operations,Dockstore-method
18 | #'
19 | #' @return `Dockstore()` represents the API of the Dockstore platform to
20 | #'     share Docker-based tools in CWL or WDL or Nextflow at
21 | #'     \url{https://dockstore.org}
22 | #'
23 | #' @format NULL
24 | #'
25 | #' @examples
26 | #' Dockstore()
27 | #'
28 | #' @export
29 | Dockstore <-
30 |     function()
31 | {
32 |     api_header <- character()
33 |     path <- authenticate_path("dockstore")
34 |     ## optional token from 'auth.json'; ignored unless a non-empty string
35 |     token <- if (file.exists(path)) read_json(path)[["token"]]
36 |     if (is.character(token) && length(token) == 1L && nzchar(token))
37 |         api_header <- c(Authorization = paste("Bearer", token))
38 |     .Dockstore(
39 |         Service(
40 |             "dockstore",
41 |             host = "dockstore.org",
42 |             ## no `config =`: keep TLS verification on; a token may be sent
43 |             api_reference_version = .DOCKSTORE_API_REFERENCE_VERSION,
44 |             authenticate = FALSE,
45 |             api_reference_url = "https://dockstore.org/api/openapi.yaml"
46 |         ),
47 |         api_header = api_header
48 |     )
49 | }
50 |
51 | #' @export
52 | setMethod("operations", "Dockstore", function(x, ..., .deprecated = FALSE) {
53 |     ## the headers stored on `x` (possibly empty) are passed as `.headers`
54 |     headers <- .api_header(x)
55 |     all_operations <- callNextMethod(
56 |         x, .headers = headers, ..., .deprecated = .deprecated
57 |     )
58 |     ## Some operations have a poorly defined operationId in the json;
59 |     ## keep only those whose name contains neither '_' nor ','
60 |     well_named <- grep("[_,]+", names(all_operations), invert = TRUE)
61 |     all_operations[well_named]
62 | })
63 |
--------------------------------------------------------------------------------
/R/Terra.R:
--------------------------------------------------------------------------------
1 | #' @export
2 | .Terra <- setClass(
3 |     "Terra",
4 |     contains = "Service",
5 |     slots = c(api_header = "character")
6 | )
7 |
8 | .TERRA_API_REFERENCE_VERSION <- "0.1"
9 |
10 | ## construct a singleton instance for this service
11 |
12 | #' @rdname Services
13 | #'
14 | #' @aliases Terra-class operations,Terra-method schemas,Terra-method
15 | #'
16 | #' @return `Terra()` creates the API of the Terra cloud computational
17 | #'     environment at \url{https://api.firecloud.org/}.
18 | #'
19 | #' @format NULL
20 | #'
21 | #' @importFrom GCPtools gcloud_access_token
22 | #'
23 | #' @examples
24 | #' library(GCPtools)
25 | #' if (gcloud_exists()) {
26 | #'     tags(Terra())
27 | #'     tags(Terra(), "Billing")
28 | #' }
29 | #'
30 | #' @export
31 | Terra <-
32 |     function()
33 | {
34 |     access_token <- gcloud_access_token("terra")
35 |     api_header <- c(Authorization = paste("Bearer", access_token))
36 |     .Terra(
37 |         Service(
38 |             "terra",
39 |             host = "api.firecloud.org",
40 |             ## api_url = "https://api.firecloud.org/api-docs.yaml",
41 |             authenticate = FALSE,
42 |             api_reference_version = .TERRA_API_REFERENCE_VERSION,
43 |             api_reference_url = "https://api.firecloud.org/api-docs.yaml"
44 |         ),
45 |         api_header = api_header
46 |     )
47 | }
48 |
49 | ## Some operations seem to have a poorly-defined operationId in the json
50 |
51 | #' @export
52 | setMethod("operations", "Terra", function(x, ..., .deprecated = FALSE) {
53 |     headers <- .api_header(x)
54 |     all_operations <- callNextMethod(
55 |         x, .headers = headers, ..., .deprecated = .deprecated
56 |     )
57 |     ## keep only operations whose name contains neither '_' nor ','
58 |     well_named <- grep("[_,]+", names(all_operations), invert = TRUE)
59 |     all_operations[well_named]
60 | })
61 |
62 | #' @export
63 | setMethod("schemas", "Terra", function(x) {
64 |     ## take the inherited result and keep only schemas whose name
65 |     ## contains neither '_' nor ','
66 |     all_schemas <- callNextMethod()
67 |     well_named <- grep("[_,]+", names(all_schemas), invert = TRUE)
68 |     all_schemas[well_named]
69 | })
70 |
--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
1 | Package: AnVIL
2 | Title: Bioconductor on the AnVIL compute environment
3 | Version: 1.23.6
4 | Authors@R: c(
5 | person(
6 | "Marcel", "Ramos", role = c("aut", "cre"),
7 | email = "marcel.ramos@sph.cuny.edu",
8 | comment = c(ORCID = "0000-0002-3242-0582")
9 | ),
10 | person(
11 | "Martin", "Morgan", role = c("aut"),
12 | comment = c(ORCID = "0000-0002-5874-8148")
13 | ),
14 | person("Kayla", "Interdonato", role = "aut"),
15 | person("Yubo", "Cheng", role = "aut"),
16 | person("Nitesh", "Turaga", role = "aut"),
17 | person("BJ", "Stubbs", role = "ctb"),
18 | person("Vincent", "Carey", role = "ctb"),
19 | person("Sehyun", "Oh", role = "ctb"),
20 | person("Sweta", "Gopaulakrishnan", role = "ctb"),
21 | person("Valerie", "Obenchain", role = "ctb"))
22 | Description: The AnVIL is a cloud computing resource developed in part
23 | by the National Human Genome Research Institute. The AnVIL package
24 | provides programmatic access to the Dockstore, Leonardo, Rawls,
25 | TDR, and Terra RESTful programming interfaces. For platform-specific
26 | user-level functionality, see either the AnVILGCP or AnVILAz package.
27 | License: Artistic-2.0
28 | Encoding: UTF-8
29 | Depends: R (>= 4.6.0), dplyr, AnVILBase
30 | Imports:
31 | stats, utils, methods,
32 | futile.logger,
33 | GCPtools,
34 | jsonlite, httr,
35 | rapiclient, yaml,
36 | tibble,
37 | shiny, DT, miniUI, htmltools,
38 | BiocBaseUtils
39 | Suggests: knitr, rmarkdown, testthat, withr, readr, BiocStyle,
40 | devtools, AnVILAz, AnVILGCP, lifecycle
41 | Collate:
42 | utilities.R authenticate.R api.R AnVIL-package.R
43 | Service.R Services.R Leonardo.R Terra.R Rawls.R Dockstore.R TDR.R
44 | gadgets.R zzz.R
45 | URL: https://github.com/Bioconductor/AnVIL
46 | BugReports: https://github.com/Bioconductor/AnVIL/issues
47 | VignetteBuilder: knitr
48 | biocViews: Infrastructure
49 | RoxygenNote: 7.3.3
50 | Roxygen: list(markdown = TRUE)
51 | Date: 2025-12-08
52 |
--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | # Generated by roxygen2: do not edit by hand
2 |
3 | S3method(.DollarNames,Service)
4 | export(.Dockstore)
5 | export(.Leonardo)
6 | export(.Rawls)
7 | export(.Service)
8 | export(.Terra)
9 | export(.gadget_run)
10 | export(Dockstore)
11 | export(Leonardo)
12 | export(Rawls)
13 | export(Service)
14 | export(TDR)
15 | export(Terra)
16 | export(add_libpaths)
17 | export(avtable_gadget)
18 | export(avworkflow_gadget)
19 | export(avworkspace_gadget)
20 | export(browse_workspace)
21 | export(empty_object)
22 | export(operations)
23 | export(schemas)
24 | export(tags)
25 | exportClasses(Dockstore)
26 | exportClasses(Leonardo)
27 | exportClasses(Rawls)
28 | exportClasses(Service)
29 | exportClasses(TDR)
30 | exportClasses(Terra)
31 | exportMethods("$")
32 | exportMethods(operations)
33 | exportMethods(schemas)
34 | exportMethods(show)
35 | import(futile.logger)
36 | import(methods)
37 | importFrom(AnVILBase,avstop_for_status)
38 | importFrom(AnVILBase,avtable)
39 | importFrom(AnVILBase,avtables)
40 | importFrom(AnVILBase,avworkspace)
41 | importFrom(AnVILBase,avworkspace_name)
42 | importFrom(AnVILBase,avworkspaces)
43 | importFrom(BiocBaseUtils,checkInstalled)
44 | importFrom(BiocBaseUtils,isCharacter)
45 | importFrom(BiocBaseUtils,isScalarCharacter)
46 | importFrom(BiocBaseUtils,isScalarLogical)
47 | importFrom(DT,DTOutput)
48 | importFrom(DT,datatable)
49 | importFrom(DT,formatStyle)
50 | importFrom(DT,renderDT)
51 | importFrom(GCPtools,gcloud_access_token)
52 | importFrom(dplyr,arrange)
53 | importFrom(dplyr,filter)
54 | importFrom(dplyr,full_join)
55 | importFrom(dplyr,select)
56 | importFrom(htmltools,p)
57 | importFrom(htmltools,strong)
58 | importFrom(httr,GET)
59 | importFrom(httr,add_headers)
60 | importFrom(httr,oauth2.0_token)
61 | importFrom(httr,oauth_app)
62 | importFrom(httr,oauth_endpoints)
63 | importFrom(httr,write_disk)
64 | importFrom(jsonlite,read_json)
65 | importFrom(miniUI,gadgetTitleBar)
66 | importFrom(miniUI,miniContentPanel)
67 | importFrom(miniUI,miniPage)
68 | importFrom(rapiclient,get_api)
69 | importFrom(rapiclient,get_operations)
70 | importFrom(rapiclient,get_schemas)
71 | importFrom(shiny,observeEvent)
72 | importFrom(shiny,renderText)
73 | importFrom(shiny,runGadget)
74 | importFrom(shiny,stopApp)
75 | importFrom(shiny,textOutput)
76 | importFrom(stats,setNames)
77 | importFrom(tibble,as_tibble)
78 | importFrom(tibble,tibble)
79 | importFrom(tools,md5sum)
80 | importFrom(utils,.DollarNames)
81 | importFrom(utils,browseURL)
82 | importFrom(utils,download.file)
83 | importFrom(utils,head)
84 |
--------------------------------------------------------------------------------
/man/gadgets.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/gadgets.R
3 | \name{avworkspace_gadget}
4 | \alias{avworkspace_gadget}
5 | \alias{browse_workspace}
6 | \alias{avtable_gadget}
7 | \alias{avworkflow_gadget}
8 | \title{Graphical user interfaces for common AnVIL operations}
9 | \usage{
10 | avworkspace_gadget()
11 |
12 | browse_workspace(use_avworkspace = TRUE)
13 |
14 | avtable_gadget()
15 |
16 | avworkflow_gadget()
17 | }
18 | \arguments{
19 | \item{use_avworkspace}{logical(1) when \code{TRUE} (default), use the
20 | selected workspace (via \code{workspace()} or \code{avworkspace()}) if
21 | available. If \code{FALSE} or no workspace is currently selected,
22 | use \code{workspace()} to allow the user to select the workspace.}
23 | }
24 | \value{
25 | \code{workspace()} returns the selected workspace as a
26 | character(1) using the format namespace/name, or character(0)
27 | if no workspace is selected.
28 |
29 | \code{browse_workspace()} returns the status of a \code{system()}
30 | call to launch the browser, invisibly.
31 |
32 | \code{table()} returns a \code{tibble} representing the selected
33 | AnVIL table.
34 |
35 | \code{workflow()} returns an \code{avworkflow_configuration} object
36 | representing the inputs and outputs of the selected
37 | workflow. This can be edited and updated as described in the
38 | "Running an AnVIL workflow within R" vignette.
39 | }
40 | \description{
41 | \code{workspace()} allows choice of workspace for
42 | subsequent use. It is the equivalent of displaying workspaces
43 | with \code{avworkspaces()}, and setting the selected workspace with
44 | \code{avworkspace()}.
45 |
46 | \code{browse_workspace()} uses \code{browseURL()} to open a
47 | browser window pointing to the Terra workspace.
48 |
49 | \code{table()} allows choice of table in the current
50 | workspace (selected by \code{avworkspace()} or \code{workspace()}) to be
51 | returned as a tibble. It is equivalent to invoking \code{avtables()}
52 | to show available tables, and \code{avtable()} to retrieve the
53 | selected table.
54 |
55 | \code{workflow()} allows choice of workflow for
56 | retrieval. It is the equivalent of \code{avworkflows()} for listing
57 | available workflows, and \code{avworkflow_configuration_get()} for
58 | retrieving the workflow.
59 | }
60 | \examples{
61 | \dontshow{if (has_avworkspace(TRUE, platform = AnVILGCP::gcp())) withAutoprint(\{ # examplesIf}
62 | workspace()
63 | browse_workspace(use_avworkspace = FALSE)
64 | tbl <- table()
65 | wkflw <- avworkflow_gadget()
66 | \dontshow{\}) # examplesIf}
67 | }
68 |
--------------------------------------------------------------------------------
/R/utilities.R:
--------------------------------------------------------------------------------
1 | #' @name utilities
2 | #'
3 | #' @title Utilities for managing library paths
4 | #'
5 | #' @description `add_libpaths()`: Prepend local library paths to
6 | #'     `.libPaths()`.
7 | #'
8 | #' @param paths `character()`: vector of directories to put in front
9 | #'     of `.libPaths()`. Directories that do not exist are created.
10 | #'
11 | #' @return `add_libpaths()`: updated .libPaths(), invisibly.
12 | #'
13 | #' @examplesIf interactive()
14 | #' add_libpaths("/tmp/host-site-library")
15 | #' @export
16 | add_libpaths <-
17 |     function(paths)
18 | {
19 |     stopifnot(is.character(paths))
20 |
21 |     ## create every requested directory that is not already present
22 |     missing_dirs <- paths[!dir.exists(paths)]
23 |     created <- vapply(missing_dirs, dir.create, logical(1))
24 |     if (!all(created)) {
25 |         failed <- paste(missing_dirs[!created], collapse="'\n '")
26 |         msg <- "'add_libpaths()' failed to create directories:\n"
27 |         stop(msg, " '", failed, "'")
28 |     }
29 |
30 |     .libPaths(c(paths, .libPaths())) # requested paths come first
31 | }
32 |
33 | isScalarCharacter_or_NULL <- function(x, na.ok = FALSE, zchar = FALSE)
34 |     isScalarCharacter(x, na.ok = na.ok, zchar = zchar) || is.null(x) # NULL ok
35 |
36 | .is_local_directory <- function(x) # TRUE for one existing directory path
37 |     if (isScalarCharacter(x)) dir.exists(x) else FALSE
38 |
39 | .is_https <- function(x) # non-character input is FALSE, not an error
40 |     if (is.character(x)) isCharacter(x) & startsWith(x, "https://") else FALSE
41 |
42 | .is_workspace <-
43 |     function(x)
44 | {
45 |     ## a workspace is a scalar string containing exactly 1 `/`
46 |     slash_at <- if (isScalarCharacter(x)) gregexpr("/", x, fixed = TRUE)[[1L]]
47 |     sum(slash_at > 0L) == 1L # gregexpr() reports 'no match' as -1
48 | }
49 |
50 | #' @importFrom dplyr full_join select
51 | #' @importFrom tibble as_tibble
52 | .tbl_with_template <- # give `tbl` the columns, in order, of `tmpl`
53 |     function(tbl, tmpl)
54 | {
55 |     result <- as_tibble(tmpl) # returned as-is when `tbl` has no rows
56 |     if (nrow(tbl)) {
57 |         have <- intersect(names(tbl), names(tmpl))
58 |         ## all_of(): `have` holds names; never a column called 'have'
59 |         tbl <- select(tbl, dplyr::all_of(have))
60 |         result <- full_join(tbl, result, by = have) |>
61 |             select(dplyr::all_of(names(tmpl)))
62 |     }
63 |     result
64 | }
65 |
66 | #' @importFrom utils head
67 | .pretty <- function(x, indent = 2, exdent = 0, some=FALSE) {
68 |     ## comma-separated, wrapped text; `some = TRUE` shows 5 of 7+ elements
69 |     len <- length(x)
70 |     truncated <- some && len > 6
71 |     shown <- if (truncated) head(x, 5) else x
72 |     paste(c(
73 |         strwrap(paste(shown, collapse=", "), indent = indent, exdent = exdent),
74 |         if (truncated) # count only the elements actually omitted
75 |             paste0(strrep(" ", indent), "# ... with ", len - 5, " more elements")
76 |     ), collapse = "\n")
77 | }
78 |
79 | .pretty_text <- function(..., indent = 0L, exdent = 0L) { # join, then wrap
80 |     paste(..., collapse = " ") |>
81 |         strwrap(indent = indent, exdent = exdent) |> paste(collapse = "\n")
82 | }
83 |
--------------------------------------------------------------------------------
/R/api.R:
--------------------------------------------------------------------------------
1 | .CLASS_OPERATION <- "rapi_operation"
2 |
3 | .api <-
4 |     function(x)
5 | {
6 |     slot(x, "api") # accessor for the 'api' slot
7 | }
8 |
9 | .api_path <-
10 |     function(service, package)
11 | {
12 |     ## look for the JSON definition first, then fall back to YAML
13 |     for (api_file in c("api.json", "api.yaml")) {
14 |         fl <- system.file("service", service, api_file, package = package)
15 |         if (file.exists(fl)) return(fl)
16 |     }
17 |     stop("could not find api.json or api.yaml for service '", service, "'")
18 | }
19 |
20 | .api_paths_fix <-
21 |     function(x)
22 | {
23 |     ## descend through nested lists until a level has a 'produces' element;
24 |     ## 'produces' needs to be character(1) for httr 1.4.1
25 |     if (!"produces" %in% names(x))
26 |         return(if (is.list(x)) lapply(x, .api_paths_fix) else x)
27 |     x[["produces"]] <- paste(x[["produces"]], collapse = ", ")
28 |     x
29 | }
30 |
31 | ## The following functions are for use by tests/testthat/test_api.R
32 |
33 | .api_test_file_path <- function(name) {
34 |     ## path of 'tests/testthat/api-<name>.rds' inside the package source
35 |     rds_name <- paste0("api-", name, ".rds")
36 |     devtools::package_file("tests", "testthat", rds_name)
37 | }
38 |
39 | .api_test_in_use <- function(function_names, service_name, r_content) {
40 |     ## FIXME: fragile -- literal text search for 'Service()$operation('
41 |     is_called <- function(function_name) {
42 |         call_text <- paste0(service_name, "()$", function_name, "(")
43 |         any(grepl(call_text, r_content, fixed = TRUE))
44 |     }
45 |     in_use <- vapply(function_names, is_called, logical(1))
46 |     function_names[in_use]
47 | }
48 |
49 | .api_test_write <-
50 |     function(service, name)
51 | {
52 |     ## save the formal arguments of every operation of `service`
53 |     signatures <- lapply(operations(service), formals)
54 |     saveRDS(signatures, .api_test_file_path(name))
55 | }
56 |
57 | .api_test_check <-
58 |     function(service, name)
59 | {
60 |     ## current vs. saved formal arguments of each operation
61 |     current <- lapply(operations(service), formals)
62 |     saved <- readRDS(.api_test_file_path(name))
63 |
64 |     added <- setdiff(names(current), names(saved))
65 |     removed <- setdiff(names(saved), names(current))
66 |     common <- intersect(names(current), names(saved))
67 |
68 |     ## operations present in both whose formals differ
69 |     is_updated <- vapply(common, function(op_name) {
70 |         !identical(saved[[op_name]], current[[op_name]])
71 |     }, logical(1))
72 |     updated <- common[is_updated]
73 |
74 |     ## all lines of the package's R sources, searched for operation calls
75 |     r_files <- dir(
76 |         devtools::package_file("R"), full.names = TRUE, pattern = "\\.R$"
77 |     )
78 |     r_content <- unlist(lapply(r_files, readLines))
79 |
80 |     ## `*_in_use`: the subset of each group found in `r_content`
81 |     list(
82 |         common = common, added = added, removed = removed, updated = updated,
83 |         common_in_use = .api_test_in_use(common, name, r_content),
84 |         removed_in_use = .api_test_in_use(removed, name, r_content),
85 |         updated_in_use = .api_test_in_use(updated, name, r_content)
86 |     )
87 | }
88 |
--------------------------------------------------------------------------------
/man/Service.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/Service.R
3 | \name{Service}
4 | \alias{Service}
5 | \title{RESTful service constructor}
6 | \usage{
7 | Service(
8 | service,
9 | host,
10 | config = httr::config(),
11 | authenticate = TRUE,
12 | api_url = character(),
13 | package = "AnVIL",
14 | schemes = "https",
15 | api_reference_url = api_url,
16 | api_reference_md5sum = character(),
17 | api_reference_version = character(),
18 | api_reference_headers = NULL,
19 | ...
20 | )
21 | }
22 | \arguments{
23 | \item{service}{character(1) The \code{Service} class name, e.g., \code{"terra"}.}
24 |
25 | \item{host}{character(1) host name that provides the API resource,
26 | e.g., \code{"leonardo.dsde-prod.broadinstitute.org"}.}
27 |
28 | \item{config}{httr::config() curl options}
29 |
30 | \item{authenticate}{logical(1) use credentials from authentication
31 | service file 'auth.json' in the specified package?}
32 |
33 | \item{api_url}{optional character(1) url location of OpenAPI
34 | \code{.json} or \code{.yaml} service definition.}
35 |
36 | \item{package}{character(1) (default \code{AnVIL}) The package where
37 | 'api.json' yaml and (optionally) 'auth.json' files are located.}
38 |
39 | \item{schemes}{character(1) (default 'https') Specifies the
40 | transfer protocol supported by the API service.}
41 |
42 | \item{api_reference_url}{character(1) path to reference API. See
43 | Details.}
44 |
45 | \item{api_reference_md5sum}{character(1) the result of
46 | \code{tools::md5sum()} applied to the reference API.}
47 |
48 | \item{api_reference_version}{character(1) the version of the
49 | reference API. This is used to check that the version of the
50 | service matches the version of the reference API. It is usually
51 | set by the service generation function, e.g., \code{AnVIL::Rawls()}.}
52 |
53 | \item{api_reference_headers}{character() header(s) to be used
54 | (e.g., \code{c(Authorization = paste("Bearer", token))}) when
55 | retrieving the API reference for validation.}
56 |
57 | \item{...}{additional arguments passed to \code{rapiclient::get_api()}}
58 | }
59 | \value{
60 | An object of class \code{Service}.
61 | }
62 | \description{
63 | RESTful service constructor
64 | }
65 | \details{
66 | This function creates a RESTful interface to a service
67 | provided by a host, e.g., "leonardo.dsde-prod.broadinstitute.org".
68 | The function requires an OpenAPI \code{.json} or \code{.yaml} specification
69 | as well as an (optional) \code{.json} authentication token. These files
70 | are located in the source directory of a package, at
71 | \verb{/inst/service//api.json} and
72 | \verb{/inst/service//auth.json}, or at \code{api_url}.
73 |
74 | When provided, the \code{api_reference_md5sum} is used to check that
75 | the file described at \code{api_reference_url} has the same checksum
76 | as an author-validated version.
77 |
78 | The service is usually a singleton, created at the package
79 | level during \code{.onLoad()}.
80 | }
81 | \examples{
82 | .MyService <- setClass("MyService", contains = "Service")
83 |
84 | MyService <- function() {
85 | .MyService(Service("my_service", host="my.api.org"))
86 | }
87 |
88 | }
89 |
--------------------------------------------------------------------------------
/tests/testthat/test_Services.R:
--------------------------------------------------------------------------------
1 | test_that("Services are current", {
2 | skip_if(!GCPtools::gcloud_exists()) # Terra() requests a gcloud access token
3 | expect_silent(Terra()) # presumably a stale API version signals -- TODO confirm
4 | expect_silent(Leonardo())
5 | expect_silent(Rawls())
6 | expect_silent(Dockstore())
7 | })
8 |
9 | test_that("host is captured in Service", {
10 |     api_reference_url <- "https://dockstore.org/api/openapi.yaml"
11 |     api_reference_version <- AnVIL:::.DOCKSTORE_API_REFERENCE_VERSION
12 |     .host <- function(x) x@host
13 |     myHost <- "dockstore.org"
14 |
15 |     .MyService <- setClass("MyService", contains = "Service")
16 |     MyService <- function() {
17 |         .MyService(
18 |             Service(
19 |                 "dockstore",
20 |                 host = myHost,
21 |                 config = httr::config(ssl_verifypeer = 0L, ssl_verifyhost = 0L),
22 |                 api_reference_version = api_reference_version,
23 |                 authenticate = FALSE,
24 |                 api_reference_url = api_reference_url
25 |             )
26 |         )
27 |     }
28 |     ## the `host` slot must hold exactly the value given to Service()
29 |     expect_identical(
30 |         .host(MyService()), myHost
31 |     )
32 | })
33 |
34 | test_that("Dockstore API reference version is constant", {
35 |     ## check the pinned version constant against the API at `reference_url`
36 |     ## (left disabled: .service_read_version(reference_url))
37 |     reference_url <- "https://dockstore.org/openapi.yaml"
38 |     pinned_version <- AnVIL:::.DOCKSTORE_API_REFERENCE_VERSION
39 |     no_headers <- NULL
40 |
41 |     api_file <- .service_get_api_file(
42 |         reference_url = reference_url,
43 |         reference_headers = no_headers
44 |     )
45 |     is_current <- .service_validate_version(
46 |         reference_url, pinned_version, no_headers, api_file
47 |     )
48 |
49 |     expect_true(is_current)
50 | })
51 |
52 | test_that("Rawls API reference version is constant", {
53 |     ## check the pinned version constant against the API at `reference_url`
54 |     ## (left disabled: .service_read_version(reference_url))
55 |     reference_url <-
56 |         "https://rawls.dsde-prod.broadinstitute.org/api-docs.yaml"
57 |     pinned_version <- AnVIL:::.RAWLS_API_REFERENCE_VERSION
58 |     no_headers <- NULL
59 |
60 |     api_file <- .service_get_api_file(
61 |         reference_url = reference_url,
62 |         reference_headers = no_headers
63 |     )
64 |     is_current <- .service_validate_version(
65 |         reference_url, pinned_version, no_headers, api_file
66 |     )
67 |
68 |     expect_true(is_current)
69 | })
70 |
71 | test_that("Leonardo API reference version is constant", {
72 |     ## check the pinned version constant against the API at `reference_url`
73 |     ## (left disabled: .service_read_version(reference_url))
74 |     reference_url <-
75 |         "https://leonardo.dsde-prod.broadinstitute.org/api-docs.yaml"
76 |     pinned_version <- AnVIL:::.LEONARDO_API_REFERENCE_VERSION
77 |     no_headers <- NULL
78 |
79 |     api_file <- .service_get_api_file(
80 |         reference_url = reference_url,
81 |         reference_headers = no_headers
82 |     )
83 |     is_current <- .service_validate_version(
84 |         reference_url, pinned_version, no_headers, api_file
85 |     )
86 |
87 |     expect_true(is_current)
88 | })
89 |
90 | test_that("Terra API reference version is constant", {
91 |     ## check the pinned version constant against the API at `reference_url`
92 |     ## (left disabled: .service_read_version(reference_url))
93 |     reference_url <- "https://api.firecloud.org/api-docs.yaml"
94 |     pinned_version <- AnVIL:::.TERRA_API_REFERENCE_VERSION
95 |     no_headers <- NULL
96 |
97 |     api_file <- .service_get_api_file(
98 |         reference_url = reference_url,
99 |         reference_headers = no_headers
100 |     )
101 |     is_current <- .service_validate_version(
102 |         reference_url, pinned_version, no_headers, api_file
103 |     )
104 |
105 |     expect_true(is_current)
106 | })
107 |
--------------------------------------------------------------------------------
/man/Services.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/Services.R, R/Leonardo.R, R/Terra.R, R/Rawls.R,
3 | % R/Dockstore.R, R/TDR.R
4 | \docType{methods}
5 | \name{Services}
6 | \alias{Services}
7 | \alias{.DollarNames.Service}
8 | \alias{operations,Service-method}
9 | \alias{schemas,Service-method}
10 | \alias{show,Service-method}
11 | \alias{Service-class}
12 | \alias{empty_object}
13 | \alias{operations}
14 | \alias{schemas}
15 | \alias{tags}
16 | \alias{$,Service-method}
17 | \alias{Leonardo}
18 | \alias{Leonardo-class}
19 | \alias{operations,Leonardo-method}
20 | \alias{Terra}
21 | \alias{Terra-class}
22 | \alias{operations,Terra-method}
23 | \alias{schemas,Terra-method}
24 | \alias{Rawls}
25 | \alias{Rawls-class}
26 | \alias{operations,Rawls-method}
27 | \alias{schemas,Rawls-method}
28 | \alias{Dockstore}
29 | \alias{Dockstore-class}
30 | \alias{operations,Dockstore-method}
31 | \alias{TDR}
32 | \alias{TDR-class}
33 | \alias{operations,TDR-method}
34 | \title{RESTful services useful for AnVIL developers}
35 | \format{
36 | An object of class \code{list} of length 0.
37 | }
38 | \usage{
39 | empty_object
40 |
41 | operations(x, ..., .deprecated = FALSE)
42 |
43 | \S4method{operations}{Service}(x, ..., auto_unbox = FALSE, .deprecated = FALSE)
44 |
45 | schemas(x)
46 |
47 | tags(x, .tags, .deprecated = FALSE)
48 |
49 | \S4method{$}{Service}(x, name)
50 |
51 | Leonardo()
52 |
53 | Terra()
54 |
55 | Rawls()
56 |
57 | Dockstore()
58 |
59 | TDR()
60 | }
61 | \arguments{
62 | \item{x}{A \code{Service} instance, usually a singleton provided by the
63 | package and documented on this page, e.g., \code{leonardo} or
64 | \code{terra}.}
65 |
66 | \item{...}{additional arguments passed to methods or, for
67 | \verb{operations,Service-method}, to \code{rapiclient::get_operations()}
68 | (e.g., \code{.headers}).}
69 |
70 | \item{.deprecated}{optional logical(1) include deprecated operations?}
71 |
72 | \item{auto_unbox}{logical(1) If FALSE (default) do not
73 | automatically 'unbox' R scalar values from JSON arrays to JSON
74 | scalars.}
75 |
76 | \item{.tags}{optional character() of tags to use to filter operations.}
77 |
78 | \item{name}{A symbol representing a defined operation, e.g.,
79 | \code{leonardo$listRuntimes()}.}
80 | }
81 | \value{
82 | \code{empty_object} returns a representation to be used as
83 | arguments in function calls expecting the empty json object
84 | \code{{}}.
85 |
86 | \code{Leonardo()} creates the API of the Leonardo container
87 | deployment service at
88 | \url{https://leonardo.dsde-prod.broadinstitute.org/api-docs.yaml}.
89 |
90 | \code{Terra()} creates the API of the Terra cloud computational
91 | environment at \url{https://api.firecloud.org/}.
92 |
93 | \code{Rawls()} creates the API of the Rawls cloud computational
94 | environment at \url{https://rawls.dsde-prod.broadinstitute.org}.
95 |
96 | \code{Dockstore()} represents the API of the Dockstore platform to
97 | share Docker-based tools in CWL or WDL or Nextflow at
98 | \url{https://dockstore.org}
99 |
100 | \code{TDR()} creates the API of the Terra Data Repository to work with
101 | snapshot data in the Terra Data Repository at \url{https://data.terra.bio}.
102 | }
103 | \description{
104 | RESTful services useful for AnVIL developers
105 | }
106 | \details{
107 | Note the services \code{Terra()}, \code{Rawls()}, and \code{Leonardo()} require the
108 | \code{GCPtools} package for authentication to the Google Cloud Platform. See
109 | \code{?GCPtools::gcloud_access_token()} for details.
110 |
111 | When using \code{$} to select a service, some arguments appear
112 | in 'body' of the REST request. Specify these using the
113 | \verb{.__body__=} argument, as illustrated for
114 | \code{createBillingProjectFull()}, below.
115 | }
116 | \examples{
117 | empty_object
118 |
119 | \dontshow{if (GCPtools::gcloud_exists()) withAutoprint(\{ # examplesIf}
120 | ## Arguments to be used as the 'body' (`.__body__=`) of a REST query
121 | Terra()$createBillingProjectFull # 6 arguments...
122 | ## ... passed as `.__body__ = list(...)`
123 | args(Terra()$createBillingProjectFull)
124 | \dontshow{\}) # examplesIf}
125 | library(GCPtools)
126 | if (gcloud_exists())
127 | Leonardo()
128 |
129 | library(GCPtools)
130 | if (gcloud_exists()) {
131 | tags(Terra())
132 | tags(Terra(), "Billing")
133 | }
134 |
135 | library(GCPtools)
136 | if (gcloud_exists()) {
137 | tags(Rawls())
138 | tags(Rawls(), "billing")
139 | }
140 |
141 | Dockstore()
142 |
143 | library(GCPtools)
144 | if (gcloud_exists())
145 | TDR()
146 |
147 | }
148 | \keyword{datasets}
149 |
--------------------------------------------------------------------------------
/R/Services.R:
--------------------------------------------------------------------------------
1 | #' @rdname Services
2 | #'
3 | #' @docType methods
4 | #'
5 | #' @name Services
6 | #'
7 | #' @title RESTful services useful for AnVIL developers
8 | #'
9 | #' @details Note the services `Terra()`, `Rawls()`, and `Leonardo()` require the
10 | #' `GCPtools` package for authentication to the Google Cloud Platform. See
11 | #' `?GCPtools::gcloud_access_token()` for details.
12 | #'
13 | #' @aliases .DollarNames.Service operations,Service-method
14 | #' schemas,Service-method show,Service-method Service-class
15 | NULL
16 |
17 | #' @rdname Services
18 | #'
19 | #' @return `empty_object` is a zero-length named list, to be used as
20 | #'     an argument in function calls expecting the empty json object
21 | #'     `{}`.
22 | #'
23 | #' @examples
24 | #' empty_object
25 | #'
26 | #' @importFrom stats setNames
27 | #'
28 | #' @export
29 | empty_object <- `names<-`(list(), character()) # zero-length *named* list
30 |
31 | #' @rdname Services
32 | #'
33 | #' @param ... additional arguments passed to methods or, for
34 | #' `operations,Service-method`, to `rapiclient::get_operations()`
35 | #' (e.g., `.headers`).
36 | #'
37 | #' @param .deprecated optional logical(1) include deprecated operations?
38 | #'
39 | #' @export
40 | setGeneric(
41 | "operations",
42 | function(x, ..., .deprecated = FALSE)
43 | standardGeneric("operations"),
44 | signature = "x" # dispatch on `x` only
45 | )
46 |
47 | #' @rdname Services
48 | #'
49 | #' @param auto_unbox logical(1) If FALSE (default) do not
50 | #'     automatically 'unbox' R scalar values from JSON arrays to JSON
51 | #'     scalars.
52 | #'
53 | #' @importFrom rapiclient get_operations
54 | #'
55 | #' @export
56 | setMethod(
57 |     "operations", "Service",
58 |     function(x, ..., auto_unbox = FALSE, .deprecated = FALSE)
59 | {
60 |     ## NOTE(review): `auto_unbox` is validated here but not forwarded to
61 |     ## get_operations() in this method -- confirm that this is intended
62 |     stopifnot(isScalarLogical(auto_unbox))
63 |     all_ops <- get_operations(.api(x), ...)
64 |     is_deprecated <- vapply(
65 |         .operation_field(all_ops, "deprecated"), isTRUE, logical(1))
66 |     all_ops[.deprecated | !is_deprecated] # deprecated only when requested
67 | })
68 |
69 | #' @rdname Services
70 | #'
71 | #' @export
72 | setGeneric(
73 |     "schemas", function(x) standardGeneric("schemas")
74 | )
75 |
76 | #' @importFrom rapiclient get_schemas
77 | #' @export
78 | setMethod("schemas", "Service", function(x) {
79 |     ## schemas are read from the service's 'api' slot
80 |     get_schemas(.api(x))
81 | })
82 |
83 | .operation_field <-
84 |     function(operations, field)
85 | {
86 |     ## read the 'definition' attribute of each operation, then extract
87 |     ## `field` from it (NULL where the definition has no such field)
88 |     definitions <- lapply(operations, attr, which = "definition")
89 |     lapply(definitions, `[[`, field)
90 | }
91 |
92 | #' @rdname Services
93 | #'
94 | #' @param x A `Service` instance, usually a singleton provided by the
95 | #'     package and documented on this page, e.g., `leonardo` or
96 | #'     `terra`.
97 | #'
98 | #' @param .tags optional character() of tags to use to filter operations.
99 | #'
100 | #' @importFrom tibble tibble
101 | #' @importFrom dplyr filter arrange
102 | #'
103 | #' @export
104 | tags <-
105 |     function(x, .tags, .deprecated = FALSE)
106 | {
107 |     operations <- operations(x, .deprecated = .deprecated)
108 |
109 |     ## operations without tags are reported under an NA tag
110 |     tags <- .operation_field(operations, "tags")
111 |     tags[vapply(tags, is.null, logical(1))] <- NA_character_
112 |     names(tags) <- trimws(names(tags))
113 |
114 |     ## one summary per operation; NA when the definition has none
115 |     summary <- .operation_field(operations, "summary")
116 |     summary[vapply(summary, is.null, logical(1))] <- list(NA_character_)
117 |     summary <- trimws(unlist(summary, use.names=FALSE))
118 |     summary <- sub("\\\\", "", summary)
119 |
120 |     ## one row per (tag, operation); as.character() keeps 'tag' if empty
121 |     tbl <- tibble(
122 |         tag = as.character(unlist(tags, use.names=FALSE)),
123 |         operation = rep(names(tags), lengths(tags)),
124 |         summary = rep(summary, lengths(tags)))
125 |     if (!missing(.tags))
126 |         tbl <- filter(tbl, tbl$tag %in% .tags)
127 |     arrange(tbl, tbl$tag, tbl$operation)
128 | }
129 |
130 | #' @rdname Services
131 | #'
132 | #' @param name A symbol representing a defined operation, e.g.,
133 | #'     `leonardo$listRuntimes()`.
134 | #'
135 | #' @details When using `$` to select an operation, some arguments appear
136 | #'     in 'body' of the REST request. Specify these using the
137 | #'     `.__body__=` argument, as illustrated for
138 | #'     `createBillingProjectFull()`, below.
139 | #'
140 | #' @examplesIf GCPtools::gcloud_exists()
141 | #' ## Arguments to be used as the 'body' (`.__body__=`) of a REST query
142 | #' Terra()$createBillingProjectFull # 6 arguments...
143 | #' ## ... passed as `.__body__ = list(...)`
144 | #' args(Terra()$createBillingProjectFull)
145 | #' @export
146 | setMethod(
147 |     "$", "Service",
148 |     function(x, name)
149 | {
150 |     operation <- operations(x, .deprecated = TRUE)[[name]] # incl. deprecated
151 |     if (isTRUE(attr(operation, "definition")[["deprecated"]]))
152 |         warning("'", name, "()' is deprecated")
153 |     operation
154 | })
155 |
--------------------------------------------------------------------------------
/vignettes/BiocDockstore.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "_Dockstore_ and _Bioconductor_ for AnVIL"
3 | author:
4 | - name: BJ Stubbs
5 | - name: S Gopaulakrishnan
6 | - name: Nitesh Turaga
7 | - name: Martin Morgan
8 | - name: Vincent Carey
9 | email: stvjc@channing.harvard.edu
10 | package: AnVIL
11 | output:
12 | BiocStyle::html_document
13 | vignette: >
14 | %\VignetteEngine{knitr::rmarkdown}
15 | %\VignetteIndexEntry{Dockstore and Bioconductor for AnVIL}
16 | %\VignetteEncoding{UTF-8}
17 | ---
18 |
19 | ```{r setup, include = FALSE}
20 | has_gcloud <- AnVILBase::has_avworkspace(
21 | platform = AnVILGCP::gcp()
22 | )
23 | knitr::opts_chunk$set(
24 | eval = has_gcloud, collapse = TRUE, cache = TRUE
25 | )
26 | ```
27 |
28 | # Introduction: Basic concepts of _Dockstore_ and _Bioconductor_
29 |
30 | _Dockstore_ is the "VM/Docker sharing infrastructure and management
31 | component" of the Global Alliance for Genomics and Health (GA4GH).
32 | Dockstore.org implements the infrastructure by defining APIs for
33 | coupling Docker images with formalized workflow specifications. The
34 | application of this concept to the PanCancer Analysis of Whole Genomes
35 | (PCAWG) is described in a [2017 paper][PCAWG] by O'Connor and
36 | colleagues.
37 |
38 | [PCAWG]: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5333608/
39 |
40 | [Bioconductor][] is a software ecosystem based in the R language for
41 | the analysis and comprehension of genome-scale experiments. [An
42 | overview][PMC4509590] was published in 2015.
43 |
44 | [Bioconductor]: https://bioconductor.org
45 | [PMC4509590]: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4509590/
46 |
47 | _Bioconductor_ implemented a ["common workflow"][workflow] concept a
48 | number of years ago. (The term "common" is used on the _Bioconductor_
49 | landing page, to indicate that many bioinformaticians would be
50 | expected to engage with tasks reviewed in these workflows. The
51 | "common workflow" phrase is not intended to indicate a relationship to
52 | CWL, the "common workflow language" standard.)
53 |
54 | [workflow]: https://www.bioconductor.org/packages/release/BiocViews.html#___Workflow
55 |
56 | In _Bioconductor_ parlance, a "common workflow" is an R package
57 | accompanied by a markdown or Sweave vignette that narrates and
58 | executes the workflow tasks using R. Any sufficiently endowed
59 | deployment of R on a sufficient computing platform will run the
60 | workflow tasks to completion; this assertion is checked on a regular
61 | basis through _Bioconductor_'s continuous integration system. When the
62 | assertion fails, the workflow maintainer is alerted and corrections
63 | are made.
64 |
65 | Advantages to marrying the _Bioconductor_ workflow concept with
66 | _Dockstore_ VM/Docker/workflow infrastructure include
67 |
68 | - Reduction in user burden of configuring and maintaining the
69 | execution platform
70 | - Utilization of parameterized formal workflow specification in CWL,
71 | WDL, or Nextflow
72 | - General advantages to users of fostering participation in GA4GH best
73 | practices related to reproducibility and transparency
74 |
75 | Open questions concern the balance between specification of workflow
76 | steps in R and in the formal workflow language. _Bioconductor_
77 | workflows can be written to take advantage of R's capabilities to
78 | drive computations on potentially heterogeneous clusters with
79 | programmable fault tolerance and job control. The particular
80 | advantages of CWL/WDL/Nextflow and other aspects of the _Dockstore_
81 | ecosystem need to be experienced, measured, and documented to help
82 | developers establish the appropriate balance between programming R and
83 | programming an exogenous workflow environment.
84 |
85 | # Working with the _Dockstore_ API in _Bioconductor_
86 |
87 | The [AnVIL][] package handles basic aspects of authentication and API
88 | element cataloguing for the AnVIL project.
89 |
90 | [AnVIL]: https://github.com/Bioconductor/AnVIL
91 |
92 | ```{r lka, message = FALSE}
93 | library(AnVIL)
94 | ```
95 |
96 | Create an object `dockstore` that represents the service and is
97 | used to process API requests.
98 |
99 | ```{r dockstore}
100 | dockstore <- Dockstore()
101 | ```
102 |
103 | Groups of API components are obtained via `tags()`.
104 |
105 | ```{r getmeths}
106 | knitr::kable(tags(dockstore) |> count(tag))
107 | ```
108 |
109 | We're interested in the 'users' component. Higher level methods will
110 | be introduced to help here, but for now we stick to base R methods.
111 |
112 | ```{r lku}
113 | tags(dockstore, "users") |> print(n = Inf)
114 | ```
115 |
116 | We can use the following to determine our user identifier.
117 |
118 | ```{r lklk, eval = FALSE}
119 | myuid <- dockstore$getUser() |>
120 |     as.list() |>
121 |     getElement("id") # base R extraction of the 'id' element of the list
122 | ```
123 |
124 | # Appendix
125 |
126 | ## Acknowledgments {.unnumbered}
127 |
128 | Research reported in this software package was supported by the US
129 | National Human Genome Research Institute of the National Institutes
130 | of Health under award number [U24HG010263][]. The content is solely
131 | the responsibility of the authors and does not necessarily represent
132 | the official views of the National Institutes of Health.
133 |
134 | [U24HG010263]: https://projectreporter.nih.gov/project_info_description.cfm?aid=9789931&icde=49694078
135 |
136 | ## Session info {.unnumbered}
137 |
138 | ```{r sessionInfo, echo=FALSE}
139 | sessionInfo()
140 | ```
141 |
--------------------------------------------------------------------------------
/R/Service.R:
--------------------------------------------------------------------------------
1 | #' @import methods
2 |
3 | setOldClass("rapi_api")
4 |
5 | setOldClass("request")
6 |
7 | #' @importFrom rapiclient get_api
8 | #'
9 | #' @export
10 | .Service <- setClass(
11 |     Class = "Service",
12 |     ## slots: service name, httr request configuration, parsed
13 |     ## rapiclient API, and the host that serves the API
14 |     slots = list(
15 |         service = "character", config = "request",
16 |         api = "rapi_api", host = "character"
17 |     )
18 | )
19 |
20 | .service <- function(x) slot(x, "service") # value of the 'service' slot
21 | .host <- function(x) slot(x, "host")       # value of the 'host' slot
22 | .config <- function(x) slot(x, "config")   # value of the 'config' slot
23 |
24 | #' @importFrom httr write_disk GET add_headers
25 | #' @importFrom AnVILBase avstop_for_status
26 | .service_get_api_file <- function(reference_url, reference_headers) {
27 |     ## Download the API definition at `reference_url`, sending any
28 |     ## `reference_headers`, to a temporary file. The response is checked
29 |     ## with avstop_for_status() before the path to the file is returned.
30 |     destination <- tempfile()
31 |     headers <- add_headers(.headers = reference_headers)
32 |     response <- GET(reference_url, headers, write_disk(destination))
33 |     avstop_for_status(response, ".service_get_api_file")
34 |     destination
35 | }
36 |
37 | .service_validate_md5sum_warn <- new.env(parent = emptyenv())
38 |
39 | #' @importFrom tools md5sum
40 | #' @importFrom utils download.file
41 | .service_validate_md5sum <-
42 |     function(reference_url, reference_md5sum, reference_headers, api_file)
43 | {
44 |     ## Warn (once per url) when the md5sum of `api_file` differs from
45 |     ## `reference_md5sum`; NULL when no reference checksum is given.
46 |     flog.debug("Service reference url: %s", reference_url)
47 |     flog.debug("Service reference md5sum: %s", reference_md5sum)
48 |     if (length(reference_md5sum) == 0L)
49 |         return()
50 |     observed <- unname(md5sum(api_file))
51 |     warned <- exists(reference_url, envir = .service_validate_md5sum_warn)
52 |     test <- identical(observed, reference_md5sum) || warned
53 |     if (!test) {
54 |         ## flag only on warning: a match must not silence later mismatches
55 |         .service_validate_md5sum_warn[[reference_url]] <- TRUE
56 |         warning(
57 |             "service version differs from validated version",
58 |             "\n service url: ", reference_url,
59 |             "\n observed md5sum: ", observed,
60 |             "\n expected md5sum: ", reference_md5sum)
61 |     }
62 |     test
63 | }
64 |
65 | .service_read_version <- function(file) {
66 |     ## version string recorded in the `info` section of the API file
67 |     yaml::read_yaml(file)[["info"]][["version"]]
68 | }
69 |
70 | .service_validate_version <-
71 |     function(reference_url, reference_version, reference_headers, api_file)
72 | {
73 |     ## Warn when the version recorded in `api_file` is not identical to
74 |     ## `reference_version`; NULL when either version is unavailable,
75 |     ## otherwise the logical result. `reference_headers` is not used.
76 |     flog.debug("Service reference url: %s", reference_url)
77 |     flog.debug("Service reference version: %s", reference_version)
78 |     if (length(reference_version) == 0L)
79 |         return()
80 |     observed <- .service_read_version(api_file)
81 |     if (length(observed) == 0L)
82 |         return()
83 |     same <- identical(observed, reference_version)
84 |     if (!same) {
85 |         warning(
86 |             "service version differs from validated version",
87 |             "\n service url: ", reference_url,
88 |             "\n observed version: ", observed,
89 |             "\n expected version: ", reference_version
90 |         )
91 |     }
92 |     same
93 | }
94 |
95 | #' @rdname Service
96 | #'
97 | #' @name Service
98 | #'
99 | #' @title RESTful service constructor
100 | #'
101 | #' @param service character(1) The `Service` class name, e.g., `"terra"`.
102 | #'
103 | #' @param host character(1) host name that provides the API resource,
104 | #' e.g., `"leonardo.dsde-prod.broadinstitute.org"`.
105 | #'
106 | #' @param config httr::config() curl options
107 | #'
108 | #' @param authenticate logical(1) use credentials from authentication
109 | #' service file 'auth.json' in the specified package?
110 | #'
111 | #' @param api_url optional character(1) url location of OpenAPI
112 | #' `.json` or `.yaml` service definition.
113 | #'
114 | #' @param package character(1) (default `AnVIL`) The package where
115 | #' 'api.json' yaml and (optionally) 'auth.json' files are located.
116 | #'
117 | #' @param schemes character(1) (default 'https') Specifies the
118 | #' transfer protocol supported by the API service.
119 | #'
120 | #' @param api_reference_url character(1) path to reference API. See
121 | #' Details.
122 | #'
123 | #' @param api_reference_md5sum character(1) the result of
124 | #' `tools::md5sum()` applied to the reference API.
125 | #'
126 | #' @param api_reference_version character(1) the version of the
127 | #' reference API. This is used to check that the version of the
128 | #' service matches the version of the reference API. It is usually
129 | #' set by the service generation function, e.g., `AnVIL::Rawls()`.
130 | #'
131 | #' @param api_reference_headers character() header(s) to be used
132 | #' (e.g., `c(Authorization = paste("Bearer", token))`) when
133 | #' retrieving the API reference for validation.
134 | #'
135 | #' @param ... additional arguments passed to `rapiclient::get_api()`
136 | #'
137 | #' @details This function creates a RESTful interface to a service
138 | #' provided by a host, e.g., "leonardo.dsde-prod.broadinstitute.org".
139 | #' The function requires an OpenAPI `.json` or `.yaml` specification
140 | #' as well as an (optional) `.json` authentication token. These files
141 | #' are located in the source directory of a package, at
142 | #' `<package>/inst/service/<service>/api.json` and
143 | #' `<package>/inst/service/<service>/auth.json`, or at `api_url`.
144 | #'
145 | #' When provided, the `api_reference_md5sum` is used to check that
146 | #' the file described at `api_reference_url` has the same checksum
147 | #' as an author-validated version.
148 | #'
149 | #' The service is usually a singleton, created at the package
150 | #' level during `.onLoad()`.
151 | #'
152 | #' @return An object of class \code{Service}.
153 | #'
154 | #' @importFrom BiocBaseUtils isScalarCharacter isScalarLogical isCharacter
155 | #'
156 | #' @examples
157 | #' .MyService <- setClass("MyService", contains = "Service")
158 | #'
159 | #' MyService <- function() {
160 | #' .MyService(Service("my_service", host="my.api.org"))
161 | #' }
162 | #'
163 | #' @export
164 | Service <-
165 |     function(
166 |         service, host, config = httr::config(), authenticate = TRUE,
167 |         api_url = character(), package = "AnVIL", schemes = "https",
168 |         api_reference_url = api_url,
169 |         api_reference_md5sum = character(),
170 |         api_reference_version = character(),
171 |         api_reference_headers = NULL,
172 |         ...
173 |     ) {
174 |     stopifnot(
175 |         isScalarCharacter(service),
176 |         isScalarCharacter(host),
177 |         isScalarLogical(authenticate),
178 |         length(api_url) == 0L || isScalarCharacter(api_url),
179 |         length(api_reference_url) == 0L ||
180 |             isScalarCharacter(api_reference_url),
181 |         length(api_reference_md5sum) == 0L ||
182 |             isScalarCharacter(api_reference_md5sum),
183 |         length(api_reference_version) == 0L ||
184 |             isScalarCharacter(api_reference_version),
185 |         is.null(api_reference_headers) || isCharacter(api_reference_headers)
186 |     )
187 |     flog.debug("Service(): %s", service)
188 |
189 |     ## validate only when a reference API exists; with no `api_url` /
190 |     ## `api_reference_url` there is nothing to download or compare
191 |     if (length(api_reference_url) > 0L) {
192 |         api_file <-
193 |             .service_get_api_file(api_reference_url, api_reference_headers)
194 |         on.exit(unlink(api_file), add = TRUE) # downloaded copy is temporary
195 |         .service_validate_md5sum(
196 |             api_reference_url, api_reference_md5sum,
197 |             api_reference_headers, api_file
198 |         )
199 |         .service_validate_version(
200 |             api_reference_url, api_reference_version,
201 |             api_reference_headers, api_file
202 |         )
203 |     }
204 |
205 |     ## prepend authentication to any user-supplied configuration
206 |     if (authenticate)
207 |         config <- c(authenticate_config(service), config)
208 |
209 |     ## definition comes from `api_url` or, failing that, the package
210 |     path <- if (length(api_url) > 0L) api_url else .api_path(service, package)
211 |     api <- withCallingHandlers(
212 |         get_api(path, config, ...),
213 |         warning = function(w) {
214 |             ## muffle only the expected warning; others propagate
215 |             msg <- "Missing Swagger Specification version"
216 |             if (identical(conditionMessage(w), msg))
217 |                 invokeRestart("muffleWarning")
218 |         }
219 |     )
220 |     api$schemes <- schemes
221 |     api$host <- host
222 |     api$paths <- .api_paths_fix(api$paths)
223 |     .Service(service = service, config = config, api = api, host = host)
224 | }
225 |
226 | #' @importFrom utils .DollarNames
227 | #'
228 | #' @export
229 | .DollarNames.Service <-
230 |     function(x, pattern = "")
231 | {   ## `$` completion: names of operations(x) matching `pattern`
232 |     grep(pattern, names(operations(x)), value = TRUE)
233 | }
234 |
235 | #' @export
236 | setMethod(
237 |     "show", "Service",
238 |     function(object)
239 | {
240 |     ## header: service name, host, and hint for accessing operations
241 |     cls <- tolower(class(object))
242 |     cat(
243 |         "service: ", .service(object), "\n", "host: ", .host(object), "\n",
244 |         "tags(); use ", cls, "$:\n", sep = ""
245 |     )
246 |     ## table of tags, as returned by tags()
247 |     tbl <- tags(object)
248 |     print(tbl)
249 |     ## footer: unique tag values and schema names (with `some = TRUE`)
250 |     tag_values <- .pretty(unique(tbl$tag), 2, 2)
251 |     schema_names <- .pretty(names(schemas(object)), 2, 2, some = TRUE)
252 |     cat(
253 |         "tag values:\n", tag_values, "\n",
254 |         "schemas():\n", schema_names, "\n", sep = "")
255 | })
256 |
--------------------------------------------------------------------------------
/R/gadgets.R:
--------------------------------------------------------------------------------
1 | #' @importFrom DT renderDT datatable formatStyle
2 | .gadget_renderDT <-
3 |     function(tbl)
4 | {
5 |     force(tbl) # force the promise so the render expression sees `tbl`
6 |     ## formatStyle() selects *columns*; 0 is the row-name column, so
7 |     ## top-align 0..NCOL(tbl) rather than one entry per row of `tbl`
8 |     columns <- seq_len(NCOL(tbl) + 1L) - 1L
9 |     renderDT(
10 |         datatable(
11 |             tbl, fillContainer = TRUE,
12 |             selection = list(mode = "single", target = "row"),
13 |             options = list(dom = "ftp")
14 |         ) |> formatStyle(columns, 'vertical-align'='top'))
15 | }
16 |
17 | #' @importFrom htmltools p strong
18 | #'
19 | #' @importFrom miniUI miniPage gadgetTitleBar miniContentPanel
20 | #'
21 | #' @importFrom DT DTOutput renderDT
22 | #'
23 | #' @importFrom shiny textOutput
24 | .gadget_ui <-
25 |     function(title)
26 | {
27 |     force(title)
28 |     ## returns a function building the page: current workspace, the
29 |     ## table, and a title bar labelled `title`
30 |     function() {
31 |         workspace_label <- p(
32 |             strong("Current workspace:"),
33 |             textOutput("workspace", inline = TRUE)
34 |         )
35 |         table_panel <-
36 |             miniContentPanel(DTOutput("gadget_tibble", height = "100%"))
37 |         title_bar <- gadgetTitleBar(title)
38 |         miniPage(workspace_label, table_panel, title_bar)
39 |     }
40 | }
41 |
42 | #' @importFrom shiny observeEvent stopApp renderText
43 | #' @importFrom AnVILBase avworkspace avworkspace_name
44 | .gadget_server <-
45 |     function(tibble, DONE_FUN)
46 | {
47 |     ## force both arguments before they are captured by the server
48 |     ## function returned below
49 |     force(tibble)
50 |     force(DONE_FUN)
51 |     function(input, output, session) {
52 |         ## current workspace, shown only when a workspace name is set
53 |         output$workspace <- renderText({
54 |             if (nzchar(avworkspace_name(warn = FALSE)))
55 |                 avworkspace()
56 |         })
57 |
58 |         output$gadget_tibble <- .gadget_renderDT(tibble)
59 |
60 |         ## 'Done': DONE_FUN() on the selected row, else character()
61 |         observeEvent(input$done, {
62 |             selected <- input$gadget_tibble_rows_selected
63 |             result <- character()
64 |             if (is.integer(selected))
65 |                 result <- DONE_FUN(tibble, selected)
66 |             stopApp(result)
67 |         })
68 |
69 |         ## 'Cancel': the gadget returns NULL
70 |         observeEvent(input$cancel, stopApp(NULL))
71 |     }
72 | }
73 |
74 | #' @rdname gadgets_developer
75 | #'
76 | #' @title Functions to implement AnVIL gadget interfaces
77 | #'
78 | #' @description Functions documented on this page are primarily
79 | #' intended for package developers wishing to implement gadgets
80 | #' (graphical interfaces) to navigating AnVIL-generated tables.
81 | #'
82 | #' @description `.gadget_run()` presents the user with a
83 | #' tibble-navigating gadget, returning the value of `DONE_FUN` if a row
84 | #' is selected, `character()` if none is, or `NULL` on cancel.
85 | #'
86 | #' @param title character(1) (required) title to appear at the base of
87 | #' the gadget, e.g., "AnVIL Workspaces".
88 | #'
89 | #' @param tibble a `tibble` or `data.frame` to be displayed in the
90 | #' gadget.
91 | #'
92 | #' @param DONE_FUN a function of two arguments, `tibble` and
93 | #' `row_selected`. The tibble is the `tibble` provided as an
94 | #' argument to `.gadget_run()`. `row_selected` is the row
95 | #' selected in the gadget by the user. The function is only
96 | #' invoked when the user selects a valid row.
97 | #'
98 | #' @return `.gadget_run()` returns the result of `DONE_FUN()` if a row
99 | #' has been selected by the user, `character()` if the user presses
100 | #' `Done` prior to selecting any row, or `NULL` if the user presses
101 | #' `Cancel`.
102 | #'
103 | #' @importFrom shiny runGadget
104 | #'
105 | #' @examplesIf has_avworkspace(TRUE, platform = AnVILGCP::gcp())
106 | #' tibble <- avworkspaces(platform = AnVILGCP::gcp())
107 | #' DONE_FUN <- function(tibble, row_selected) {
108 | #' selected <- slice(tibble, row_selected)
109 | #' with(selected, paste0(namespace, "/", name))
110 | #' }
111 | #' .gadget_run("AnVIL Example", tibble, DONE_FUN)
112 | #' @export
113 | .gadget_run <-
114 |     function(title, tibble, DONE_FUN)
115 | {
116 |     ## validate up front: a non-function `DONE_FUN` would otherwise only
117 |     ## fail inside the running gadget, after the user presses 'Done'
118 |     stopifnot(
119 |         isScalarCharacter(title),
120 |         is.data.frame(tibble),
121 |         is.function(DONE_FUN)
122 |     )
123 |     ui <- .gadget_ui(title)
124 |     server <- .gadget_server(tibble, DONE_FUN)
125 |     ## stopOnCancel = FALSE: 'Cancel' is handled by the server function
126 |     suppressMessages(runGadget(ui, server, stopOnCancel = FALSE))
127 | }
128 |
129 | #' @rdname gadgets
130 | #'
131 | #' @title Graphical user interfaces for common AnVIL operations
132 | #'
133 | #' @description `avworkspace_gadget()` allows choice of workspace for
134 | #' subsequent use. It is the equivalent of displaying workspaces
135 | #' with `avworkspaces()`, and setting the selected workspace with
136 | #' `avworkspace()`.
137 | #'
138 | #' @return `avworkspace_gadget()` returns the selected workspace as a
139 | #' character(1) using the format namespace/name, or character(0)
140 | #' if no workspace is selected.
141 | #'
142 | #' @examplesIf has_avworkspace(TRUE, platform = AnVILGCP::gcp())
143 | #' avworkspace_gadget()
144 | #' browse_workspace(use_avworkspace = FALSE)
145 | #' tbl <- avtable_gadget()
146 | #' wkflw <- avworkflow_gadget()
147 | #' @export
148 | avworkspace_gadget <- function() {
149 |     ## choose a workspace with the gadget; `.workspace_impl()` makes
150 |     ## the selection (if any) the current workspace
151 |     .workspace_impl()
152 | }
153 |
154 | #' @importFrom AnVILBase avworkspaces
155 | .workspaces <- local({
156 |     ## cache: avworkspaces() is called once, on first use, and the
157 |     ## stored table is returned for the rest of the session
158 |     cached <- NULL
159 |     function() {
160 |         if (is.null(cached)) cached <<- avworkspaces()
161 |         cached
162 |     }
163 | })
164 |
165 | .workspace_impl <-
166 |     function(use_avworkspace = TRUE)
167 | {
168 |     ## `use_avworkspace` is accepted but not consulted; the gadget is
169 |     ## always shown. DONE_FUN formats the chosen row as namespace/name
170 |     as_workspace <- function(tibble, row_selected) {
171 |         paste0(tibble$namespace[row_selected], "/", tibble$name[row_selected])
172 |     }
173 |     chosen <- .gadget_run("AnVIL Workspaces", .workspaces(), as_workspace)
174 |     if (length(chosen) > 0L) {
175 |         avworkspace(chosen) # make the selection the current workspace
176 |         message("workspace set to '", avworkspace(), "'")
177 |     }
178 |     invisible(chosen)
179 | }
180 |
181 | .workspace_get <-
182 |     function(use_avworkspace = TRUE)
183 | {
184 |     ## use the current workspace when requested and one is set;
185 |     ## otherwise prompt with the workspace gadget
186 |     have_current <-
187 |         use_avworkspace && nzchar(avworkspace_name(warn = FALSE))
188 |     selection <-
189 |         if (have_current) avworkspace() else .workspace_impl(TRUE)
190 |     ## neither a current nor a newly chosen workspace
191 |     if (length(selection) == 0L)
192 |         stop("select a workspace to visit", call. = FALSE)
193 |     selection
194 | }
195 |
196 | #' @rdname gadgets
197 | #'
198 | #' @description `browse_workspace()` uses `browseURL()` to open a
199 | #' browser window pointing to the Terra workspace.
200 | #'
201 | #' @param use_avworkspace logical(1) when `TRUE` (default), use the
202 | #' selected workspace (via `avworkspace_gadget()` or `avworkspace()`),
203 | #' if available. If `FALSE` or no workspace is currently selected, use
204 | #' `avworkspace_gadget()` to allow the user to select the workspace.
205 | #'
206 | #' @return `browse_workspace()` returns the status of a `system()`
207 | #' call to launch the browser, invisibly.
208 | #'
209 | #' @importFrom utils browseURL
210 | #'
211 | #' @export
212 | browse_workspace <-
213 |     function(use_avworkspace = TRUE)
214 | {
215 |     stopifnot(isScalarLogical(use_avworkspace))
216 |     ## namespace/name of the workspace, prompting the user if necessary
217 |     workspace <- .workspace_get(use_avworkspace)
218 |     ## open the corresponding Terra workspace page
219 |     browseURL(paste0("https://app.terra.bio/#workspaces/", workspace))
220 | }
221 |
222 | #' @rdname gadgets
223 | #'
224 | #' @description `avtable_gadget()` allows choice of table in the current
225 | #' workspace (selected by `avworkspace()` or `avworkspace_gadget()`) to be
226 | #' returned as a tibble. It is equivalent to invoking `avtables()`
227 | #' to show available tables, and `avtable()` to retrieve the
228 | #' selected table.
229 | #'
230 | #' @return `avtable_gadget()` returns a `tibble` representing the selected
231 | #' AnVIL table.
232 | #'
233 | #' @importFrom AnVILBase avtable avtables
234 | #'
235 | #' @export
236 | avtable_gadget <-
237 |     function()
238 | {
239 |     ## DONE_FUN: name of the table in the selected row
240 |     table_name <- function(tibble, row_selected) {
241 |         tibble$table[row_selected]
242 |     }
243 |     ## called for its side effect: ensures a workspace is selected,
244 |     ## prompting the user when there is none
245 |     .workspace_get()
246 |     chosen <- .gadget_run("AnVIL Tables", avtables(), table_name)
247 |     if (length(chosen) == 0L)
248 |         return(invisible())
249 |     ## retrieve the selected table
250 |     avtable(chosen)
251 | }
252 |
253 | #' @rdname gadgets
254 | #'
255 | #' @description `avworkflow_gadget()` allows choice of workflow for
256 | #' retrieval. It is the equivalent of `avworkflows()` for listing
257 | #' available workflows, and `avworkflow_configuration_get()` for
258 | #' retrieving the workflow.
259 | #'
260 | #' @return `avworkflow_gadget()` returns an `avworkflow_configuration`
261 | #' object representing the inputs and outputs of the selected
262 | #' workflow. This can be edited and updated as described in the
263 | #' "Running an AnVIL workflow within R" vignette.
264 | #'
265 | #' @importFrom BiocBaseUtils checkInstalled
266 | #'
267 | #' @export
268 | avworkflow_gadget <-
269 |     function()
270 | {
271 |     checkInstalled("AnVILGCP")
272 |     ## DONE_FUN: identify the selected workflow as namespace/name
273 |     as_workflow <- function(tibble, row_selected) {
274 |         paste0(tibble$namespace[row_selected], "/", tibble$name[row_selected])
275 |     }
276 |     .workspace_get() # ensure a workspace is selected, prompting if not
277 |     workflows <- AnVILGCP::avworkflows()
278 |     chosen <- .gadget_run("AnVIL Workflows", workflows, as_workflow)
279 |     if (length(chosen) == 0L)
280 |         return(invisible(NULL))
281 |     AnVILGCP::avworkflow(chosen) # make the selection the current workflow
282 |     message("workflow set to '", AnVILGCP::avworkflow(), "'")
283 |     ## configuration (inputs and outputs) of the selected workflow
284 |     AnVILGCP::avworkflow_configuration_get()
285 | }
286 |
--------------------------------------------------------------------------------
/vignettes/RunningWorkflow.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Running an AnVIL workflow within R"
3 | author:
4 | - name: Kayla Interdonato
5 | affiliation: Roswell Park Comprehensive Cancer Center
6 | - name: Yubo Cheng
7 | affiliation: Roswell Park Comprehensive Cancer Center
8 | - name: Martin Morgan
9 | affiliation: Roswell Park Comprehensive Cancer Center
10 | email: Martin.Morgan@RoswellPark.org
11 | package: AnVIL
12 | output:
13 | BiocStyle::html_document
14 | abstract: |
15 | This vignette demonstrates how a user can edit, run, and stop a
16 | Terra / AnVIL workflow from within their R session. The configuration of the
17 | workflow can be retrieved and edited. Then this new configuration can be
18 | sent back to the Terra / AnVIL workspace for future use. With the new
19 | configuration defined, the user will then be able to run the workflow as
20 | well as stop any jobs from running.
21 | vignette: |
22 | %\VignetteIndexEntry{Running an AnVIL workflow within R}
23 | %\VignetteEngine{knitr::rmarkdown}
24 | %\VignetteEncoding{UTF-8}
25 | ---
26 |
27 | ```{r setup, include = FALSE}
28 | has_gcloud <- AnVILBase::has_avworkspace(
29 | strict = TRUE, platform = AnVILGCP::gcp()
30 | )
31 | knitr::opts_chunk$set(
32 | eval = has_gcloud, collapse = TRUE, cache = TRUE
33 | )
34 | options(width=75)
35 | ```
36 |
37 | # Installation
38 |
39 | Install the _AnVIL_ package with
40 |
41 | ```{r install-anvil, eval = FALSE}
42 | if (!requireNamespace("BiocManager", quietly = TRUE))
43 | install.packages("BiocManager", repos = "https://cran.r-project.org")
44 | BiocManager::install("AnVIL")
45 | ```
46 |
47 | Once installed, load the package with
48 |
49 | ```{r load-packages, message = FALSE, eval = TRUE, cache = FALSE}
50 | library(AnVILGCP)
51 | library(AnVIL)
52 | ```
53 |
54 | # Workflow setup: DESeq2
55 |
56 | ## Setting up the workspace and choosing a workflow
57 |
58 | The first step will be to define the namespace (billing project) and
59 | name of the workspace to be used with the functions. In our case we
60 | will be using the Bioconductor AnVIL namespace and a DESeq2 workflow
61 | as the intended workspace.
62 |
63 | ```{r workspace, eval = has_gcloud}
64 | avworkspace("bioconductor-rpci-anvil/Bioconductor-Workflow-DESeq2")
65 | ```
66 |
67 | Each workspace can have 0 or more workflows. The workflows have a
68 | `name` and `namespace`, just as workspaces. Discover the workflows
69 | available in a workspace
70 |
71 | ```{r workflows, eval = has_gcloud}
72 | avworkflows()
73 | ```
74 |
75 | From the table returned by `avworkflows()`, record the namespace and
76 | name of the workflow of interest using `avworkflow()`.
77 |
78 | ```{r workflow, eval = has_gcloud}
79 | avworkflow("bioconductor-rpci-anvil/AnVILBulkRNASeq")
80 | ```
81 |
82 | ## Retrieving the configuration
83 |
84 | Each workflow defines inputs, outputs and certain code
85 | execution. These workflow 'configurations' can be retrieved with
86 | `avworkflow_configuration_get()`.
87 |
88 | ```{r configuration, eval = has_gcloud}
89 | config <- avworkflow_configuration_get()
90 | config
91 | ```
92 |
93 | This function is using the workspace namespace, workspace name,
94 | workflow namespace, and workflow name we recorded above with
95 | `avworkspace()` and `avworkflow()`.
96 |
97 | # Updating workflows
98 |
99 | ## Changing the inputs / outputs
100 |
101 | There is a lot of information contained in the configuration but the
102 | only variables of interest to the user would be the inputs and
103 | outputs. In our case the inputs and outputs are pre-defined so we
104 | don't have to do anything to them. But for some workflows these
105 | inputs / outputs may be blank and therefore would need to be defined
106 | by the user. We will change one of our inputs values to show how this
107 | would be done.
108 |
109 | There are two functions to help users easily see the content of the
110 | inputs and outputs, they are `avworkflow_configuration_inputs` and
111 | `avworkflow_configuration_outputs`. These functions display the
112 | information in a `tibble` structure which users are most likely
113 | familiar with.
114 |
115 | ```{r inputs-outputs, eval = has_gcloud}
116 | inputs <- avworkflow_configuration_inputs(config)
117 | inputs
118 |
119 | outputs <- avworkflow_configuration_outputs(config)
120 | outputs
121 | ```
122 |
123 | Let's change the `salmon.transcriptome_index_name` field; this is an
124 | arbitrary string identifier in our workflow.
125 |
126 | ```{r change-input, eval = has_gcloud}
127 | inputs <-
128 | inputs |>
129 | mutate(
130 | attribute = ifelse(
131 | name == "salmon.transcriptome_index_name",
132 | '"new_index_name"',
133 | attribute
134 | )
135 | )
136 | inputs
137 | ```
138 |
139 | ## Update configuration locally
140 |
141 | Since the inputs have been modified we need to put this information into
142 | the configuration of the workflow. We can do this with
143 | `avworkflow_configuration_update()`. By default this function will take the
144 | inputs and outputs of the original configuration, just in case there were no
145 | changes to one of them (like in our example our outputs weren't changed).
146 |
147 | ```{r update-config, eval = has_gcloud}
148 | new_config <- avworkflow_configuration_update(config, inputs)
149 | new_config
150 | ```
151 |
152 | ## Set a workflow configuration for reuse in AnVIL
153 |
154 | Use `avworkflow_configuration_set()` to permanently update the
155 | workflow to new parameter values.
156 |
157 | ```{r set-config, eval = has_gcloud}
158 | avworkflow_configuration_set(new_config)
159 | ```
160 |
161 | Actually, the previous command validates `new_config` only; to update
162 | the configuration in AnVIL (i.e., replacing the values in the
163 | workspace workflow graphical user interface), add the argument `dry = FALSE`.
164 |
165 | ```{r set-config-not-dry}
166 | ## avworkflow_configuration_set(new_config, dry = FALSE)
167 | ```
168 |
169 | # Running and stopping workflows
170 |
171 | ## Running a workflow
172 |
173 | To finally run the new workflow we need to know the name of the data set to be
174 | used in the workflow. This can be discovered by looking at the table of
175 | interest and using the name of the data set.
176 |
177 | ```{r entityName, eval = has_gcloud}
178 | entityName <- avtable("participant_set") |>
179 | pull(participant_set_id) |>
180 | head(1)
181 | avworkflow_run(new_config, entityName)
182 | ```
183 |
184 | Again, actually running the new configuration requires the argument
185 | `dry = FALSE`.
186 |
187 | ```{r run-not-dry}
188 | ## avworkflow_run(new_config, entityName, dry = FALSE)
189 | ```
190 |
191 | `config` is used to set the `rootEntityType` and workflow method name
192 | and namespace; other components of `config` are ignored (the other
193 | components will be read by Terra / AnVIL from values updated with
194 | `avworkflow_configuration_set()`).
195 |
196 | ## Monitoring workflows
197 |
198 | We can see that the workflow is running by using the `avworkflow_jobs`
199 | function. The elements of the table are ordered chronologically, with
200 | the most recent submission (most likely the job we just started!)
201 | listed first.
202 |
203 | ```{r checking-workflow, eval = has_gcloud}
204 | avworkflow_jobs()
205 | ```
206 |
207 | ## Stopping workflows
208 |
209 | Use `avworkflow_stop()` to stop a currently running workflow. This
210 | will change the status of the job, reported by `avworkflow_jobs()`,
211 | from 'Submitted' to 'Aborted'.
212 |
213 | ```{r stop-workflow, eval = has_gcloud}
214 | avworkflow_stop() # dry = FALSE to stop
215 |
216 | avworkflow_jobs()
217 | ```
218 |
219 | # Managing workflow output
220 |
221 | ## Workflow files
222 |
223 | Workflows can generate a large number of intermediate files (including
224 | diagnostic logs), as well as final outputs for more interactive
225 | analysis. Use the `submissionId` from `avworkflow_jobs()` to discover
226 | files produced by a submission; the default behavior lists files
227 | produced by the most recent job.
228 |
229 | ```{r files, eval = has_gcloud}
230 | submissionId <- "fb8e35b7-df5d-49e6-affa-9893aaeebf37"
231 | avworkflow_files(submissionId)
232 | ```
233 |
234 | Workflow files are stored in the workspace bucket. The files can be
235 | localized to the persistent disk of the current runtime using
236 | `avworkflow_localize()`; the default is again to localize files from
237 | the most recently submitted job; use `type=` to influence which files
238 | ('control' e.g., log files, 'output', or 'all') are localized.
239 |
240 | ```{r localize, eval = has_gcloud}
241 | avworkflow_localize(
242 | submissionId,
243 | type = "output"
244 | ## dry = FALSE to localize
245 | )
246 | ```
247 |
248 | ## Workflow information
249 |
250 | Information on workflows (status, start, and end times, and input and
251 | output parameters) is available with `avworkflow_info()`. The examples
252 | below are from workflows using the [Rcollectl][] package to measure
253 | time spent in different parts of a single-cell RNA-seq analysis. The
254 | workspace is not publicly available, so results from
255 | `avworkflow_info()` are read from files in this package.
256 |
257 | [Rcollectl]: https://bioconductor.org/packages/Rcollectl
258 |
259 | A single job submission can launch multiple workflows. This occurs,
260 | e.g., when a workflow uses several rows from a DATA table to perform
261 | independent analyses. In the example used here, the workflows were
262 | configured to use different numbers of cores (3 or 8) and different
263 | ways of storing single-cell expression data (an in-memory `dgCMatrix`
264 | or an on-disk representation). Thus a single job submission started
265 | four workflows. This example was retrieved with
266 |
267 | ```{r workflow-info-example1-retrieval, eval = FALSE}
268 | avworkspace("bioconductor-rpci-yubo/Rcollectlworkflowh5ad")
269 | submissionId <- "9385fd75-4cb7-470f-9e07-1979e2c8f193"
270 | info_1 <- avworkflow_info(submissionId)
271 | ```
272 |
273 | Read the saved version of this result in to *R*.
274 |
275 | ```{r workflow-info-1-read, message=FALSE, warning=FALSE}
276 | info_file_1 <-
277 | system.file(package = "AnVIL", "extdata", "avworkflow_info_1.rds")
278 | info_1 <- readRDS(info_file_1)
279 |
280 | ## view result of avworkflow_info()
281 | info_1
282 | ```
283 |
284 | Three of the workflows were successful, one failed.
285 |
286 | ```{r workflow-info-1-select}
287 | info_1 |>
288 | select(workflowId, status, inputs, outputs)
289 | ```
290 |
291 | Inputs and outputs for each workflow are stored as lists. Strategies
292 | for working with list-columns in tibbles are described in Chapter 24
293 | of [R for Data Science][r4ds]. Use `tidyr::unnest_wider()` to expand
294 | the inputs. The failed workflow involved 8 `core` using the on-disk
295 | data representation `dgCMatrix = FALSE`.
296 |
297 | [r4ds]: https://r4ds.hadley.nz/rectangling
298 |
299 | ```{r workflow-info-1-inputs-unnested}
300 | info_1 |>
301 | select(workflowId, status, inputs) |>
302 | tidyr::unnest_wider(inputs)
303 | ```
304 |
305 | The outputs (files summarizing the single-cell analysis, and the
306 | timestamps associated with each step in the analysis) involve two
307 | levels of nesting; following the strategy outlined in
308 | [R for Data Science][r4ds], the outputs (google bucket locations) are
309 |
310 | [r4ds]: https://r4ds.hadley.nz/rectangling
311 |
312 | ```{r workflow-info-1-outputs-unnested}
313 | info_1 |>
314 | select(workflowId, outputs) |>
315 | tidyr::unnest_wider(outputs) |>
316 | tidyr::unnest_longer(starts_with("Rcollectl"), keep_empty = TRUE)
317 | ```
318 |
319 | In the example used so far, each workflow produces a single file. A
320 | different example is a workflow that produces multiple output
321 | files. This corresponds to the following submissionId:
322 |
323 | ```{r workflow-info-example2-retrieval, eval = FALSE}
324 | submissionId <- "35280de1-42d8-492b-aa8c-5feff984bffa"
325 | info_2 <- avworkflow_info(submissionId)
326 | ```
327 |
328 | Reading the result from the stored version:
329 |
330 | ```{r workflow-info-2-read, message=FALSE, warning=FALSE}
331 | info_file_2 <-
332 | system.file(package = "AnVIL", "extdata", "avworkflow_info_2.rds")
333 | info_2 <- readRDS(info_file_2)
334 | info_2
335 | ```
336 |
337 | Inputs and outputs are manipulated in the same way as before, but this
338 | time there are multiple output files.
339 |
340 | ```{r workflow-info-2-outputs-unnested, message=FALSE, warning=FALSE}
341 | info_2 |>
342 | select(workflowId, outputs) |>
343 | tidyr::unnest_wider(outputs)
344 | ```
345 |
346 | To see the output files, expand the outputs column using `unnest_longer()`.
347 |
348 | ```{r workflow-info-2-output-files}
349 | output_files <-
350 | info_2 |>
351 | select(workflowId, outputs) |>
352 | tidyr::unnest_wider(outputs) |>
353 | select(RcollectlWorkflowDelayedArrayParameters.Rcollectl_result) |>
354 | tidyr::unnest_longer(
355 | "RcollectlWorkflowDelayedArrayParameters.Rcollectl_result"
356 | )
357 | output_files
358 | ```
359 |
360 | The full file paths are available using `pull()` or `as.vector()`.
361 |
362 | ```{r workflow-info-2-output-paths}
363 | output_files |>
364 | as.vector()
365 | ```
366 |
367 | # Session information
368 |
369 | ```{r sessionInfo}
370 | sessionInfo()
371 | ```
372 |
--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
1 | # AnVIL 1.20.0
2 |
3 | USER VISIBLE CHANGES
4 |
5 | - (v 1.19.5) Added `host` slot to `Service` class to show any
6 | subdomains in the API host URL.
7 |
8 | - (v 1.19.1) Deprecated `av*`, `gcloud`, etc. functions are now defunct; see
9 | `*-defunct` documentation pages.
10 |
11 | BUG FIXES AND MINOR IMPROVEMENTS
12 |
13 | - (v 1.19.9) Trigger updates based on changes to Dockstore API
14 |
15 | - (v 1.19.7) Updated NEWS.md and GitHub Actions to automate version
16 | updates in Dockstore.
17 |
18 | - (v 1.19.6) Remove test meant for `rapiclient` client package.
19 |
20 | - (v 1.19.4) Update Dockstore API version
21 |
22 | - (v 1.19.3) Remove examples and tests for defunct functions.
23 |
24 | - (v 1.19.2) Use `gcloud_exists` from the `AnVILGCP` package.
25 |
26 | # AnVIL 1.18.0
27 |
28 | USER VISIBLE CHANGES
29 |
30 | - (v 1.17.18) Added `has_avworkspace` function to check for the existence of
31 | an AnVIL workspace environment.
32 |
33 | - (v 1.17.10) Internal functions now use `AnVILGCP` for `gcloud` utilities.
34 |
35 | - (v 1.17.8) Functions that use `gcloud` utilities are deprecated and will be
36 | moved to `AnVILGCP`. See `help(package = "AnVIL")` for a complete list.
37 | Documentation pages have a `*-deprecated` suffix.
38 |
39 | - (v 1.17.3) Added Terra Data Repository (TDR) service as `TDR()`. See service
40 | at https://data.terra.bio.
41 |
42 | - (v 1.17.1) Gen3 services, `avworkflow*_configuration()` functions, `install()`,
43 | `repository()`, and `repositories()` have been removed.
44 |
45 | - (v 1.17.1) Defunct `repository_stats` function in favor of
46 | `BiocPkgTools::repositoryStats` (@LiNk-NY)
47 |
48 | BUG FIXES AND MINOR IMPROVEMENTS
49 |
50 | - (v 1.17.20) Use `lifeCycle` from `BiocBaseUtils` to mark functions as
51 | deprecated or defunct.
52 |
53 | - (v 1.17.19) Increase robustness of `gcloud_exists` by testing `gcloud`
54 | with the `version` command.
55 |
56 | - (v 1.17.18) Remove mentions of `AnVIL::install` from the vignette.
57 |
58 | - (v 1.17.13) Update to changes in `rapiclient` and use native pipe operator.
59 |
60 | - (v 1.17.7) Do not evaluate vignette chunks if `gcloud_exists()` is `FALSE`
61 |
62 | - (v 1.17.6) Update Dockstore API file, version, and URL
63 |
64 | - (v 1.17.2) Use `application/json` as default `Content-Type`.
65 |
66 | # AnVIL 1.16.0
67 |
68 | USER VISIBLE CHANGES
69 |
70 | - (v 1.15.10) Validate API versions against hardcoded variables; produce warning
71 | when discordant (@LiNk-NY, #101).
72 |
73 | - (v 1.15.8) Add `gcloud_storage()` and `gcloud_storage_buckets()` to
74 | create and manage Google Cloud Storage buckets (@LiNk-NY, #72).
75 |
76 | - Gen3 services, `avworkflow*_configuration()` functions, `install()`,
77 | `repository()`, and `repositories()` are defunct.
78 |
79 | - (v 1.15.5) Catch `avtable_import_status()` errors in the response object.
80 |
81 | - (v 1.15.1) Update vignette with examples for `avworkflow_info()` (@mtmorgan,
82 | @yubocheng).
83 |
84 | BUG FIXES AND MINOR IMPROVEMENTS
85 |
86 | - (v 1.15.11) Update Dockstore API file, version, and URL
87 |
88 | - (v 1.15.9) Use assertions from `BiocBaseUtils`
89 |
90 | - (v 1.15.7) Use `URLencode` for table in `avtable` and direct request to
91 | Rawls endpoint (@LiNk-NY, #98)
92 |
93 | - (v 1.15.6) Update the Dockstore API reference URL and use
94 | `api_reference_url` instead of API file (@LiNk-NY).
95 |
96 | - Update namespace in vignette and examples (@kozo2, #54)
97 |
98 | # AnVIL 1.14.0
99 |
100 | NEW FEATURES
101 |
102 | - (v 1.13.1) Add paged support for large tables in `avtable_import()`
103 | and `avtable_import_set()`.
104 |
105 | - (v 1.13.2) Only show `avtable_paged()` and `avtable_import*()`
106 | progress bar in interactive() sessions
107 |
108 | - (v 1.13.4) Report messages when `avtable_import_status()`
109 | contains one.
110 |
111 | - (v 1.13.3) Use 'op' when .avworkflow_response() calls avstop_for_status().
112 |
113 |
114 | - (v 1.13.7) Check `requester pays` for destination URIs when using
115 | `gsutil_cp` (@smgogarten, #82)
116 |
117 | USER VISIBLE CHANGES
118 |
119 | - (v 1.13.8) Update documentation on updating workflow configurations.
120 | (@amstilp, #84)
121 |
122 | - (v 1.13.11) Added workflowId to `avworkflow_files()` and
123 | `avworkflow_localize()` to allow for filtering by workflow
124 | (@yubocheng, #90).
125 |
126 |
127 | # AnVIL 1.12.0
128 |
129 | USER VISIBLE CHANGES
130 |
131 | - (v 1.11.2) update workflow file discovery to use API, rather than 'scraping'
132 | google bucket. https://github.com/Bioconductor/AnVIL/issues/69
133 |
134 | - (v 1.11.3) Gen3 services deprecated
135 |
136 | - (v 1.11.5) Add `na =` to handle NA encoding in `avtable()` /
137 | `avtable_import()`. Changes default behavior.
138 |
139 |
140 | BUG FIXES
141 |
142 | - (v 1.11.1) consistently URLencode workspace and workflow `name`, to allow
143 | for spaces. https://github.com/Bioconductor/AnVIL/issues/67
144 |
145 | # AnVIL 1.10.0
146 |
147 | NEW FEATURES
148 |
149 | - (v 1.9.1) add `drs_access_url()` to return signed `https://` URLs
150 | from `drs://` URIs. Enhance `drs_cp()`.
151 |
152 | - (v 1.9.4) add `auto_unbox=` argument to Service class, allowing
153 | other developers flexibility in unboxing values passed to REST
154 | APIs.
155 |
156 | - (v 1.9.7) add developer facilities for tracking API changes in
157 | Rawls, Terra, and Leonardo services
158 |
159 | USER VISIBLE CHANGES
160 |
161 | - (v 1.9.2) Deprecate AnVIL::install() & friends in favor of
162 | BiocManager::install(), which now knows about container binary
163 | repositories.
164 |
165 | - (v 1.9.8) Update Rawls, Terra, and Leonardo services. Changed
166 | endpoints include:
167 |
168 | ```
169 | ## Rawls
170 | $removed
171 | [1] admin_delete_refresh_token admin_statistics_get
172 | [3] refreshToken refreshTokenDate
173 |
174 | $updated
175 | [1] listUserBillingAccounts createWorkspace getTags
176 | [4] clone entity_type_metadata get_entity
177 | [7] entityQuery createSubmission validateSubmission
178 |
179 | ## Terra
180 | $removed
181 | [1] userTrial listImportPFBJobs importPFBStatus
182 |
183 | $updated
184 | [1] deleteBillingProject billingAccounts
185 | [3] createWorkspace cloneWorkspace
186 | [5] entityQuery flexibleImportEntities
187 | [7] importEntities createSubmission
188 | [9] validateSubmission browserDownloadEntitiesTSV
189 | [11] setProfile
190 |
191 | ## Leonardo
192 | $removed
193 | [1] batchNodepoolCreate
194 |
195 | $updated
196 | [1] listApp listAppByProject deleteApp
197 | [4] createApp listDisks listDisksByProject
198 | [7] createDisk updateRuntime createRuntime
199 | [10] setCookie proxyClusterJupyter proxyClusterJupyterLab
200 | [13] proxyClusterRStudio
201 | ```
202 |
203 | - (v 1.9.9) add 'gadgets' (simple graphical interfaces) to key
204 | functions, `avworkspace_gadget()`, `avtable_gadget()`,
205 | `avworkflow_gadget()`. Also `browse_workspace()` for opening a terra
206 | workspace in the browser.
207 |
208 | BUG FIXES
209 |
210 | - (v 1.9.3 / 1.8.2) `avworkflow_localize()` looks for `submissionId`
211 | files correctly.
212 |
213 | - (v 1.9.5 / 1.8.3) `drs_stat()` works when `accessUrl` is included in
214 | response.
215 |
216 | - (v 1.9.6 / 1.8.5) `gsutil_cp()` and `gsutil_rsync()` use
217 | `normalizePath()` on source and destination arguments to avoid
218 | creating directories in unexpected locations when provided with
219 | paths containing `~`, `.` or `..`.
220 |
221 | - (v 1.9.10 / v 1.8.6) `gcloud_account("")` did not
222 | invalidate cached access tokens.
223 | https://github.com/Bioconductor/AnVIL/issues/66
224 |
225 | - (v 1.9.11 / v 1.8.7) avoid changing status of 'Done' workflows to
226 | 'Aborted'
227 |
228 | - (v 1.9.11 / v 1.8.7) allow 'NULL' for entity arguments of
229 | avworkflow_run()
230 |
231 | # AnVIL 1.8.0
232 |
233 | NEW FEATURES
234 |
235 | - (v 1.7.4) add `avworkflow_configuration_*()` functions for
236 | manipulating workflow configurations, and a vignette describing use.
237 |
238 | - (v 1.7.5) add `avdata_import()` to import 'REFERENCE DATA' and
239 | 'OTHER DATA' tables.
240 |
241 | - (v 1.7.9) export `repository_stats()` to summarize binary package
242 | availability.
243 |
244 | USER VISIBLE CHANGES
245 |
246 | - (v 1.7.4) Deprecate `avworkflow_configuration()`,
247 | `avworkflow_import_configuration()`.
248 |
249 | - (v 1.7.4) Update Dockstore md5sum.
250 |
251 | - (v 1.7.5) `avdata()` is re-implemented to more faithfully report
252 | only 'REFERENCE DATA' and 'OTHER DATA' workspace attributes;
253 | previously, other attributes such as the description and tags (from
254 | the workspace landing page) were also reported.
255 |
256 | BUG FIXES
257 |
258 | - (v 1.7.4) `avworkflow_files()` and `avworkflow_localize()` do not
259 | fail when the workflow has produced no files.
260 |
261 | - (v 1.7.6) improve handling of authentication token for gcloud
262 | utilities.
263 |
264 | - (v 1.8.2) `avworkflow_localize()` looks for `submissionId` files
265 | correctly.
266 |
267 | - (v 1.8.3) `drs_stat()` works when `accessUrl` is included in
268 | response.
269 |
270 | # AnVIL 1.6.7 / 1.7.13
271 |
272 | BUG FIXES
273 |
274 | - Correct gcloud_project() when user environment variable set.
275 | https://github.com/Bioconductor/AnVIL/pull/52
276 |
277 | # AnVIL 1.6.6
278 |
279 | BUG FIXES
280 |
281 | - Correct gsutil_pipe() argument mis-match, see
282 | https://support.bioconductor.org/p/9141780/
283 |
284 | # AnVIL 1.6.0
285 |
286 | NEW FEATURES
287 |
288 | - (v. 1.5.5) add `repository()` to return the binary repository
289 | location, if available.
290 |
291 | - (v. 1.5.7) `drs_stat()` and `drs_cp()` support signed URLs
292 |
293 | USER VISIBLE CHANGES
294 |
295 | - (v. 1.5.2) `drs_stat()` uses multiple cores (on non-Windows) to enhance
296 | performance
297 |
298 | - (v. 1.5.6) `install()` delegates to `BiocManager::install()`,
299 | providing more flexibility (e.g., installing from GitHub) and
300 | robustness.
301 |
302 | - (v. 1.5.7) `drs_stat()` returns fields more selectively.
303 |
304 | # AnVIL 1.4.1
305 |
306 | BUG FIXES
307 |
308 | - Only install binary packages on Bioconductor docker images
309 |
310 | # AnVIL 1.4.0
311 |
312 | NEW FEATURES
313 |
314 | - (v 1.3.1) support `Rawls()` service (more fine-grained implementation
315 | / extension of the 'Terra()' orchestration API).
316 |
317 | - (v 1.3.2) introduce `avworkspace_*()` functions for viewing and updating
318 | workflow configurations.
319 |
320 | - (v 1.3.3) introduce `avnotebooks_()` functions for managing notebooks
321 | on workspaces and runtimes.
322 |
323 | - (v 1.3.11) introduce `avtable_paged()` for page-wise access to tables
324 |
325 | - (v 1.3.14) introduce `avworkspace_clone()` for cloning existing
326 | workspaces.
327 |
328 | - (v 1.3.21) `avworkspaces()` returns a tibble of available workspaces.
329 |
330 | - (v 1.3.24) `gsutil_rsync()` supports a regular expression `exclude =`
331 | to exclude files from synchronization.
332 |
333 | - (v 1.3.24) `avworkflow_localize()` copies workflow control and / or
334 | output files to the local disk.
335 |
336 | USER VISIBLE CHANGES
337 |
338 | - (v 1.3.1) service functions have signatures like `fun(x, ...,
339 | .__body__ = list(y))`, where `x` is an argument for the 'URL' of the
340 | RESTful interface, and `y` is an argument for the 'BODY' of POST and
341 | similar requests. The `...` provide backward compatibility, and is
342 | used to populate elements of `.__body__`; the full interface is
343 | required when URL and BODY have identically named arguments.
344 |
345 | - (v 1.3.10, 1.3.11) return 'entity' column with name `'table_id'`,
346 | rather than `'name'`.
347 |
348 | - (v 1.3.22) `localize()` / `delocalize()` warn when `dry = TRUE`, so that
349 | lack of localization is more apparent.
350 |
351 | - (v 1.3.24) `gsutil_stat()` returns a tibble summarizing bucket status,
352 | rather than character().
353 |
354 | - (v 1.3.30) Add Referer: header to all Leonardo requests
355 |
356 | BUG FIXES
357 |
358 | - (v 1.3.6) when `.__body__` consists of 1 argument, it is represented
359 | as an unnamed set.
360 |
361 | - (v 1.3.7) allow positional matching for `.__body__` arguments
362 |
363 | - (v. 1.2.1 / 1.3.31) drs_stat() returns a single record per URL when
364 | multiple hashes available.
365 |
366 | # AnVIL 1.2.0
367 |
368 | NEW FEATURES
369 |
370 | - (v 1.1.3) introduce .deprecated flag in `operations()` / `tags()`; don't include
371 | deprecated APIs by default; warn on use of deprecated APIs.
372 |
373 | - (v 1.1.4) add `repositories()` to return binary (if available),
374 | Bioconductor, and CRAN repository paths.
375 |
376 | - (v 1.1.6) provide md5sum as check on service version.
377 |
378 | - (v 1.1.9) add `avfiles_*()` for managing workspace bucket files.
379 |
380 | - (v 1.1.15) add `avtable_import_set()` to create subsets of tables,
381 | following the Terra data model.
382 |
383 | - (v 1.1.16) add `avruntimes()`, `avworkspace_jobs()` to query for runtimes
384 | and jobs associated with the active billing account.
385 |
386 | - (v 1.1.17) add `avdisks()` to query for persistent disks associated
387 | with the active billing account.
388 |
389 | - (v 1.1.21) add `avworkflow_*()` for interacting with workflow jobs
390 | and outputs.
391 |
392 | # AnVIL 1.0.x
393 |
394 | BUG FIXES
395 |
396 | - (v 1.0.1) collapse 'produces' vectors to scalars, for httr::accept()
397 |
398 | - (v 1.0.3) access correct binary repository, more robustly
399 |
400 | USER VISIBLE CHANGES
401 |
402 | - (v 1.0.2) support updated Leonardo `listRuntimes()` and friends
403 | (`listClusters()` deprecated)
404 |
405 | # AnVIL 1.0.0
406 |
407 | - AnVIL is _finally_ on Bioconductor!
408 | - Support OpenAPI Specification version 2 (aka Swagger 2.0)
409 | - `av`, `gcloud`, `gsutil` type functions added to interface with AnVIL
410 | and the cloud
411 | - Support `leonardo`, `terra`, `dockstore` and `gen3*` APIs
412 |
413 | # AnVIL 0.0.20
414 |
415 | - Support untagged swagger
416 |
417 | # AnVIL 0.0.17
418 |
419 | - `leonardo`, `terra`, `dockstore` and `gen3*` symbols not defined; users must
420 | create these themselves, e.g., `leonardo <- Leonardo()`.
421 |
422 | - Added a `NEWS.md` file to track changes to the package.
423 |
--------------------------------------------------------------------------------
/vignettes/Introduction.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Introduction to the AnVIL package"
3 | author:
4 | - name: Nitesh Turaga
5 | - name: Vincent Carey
6 | - name: BJ Stubbs
7 | - name: Marcel Ramos
8 | affiliation: CUNY Graduate School of Public Health and Health Policy
9 | email: marcel.ramos@sph.cuny.edu
10 | - name: Martin Morgan
11 | package: AnVIL
12 | output:
13 | BiocStyle::html_document
14 | abstract: |
15 | The AnVIL is a cloud computing resource developed in part by the
16 | National Human Genome Research Institute. The AnVIL package provides
17 | end-user and developer functionality. For the end-user, AnVIL
18 | provides fast binary package installation, utilities for working
19 | with Terra / AnVIL table and data resources, and convenient
20 | functions for file movement to and from Google cloud storage. For
21 | developers, AnVIL provides programmatic access to the Terra,
22 | Leonardo, Rawls, and Dockstore RESTful programming interface,
23 | including helper functions to transform JSON responses to
24 | formats more amenable to manipulation in _R_.
25 | vignette: |
26 | %\VignetteIndexEntry{Introduction to the AnVIL package}
27 | %\VignetteEngine{knitr::rmarkdown}
28 | %\VignetteEncoding{UTF-8}
29 | ---
30 |
31 | ```{r setup, include = FALSE}
32 | has_gcloud <- AnVILBase::has_avworkspace(
33 | strict = TRUE, platform = AnVILGCP::gcp()
34 | )
35 | knitr::opts_chunk$set(
36 | eval = has_gcloud, collapse = TRUE, cache = TRUE
37 | )
38 | options(width=75)
39 | ```
40 |
41 | # Installation
42 |
43 | Install the _AnVIL_ package with
44 |
45 | ```{r install-anvil, eval = FALSE}
46 | if (!requireNamespace("BiocManager", quietly = TRUE))
47 | install.packages("BiocManager", repos = "https://cran.r-project.org")
48 | BiocManager::install("AnVIL")
49 | ```
50 |
51 | Once installed, load the package with
52 |
53 | ```{r load-packages, message =FALSE, eval = TRUE, cache = FALSE}
54 | library(AnVILGCP)
55 | library(AnVIL)
56 | ```
57 |
58 | # Quick start
59 |
60 | ## Up to speed with _AnVIL_
61 |
62 | The [AnVIL project][] is an analysis, visualization, and informatics
63 | cloud-based space for data access, sharing and computing across large
64 | genomic-related data sets.
65 |
66 | The _AnVIL_ project supports use of _R_ through Jupyter notebooks and
67 | _RStudio_. Support for _RStudio_ is preliminary as of April 2020.
68 |
69 | This package provides access to _AnVIL_ resources from within the
70 | _AnVIL_ cloud, and also from stand-alone computing resources such as a
71 | user's laptop.
72 |
73 | Use of this package requires AnVIL and Google cloud computing billing
74 | accounts. Consult [AnVIL training guides][] for details on
75 | establishing these accounts.
76 |
77 | The remainder of this vignette assumes that an AnVIL account has been
78 | established and successfully linked to a Google cloud computing
79 | billing account.
80 |
81 | [AnVIL project]: https://anvilproject.org/
82 | [AnVIL training guides]: https://anvilproject.org/training/guides
83 |
84 | ## Use in the AnVIL cloud
85 |
86 | In the AnVIL cloud environment, clone or create a new workspace. Click
87 | on the `Cloud Environment` button at the top right of the
88 | screen. Choose the `R / Bioconductor` runtime to use in a Jupyter
89 | notebook, or `RStudio` to use in RStudio. When creating a Jupyter
90 | notebook, choose `R` as the engine.
91 |
92 | A new layout is being introduced in Fall of 2022. If the workspace has
93 | an 'Analyses' tab, navigate to it and look for the 'Environment
94 | Configuration' button to the right of the screen. For a Jupyter
95 | notebook-based environment, select `jupyter` 'Environment Settings'
96 | followed by `Customize` and the `R / Bioconductor` application
97 | configuration. _RStudio_ is available by clicking on the `RStudio /
98 | Bioconductor` 'Environment Settings' button.
99 |
100 | For tasks more complicated than manipulation and visualization of
101 | tabular data (e.g., performing steps of a single-cell work flow) the
102 | default Jupyter notebook configuration of 1 CPU and 3.75 GB of memory
103 | will be insufficient; the RStudio image defaults to 4 CPU and 15 GB of
104 | memory.
105 |
106 | ## Local use
107 |
108 | Local use requires that the gcloud SDK is installed, and that the
109 | billing account used by AnVIL can be authenticated with the
110 | user. These requirements are satisfied when using the AnVIL compute
111 | cloud. For local use, one must
112 |
113 | - [Install][install-gcloud-sdk] the gcloud sdk (for Linux and Windows,
114 | `cloudml::gcloud_install()` provides an alternative way to install
115 | gcloud).
116 | - Define an environment variable or `option()` named `GCLOUD_SDK_PATH`
117 | pointing to the root of the SDK installation, e.g.,
118 |
119 | ```{r gcloud-sdk-path, eval = FALSE}
120 | dir(file.path(Sys.getenv("GCLOUD_SDK_PATH"), "bin"), "^(gcloud|gsutil)$")
121 | ## [1] "gcloud" "gsutil"
122 | ```
123 |
124 | Test the installation with `gcloud_exists()`
125 |
126 | ```{r gcloud-exists, eval = TRUE}
127 | ## the code chunks in this vignette are fully evaluated when
128 | ## gcloud_exists() returns TRUE
129 | GCPtools::gcloud_exists()
130 | ```
131 |
132 | ## Graphical interfaces
133 |
134 | Several commonly used functions have an additional 'gadget' interface,
135 | allowing selection of workspaces (`avworkspace_gadget()`), DATA tables
136 | (`avtable_gadget()`) and workflows (`avworkflow_gadget()`) using a
137 | simple tabular graphical user interface. The `browse_workspace()`
138 | function allows selection of a workspace to be opened as a browser
139 | tab.
140 |
141 | # For end users
142 |
143 | ## Fast binary package installation
144 |
145 | The AnVIL cloud compute environment makes use of Docker containers
146 | with defined installations of binary system software. Bioconductor
147 | has arranged to build 'binary' _R_ packages that work out of the
148 | box with the `BiocManager::install()` function. Binary packages
149 | (when available and current) install without requiring compilation,
150 | and are faster to install than packages built from source.
151 |
152 | ```{r install-genomicfeatures, eval = FALSE}
153 | BiocManager::install("GenomicFeatures")
154 | ```
155 |
156 | Thus `BiocManager::install()` can be used as an improved method for
157 | installing _CRAN_ and _Bioconductor_ binary and source packages.
158 |
159 | Because package installation is fast, it can be convenient to install
160 | packages into libraries on a project-specific basis, e.g., to create a
161 | 'snapshot' of packages for reproducible analysis. Use
162 |
163 | ```{r add-libpaths, eval = FALSE}
164 | add_libpaths("~/my/project")
165 | ```
166 |
167 | as a convenient way to prepend a project-specific library path to
168 | `.libPaths()`. New packages will be installed into this library.
169 |
170 | ## Working with Google cloud-based resources
171 |
172 | The AnVIL package implements functions to facilitate access to Google
173 | cloud resources.
174 |
175 | ### Using `gcloud_*()` for account management {.unnumbered}
176 |
177 | The `gcloud_*()` family of functions provide access to Google cloud
178 | functions implemented by the `gcloud` binary. `gcloud_project()`
179 | returns the current billing account.
180 |
181 | ```{r gcloud-account-project, eval = has_gcloud}
182 | gcloud_account() # authentication account
183 | gcloud_project() # billing project information
184 | ```
185 |
186 | A convenient way to access _any_ `gcloud` SDK command is to use
187 | `gcloud_cmd()`, e.g.,
188 |
189 | ```{r gcloud-projects-list, eval = has_gcloud}
190 | gcloud_cmd("projects", "list") |>
191 | readr::read_table() |>
192 | filter(startsWith(PROJECT_ID, "anvil"))
193 | ```
194 |
195 | This translates into the command line `gcloud projects list`. Help is
196 | also available within _R_, e.g.,
197 |
198 | ```{r gcloud-help, eval = FALSE}
199 | gcloud_help("projects")
200 | ```
201 |
202 | Use `gcloud_help()` (with no arguments) for an overview of available
203 | commands.
204 |
205 | ### Using `gsutil_*()` for file and bucket management {.unnumbered}
206 |
207 | The `gsutil_*()` family of functions provides an interface to google
208 | bucket manipulation. The following refers to publicly available 1000
209 | genomes data available in Google Cloud Storage.
210 |
211 | ```{r gsutil-src}
212 | src <- "gs://genomics-public-data/1000-genomes/"
213 | ```
214 |
215 | `gsutil_ls()` lists bucket content; `gsutil_stat()` provides additional
216 | detail about fully-specified buckets.
217 |
218 | ```{r gsutil-ls-stat, eval = has_gcloud}
219 | gsutil_ls(src)
220 |
221 | other <- paste0(src, "other")
222 | gsutil_ls(other, recursive = TRUE)
223 |
224 | sample_info <- paste0(src, "other/sample_info/sample_info.csv")
225 | gsutil_stat(sample_info)
226 | ```
227 |
228 | `gsutil_cp()` copies buckets from or to Google cloud storage; copying
229 | to cloud storage requires write permission, of course. One or both of
230 | the arguments can be cloud endpoints.
231 |
232 | ```{r gsutil-cp, eval = has_gcloud}
233 | fl <- tempfile()
234 | gsutil_cp(sample_info, fl)
235 |
236 | csv <- readr::read_csv(fl, guess_max = 5000L, col_types = readr::cols())
237 | csv
238 | ```
239 |
240 | `gsutil_pipe()` provides a streaming interface that does not require
241 | intermediate disk storage.
242 |
243 | ```{r gsutil-pipe, eval = has_gcloud}
244 | pipe <- gsutil_pipe(fl, "rb")
245 | readr::read_csv(pipe, guess_max = 5000L, col_types = readr::cols()) |>
246 | dplyr::select("Sample", "Family_ID", "Population", "Gender")
247 | ```
248 |
249 | `gsutil_rsync()` synchronizes a local file hierarchy with a remote
250 | bucket. This can be a powerful operation when `delete = TRUE`
251 | (removing local or remote files), and has default option `dry = TRUE`
252 | to indicate the consequences of the sync.
253 |
254 | ```{r gsutil-rsync, eval = has_gcloud}
255 | destination <- tempfile()
256 | stopifnot(dir.create(destination))
257 | source <- paste0(src, "other/sample_info")
258 |
259 | ## dry run
260 | gsutil_rsync(source, destination)
261 |
262 | gsutil_rsync(source, destination, dry = FALSE)
263 | dir(destination, recursive = TRUE)
264 |
265 | ## nothing to synchronize
266 | gsutil_rsync(source, destination, dry = FALSE)
267 |
268 | ## one file requires synchronization
269 | unlink(file.path(destination, "README"))
270 | gsutil_rsync(source, destination, dry = FALSE)
271 | ```
272 |
273 | `localize()` and `delocalize()` provide 'one-way'
274 | synchronization. `localize()` moves the content of the `gs://`
275 | `source` to the local file system. `localize()` could be used at the
276 | start of an analysis to retrieve data stored in the google cloud to
277 | the local compute instance. `delocalize()` performs the complementary
278 | operation, copying local files to a `gs://` destination. The `unlink =
279 | TRUE` option to `delocalize()` unlinks local `source` files
280 | recursively. It could be used at the end of an analysis to move
281 | results to the cloud for long-term persistent storage.
282 |
283 | ## Using `av*()` to work with AnVIL tables and data
284 |
285 | ### Tables, reference data, and persistent files {.unnumbered}
286 |
287 | AnVIL organizes data and analysis environments into
288 | 'workspaces'. AnVIL-provided data resources in a workspace are managed
289 | under the 'DATA' tab as 'TABLES', 'REFERENCE DATA', and 'OTHER DATA';
290 | the latter includes 'Workspace Data' and 'Files', with 'Files'
291 | corresponding to a google cloud bucket associated with the
292 | workspace. These components of the graphical user interface are
293 | illustrated in the figure below.
294 |
295 | ```{r workspace-data-image, echo = FALSE, cache = FALSE}
296 | knitr::include_graphics('images/AnVIL-Workspace-Data.png')
297 | ```
298 |
299 | The AnVIL package provides programmatic tools to access different
300 | components of the data workspace, as summarized in the following
301 | table.
302 |
303 | Workspace | AnVIL function
304 | ---------------|---------------
305 | TABLES | `avtables()`
306 | REFERENCE DATA | None
307 | OTHER DATA | `avbucket()`
308 | Workspace Data | `avdata()`
309 | Files | `avfiles_ls()`, `avfiles_backup()`, `avfiles_restore()`
310 |
311 | ```{r avworkspace-set-hidden, include = FALSE, cache = FALSE, eval = has_gcloud}
312 | avworkspace_namespace("pathogen-genomic-surveillance")
313 | avworkspace_name("COVID-19")
314 | ```
315 |
316 | Data tables in a workspace are available by specifying the `namespace`
317 | (billing account) and `name` (workspace name) of the workspace. When
318 | on the AnVIL in a Jupyter notebook or RStudio, this information can be
319 | discovered with
320 |
321 | ```{r avworkspace-get, eval = has_gcloud}
322 | avworkspace_namespace()
323 | avworkspace_name()
324 | ```
325 |
326 | It is also possible to specify, when not in the AnVIL compute
327 | environment, the data resource to work with.
328 |
329 | ```{r avworkspace-set, eval = has_gcloud}
330 | ## N.B.: IT MAY NOT BE NECESSARY TO SET THESE WHEN ON ANVIL
331 | avworkspace_namespace("pathogen-genomic-surveillance")
332 | avworkspace_name("COVID-19")
333 | ```
334 |
335 | ### Using `avtable*()` for accessing tables {.unnumbered}
336 |
337 | Accessing data tables uses the `av*()` functions. Use `avtables()` to
338 | discover available tables, and `avtable()` to retrieve a particular
339 | table.
340 |
341 | ```{r avtables-avtable, eval = has_gcloud}
342 | avtables()
343 | sample <- avtable("sample")
344 | sample
345 | ```
346 |
347 | The data in the table can then be manipulated using standard _R_
348 | commands, e.g., to identify SRA samples for which a final assembly
349 | fasta file is available.
350 |
351 | ```{r avtable-manipulation, eval = has_gcloud}
352 | sample |>
353 | select("sample_id", contains("fasta")) |>
354 | filter(!is.na(final_assembly_fasta))
355 | ```
356 |
357 | Users can easily add tables to their own workspace using
358 | `avtable_import()`, perhaps as the final stage of a pipe
359 |
360 | ```{r avtable-import-example, eval = FALSE}
361 | my_cars <-
362 | mtcars |>
363 | as_tibble(rownames = "model") |>
364 | mutate(model = gsub(" ", "_", model))
365 | job_status <- avtable_import(my_cars)
366 | ```
367 |
368 | Tables are imported 'asynchronously', and large tables (more than 1.5
369 | million elements; see the `pageSize` argument) are uploaded in
370 | pages. The `job_status` is a tibble summarizing each page; the status
371 | of the upload can be checked with
372 |
373 | ```{r avtable-import-status-example, eval = FALSE}
374 | avtable_import_status(job_status)
375 | ```
376 |
377 | The transcript of a session where page size is set intentionally small
378 | for illustration is
379 |
380 | ```{r avtable-import-pagesize-example, eval = FALSE}
381 | (job_status <- avtable_import(my_cars, pageSize = 10))
382 | ## pageSize = 10 rows (4 pages)
383 | ## |===================================================================| 100%
384 | ## # A tibble: 4 × 5
385 | ## page from_row to_row job_id status
386 | ##
387 | ## 1 1 1 10 a32e9706-f63c-49ed-9620-b214746b9392 Uploaded
388 | ## 2 2 11 20 f2910ac2-0954-4fb9-b36c-970845a266b7 Uploaded
389 | ## 3 3 21 30 e18adc5b-d26f-4a8a-a0d7-a232e17ac8d2 Uploaded
390 | ## 4 4 31 32 d14efb89-e2dd-4937-b80a-169520b5f563 Uploaded
391 | (job_status <- avtable_import_status(job_status))
392 | ## checking status of 4 avtable import jobs
393 | ## |===================================================================| 100%
394 | ## # A tibble: 4 × 5
395 | ## page from_row to_row job_id status
396 | ##
397 | ## 1 1 1 10 a32e9706-f63c-49ed-9620-b214746b9392 Done
398 | ## 2 2 11 20 f2910ac2-0954-4fb9-b36c-970845a266b7 Done
399 | ## 3 3 21 30 e18adc5b-d26f-4a8a-a0d7-a232e17ac8d2 ReadyForUpsert
400 | ## 4 4 31 32 d14efb89-e2dd-4937-b80a-169520b5f563 ReadyForUpsert
401 | (job_status <- avtable_import_status(job_status))
402 | ## checking status of 4 avtable import jobs
403 | ## |===================================================================| 100%
404 | ## # A tibble: 4 × 5
405 | ## page from_row to_row job_id status
406 | ##
407 | ## 1 1 1 10 a32e9706-f63c-49ed-9620-b214746b9392 Done
408 | ## 2 2 11 20 f2910ac2-0954-4fb9-b36c-970845a266b7 Done
409 | ## 3 3 21 30 e18adc5b-d26f-4a8a-a0d7-a232e17ac8d2 Done
410 | ## 4 4 31 32 d14efb89-e2dd-4937-b80a-169520b5f563 Done
411 | ```
412 |
413 | The Terra data model allows for tables that represent samples of other
414 | tables. The following create or add rows to `participant_set` and
415 | `sample_set` tables. Each row represents a sample from the
416 | corresponding 'origin' table.
417 |
418 | ```{r avtable-import-set-example, eval = FALSE}
419 | ## editable copy of '1000G-high-coverage-2019' workspace
420 | avworkspace("anvil-datastorage/1000G-high-coverage-2019")
421 | sample <-
422 |     avtable("sample") |> # existing table
423 |     mutate(set = sample(head(LETTERS), n(), TRUE)) # arbitrary groups; n() is the row count
424 | sample |> # new 'participant_set' table
425 |     avtable_import_set("participant", "set", "participant")
426 | sample |> # new 'sample_set' table
427 |     avtable_import_set("sample", "set", "name")
428 | ```
429 |
430 | The `TABLES` data in a workspace are usually provided as curated
431 | results from AnVIL. Nonetheless, it can sometimes be useful to delete
432 | individual rows from a table. Use `avtable_delete_values()`.
433 |
434 | ### Using `avdata()` for accessing Workspace Data {.unnumbered}
435 |
436 | The 'Workspace Data' is accessible through `avdata()` (the example
437 | below shows that some additional parsing may be necessary).
438 |
439 | ```{r avdata, eval = has_gcloud}
440 | avdata()
441 | ```
442 |
443 | ### Using `avbucket()` and workspace files {.unnumbered}
444 |
445 | Each workspace is associated with a google bucket, with the content
446 | summarized in the 'Files' portion of the workspace. The location of
447 | the files is
448 |
449 | ```{r avbucket, eval = has_gcloud}
450 | bucket <- avbucket()
451 | bucket
452 | ```
453 |
454 | The content of the bucket can be viewed with
455 |
456 | ```{r avfiles_ls, eval = has_gcloud}
457 | avfiles_ls()
458 | ```
459 |
460 | If the workspace is owned by the user, then persistent data can be
461 | written to the bucket.
462 |
463 | ```{r write-to-bucket-example, eval = FALSE}
464 | ## requires workspace ownership
465 | uri <- avbucket() # discover bucket
466 | bucket <- file.path(uri, "mtcars.tab")
467 | write.table(mtcars, gsutil_pipe(bucket, "w")) # write to bucket
468 | ```
469 |
470 | A particularly convenient operation is to back up files or directories
471 | from the compute node to the bucket
472 |
473 | ```{r avfiles-backup-cwd, eval = FALSE}
474 | ## backup all files and folders in the current working directory
475 | avfiles_backup(getwd(), recursive = TRUE)
476 | ```
477 |
478 | ```{r avfiles-backup-dir, eval = FALSE}
479 | ## backup all files in the current directory
480 | avfiles_backup(dir())
481 | ```
482 |
483 | ```{r avfiles-backup-scratch, eval = FALSE}
484 | ## backup all files in the current directory to the bucket's 'scratch/' folder
485 | avfiles_backup(dir(), paste0(avbucket(), "/scratch"))
486 | ```
487 |
488 | Note that the backup operations have file naming behavior like the
489 | Linux `cp` command; details are described in the help page
490 | `gsutil_help("cp")`.
491 |
492 | Use `avfiles_restore()` to restore files or directories from the
493 | workspace bucket to the compute node.
494 |
495 | ## Using `avnotebooks*()` for notebook management
496 |
497 | Python (`.ipynb`) or R (`.Rmd`) notebooks are associated with
498 | individual workspaces under the DATA tab, `Files/notebooks`
499 | location.
500 |
501 | Jupyter notebooks are exposed through the Terra interface under the
502 | NOTEBOOKS tab, and are automatically synchronized between the
503 | workspace and the current runtime.
504 |
505 | R markdown documents may also be associated with the workspace (under
506 | DATA `Files/notebooks`) but are not automatically synchronized with
507 | the current runtime. The functions in this section help manage R
508 | markdown documents.
509 |
510 | Available notebooks in the workspace are listed with
511 | `avnotebooks()`. Copies of the notebooks on the current runtime are
512 | listed with `avnotebooks(local = TRUE)`. The default location of the
513 | notebooks is `~/<avworkspace_name>/notebooks/`.
514 |
515 | Use `avnotebooks_localize()` to synchronize the version of the
516 | notebooks in the workspace to the current runtime. This operation
517 | might be used when a new runtime is created, and one wishes to start
518 | with the notebooks found in the workspace. If a newer version of the
519 | notebook exists in the workspace, this will overwrite the older
520 | version on the runtime, potentially causing data loss. For this
521 | reason, `avnotebooks_localize()` by default reports the actions that
522 | will be performed, without actually performing them. Use
523 | `avnotebooks_localize(dry = FALSE)` to perform the localization.
524 |
525 | Use `avnotebooks_delocalize()` to synchronize local versions of the
526 | notebooks on the current runtime to the workspace. This operation
527 | might be used when developing a workspace, and wishing to update the
528 | definitive notebook in the workspace. When `dry = FALSE`, this
529 | operation also overwrites older workspace notebook files with their
530 | runtime version.
531 |
532 | ## Using `avworkflows_*()` for workflows
533 |
534 | See the vignette "Running an AnVIL workflow within R", in this
535 | package, for details on running workflows and managing output.
536 |
537 | ## Using `avworkspace_*()` for workspaces
538 |
539 | `avworkspace()` is used to define or return the 'namespace' (billing
540 | project) and 'name' of the workspace on which operations are to
541 | act. `avworkspace_namespace()` and `avworkspace_name()` can be used to
542 | set individual elements of the workspace.
543 |
544 | `avworkspace_clone()` clones a workspace to a new location. The clone
545 | includes the 'DATA', 'NOTEBOOK', and 'WORKFLOWS' elements of the
546 | workspace.
547 |
548 | ## Using `drs_*()` for resolving DRS (Data Repository Service) URIs
549 |
550 | The Data Repository Service (DRS) is a GA4GH standard that separates a
551 | resource location (e.g., google bucket of a VCF file) from the URI
552 | that identifies the resource. A URI with the form `drs://...` is submitted to
553 | the Terra / AnVIL DRS, and translated to bucket (e.g., `gs://...`) or
554 | `https://...` URIs. One use case for DRS is when the location (e.g.,
555 | google bucket) of the resource moves. In this case the DRS identifier
556 | does not change, so no changes are needed to code or data resources
557 | that referenced the object. A second use case is when access to a
558 | resource is restricted. The DRS URI in conjunction with appropriate
559 | credentials can then be translated to a 'signed' https URL that
560 | encodes authentication information, allowing standard software like a
561 | web browser, or R commands like `download.file()` or
562 | `VariantAnnotation::readVcf()` to access the resource. A Terra [support
563 | article][DRS] provides more information, though not about DRS in R!
564 |
565 | [DRS]: https://support.terra.bio/hc/en-us/articles/360039330211
566 |
567 | The following DRS URIs identify a 1000 Genomes VCF file and its index
568 |
569 | ```{r drs-uri-example, eval = has_gcloud}
570 | uri <- c(
571 | vcf = "drs://dg.ANV0/6f633518-f2de-4460-aaa4-a27ee6138ab5",
572 | tbi = "drs://dg.ANV0/4fb9e77f-c92a-4deb-ac90-db007dc633aa"
573 | )
574 | ```
575 |
576 | Information about the URIs can be discovered with `drs_stat()`
577 |
578 | ```{r drs-stat-example, eval = FALSE}
579 | tbl <- drs_stat(uri)
580 | ## # A tibble: 2 × 9
581 | ## drs fileName size gsUri accessUrl timeUpdated hashes bucket name
582 | ##
583 | ## 1 drs://d… NA21144… 7.06e9 gs:/… NA 2020-07-08… fc-56… CCDG…
584 | ## 2 drs://d… NA21144… 4.08e6 gs:/… NA 2020-07-08… fc-56… CCDG…
585 | ```
586 |
587 | Column names indicate the information that is available, e.g., the
588 | google object (`gsUri`) and size (`size`) of the object, and the
589 | object's file name (`fileName`)
590 |
591 | `drs_cp()` provides a convenient way to translate DRS URIs to `gs://`
592 | URIs, and to copy files from their cloud location to the local disk or
593 | another bucket, e.g.,
594 |
595 | ```{r drs-cp-example, eval = FALSE}
596 | drs_cp(uri, "/tmp") # local temporary directory
597 | drs_cp(uri, avbucket()) # workspace bucket
598 | ```
599 |
600 | `drs_access_url()` translates the DRS URI to a standard HTTPS URI, but
601 | with additional authentication information embedded. These HTTPS URIs
602 | are usually time-limited. They can be used like regular HTTPS URIs, e.g.,
603 |
604 | ```{r drs-access-url-example, eval = FALSE}
605 | suppressPackageStartupMessages({
606 | library(VariantAnnotation)
607 | })
608 | https <- drs_access_url(uri)
609 | vcffile <- VcfFile(https[["vcf"]], https[["tbi"]])
610 | scanVcfHeader(vcffile)
611 | ## class: VCFHeader
612 | ## samples(1): NA21144
613 | ## meta(3): fileformat reference contig
614 | ## fixed(2): FILTER ALT
615 | ## info(16): BaseQRankSum ClippingRankSum ... ReadPosRankSum VariantType
616 | ## geno(11): GT AB ... PL SB
617 |
618 | variants <- readVcf(vcffile, param = GRanges("chr1:1-1000000"))
619 | nrow(variants)
620 | ## [1] 123077
621 | ```
622 |
623 | The buckets are both 'requester pays' (see
624 | `gsutil_requesterpays(uri)`), so these queries are billed to the
625 | current project.
626 |
627 | # For developers
628 |
629 | ## Set-up
630 |
631 | [install-gcloud-sdk]: https://cloud.google.com/sdk/install
632 |
633 | ## Service APIs
634 |
635 | AnVIL applications are exposed to the developer through RESTful API
636 | services. Each service is represented in _R_ as an object. The object
637 | is created by invoking a constructor, sometimes with arguments. We
638 | illustrate basic functionality with the `Terra()` service.
639 |
640 | Currently, APIs using the OpenAPI Specification (OAS) Version 2
641 | (formerly known as Swagger) are supported. AnVIL makes use of the
642 | [rapiclient][] codebase to provide a unified representation of the
643 | API protocol.
644 |
645 | [rapiclient]: https://cran.r-project.org/package=rapiclient
646 |
647 | ### Construction {.unnumbered}
648 |
649 | Create an instance of the service. This consults a Swagger / OpenAPI
650 | schema corresponding to the service to create an object that knows
651 | about available endpoints. Terra / AnVIL project services usually have
652 | Swagger / OpenApi-generated documentation, e.g., for the [Terra
653 | service][].
654 |
655 | ```{r terra-api, eval = has_gcloud}
656 | terra <- Terra()
657 | ```
658 |
659 | Printing the return object displays a brief summary of endpoints
660 |
661 | ```{r terra-summary, eval = has_gcloud}
662 | terra
663 | ```
664 |
665 | The schema for the service groups endpoints based on tag values,
666 | providing some level of organization when exploring the service. Tags
667 | display consists of endpoints (available as a tibble with
668 | `tags(terra)`).
669 |
670 | ```{r terra-tags, eval = has_gcloud}
671 | terra |> tags("Status")
672 | ```
673 |
674 | ### Invoke endpoints {.unnumbered}
675 |
676 | Access an endpoint with `$`; without parentheses `()` this generates a
677 | brief documentation string (derived from the schema
678 | specification). Including parentheses (and necessary arguments) invokes
679 | the endpoint.
680 |
681 | ```{r terra-status-access, eval = has_gcloud}
682 | terra$status
683 | ```
684 |
685 | ```{r terra-status-call, eval = has_gcloud}
686 | terra$status()
687 | ```
688 |
689 | Some arguments appear in the 'body' of a REST request. Provide these
690 | as a list specified with `.__body__ = list(...)`; use `args()` to
691 | discover whether arguments should be present in the body of the
692 | request. For instance,
693 |
694 | ```{r terra-create-billing-args, eval = has_gcloud}
695 | args(terra$createBillingProjectFull)
696 | ```
697 |
698 | shows that all arguments should be included in the `.__body__=`
699 | argument. A more complicated example is
700 |
701 | ```{r terra-overwrite-args, eval = has_gcloud}
702 | args(terra$overwriteWorkspaceMethodConfig)
703 | ```
704 |
705 | where the same argument name appears in both the URL and the
706 | body. Again, the specification of the body arguments should be in
707 | `.__body__ = list()`. As a convenience, arguments appearing _only_ in
708 | the body can also be specified in the `...` argument of the request.
709 |
710 | `operations()` and `schemas()` return a named list of endpoints, and
711 | of argument and return value schemas. `operations(terra)$XXX()` can be
712 | used as an alternative to direct invocation `terra$XXX()`. `schemas()`
713 | can be used to construct function arguments with complex structure.
714 |
715 | `empty_object()` is a convenience function to construct an 'empty'
716 | object (named list without content) required by some endpoints.
717 |
718 | ### Process responses {.unnumbered}
719 |
720 | Endpoints return objects of class `response`, defined in the [httr][] package
721 |
722 | ```{r terra-status-response, eval = has_gcloud}
723 | status <- terra$status()
724 | class(status)
725 | ```
726 |
727 | Several convenience functions are available to help developers
728 | transform return values into representations that are more directly
729 | useful.
730 |
731 | `str()` is invoked for the side-effect of displaying the list-like
732 | structure of the response. Note that this is not the literal structure
733 | of the `response` object (use `utils::str(status)` for that), but
734 | rather the structure of the JSON response received from the service.
735 |
736 | ```{r terra-status-str, eval = has_gcloud}
737 | str(status)
738 | ```
739 |
740 | `as.list()` returns the JSON response as a list, and `flatten()`
741 | attempts to transform the list into a tibble. `flatten()` is effective
742 | when the response is in fact a JSON row-wise representation of
743 | tibble-like data.
744 |
745 | ```{r terra-status-flatten-example, eval = has_gcloud}
746 | lst <- status |> as.list()
747 | lengths(lst)
748 | lengths(lst$systems)
749 | str(lst$systems)
750 | ```
751 |
752 | ### Test endpoints {.unnumbered}
753 |
754 | Testing endpoints is challenging. Endpoints cannot be evaluated
755 | directly because they require credentialed access, and because remote
756 | calls involve considerable latency and sometimes
757 | bandwidth. Traditional 'mocks' are difficult to implement because of
758 | the auto-generated nature of endpoints from APIs. Simply checking for
759 | identical API YAML files (e.g., using md5sums) only indicates a change
760 | in the file without assessing whether the R code invoking the endpoint
761 | is the same (e.g., because arguments were added, removed, or renamed).
762 |
763 | The approach adopted here is to take a 'snapshot' of the current
764 | API. This is then compared to the updated API. Endpoints that are used
765 | in the code but that have been removed or have updated arguments are
766 | then manually checked for conformance to the updated API. Once
767 | endpoints are brought into line with the new API, the snapshot is
768 | updated to reflect the new API.
769 |
770 | Non-exported functions in the AnVIL package facilitate these steps. For
771 | instance, `AnVIL:::.api_test_write(Terra(), "Terra")` creates a
772 | snapshot of the current API. This is saved as
773 | `tests/testthat/api-Terra.rds`. The service is then updated (following
774 | the README of `inst/service/terra`) and the updated API compared to
775 | the original with `AnVIL:::.api_test_check(Terra(), "Terra")`. The
776 | result is a list of functions that are common to both APIs, or added,
777 | removed, or updated (different arguments) in the new API. A static
778 | example is
779 | ```
780 | > .api_test_check(Terra(), "Terra") |> lengths()
781 | common added removed updated common_in_use
782 | 135 24 3 11 9
783 | removed_in_use updated_in_use
784 | 0 3
785 | ```
786 | with the `removed_in_use` and `updated_in_use` endpoints
787 | ```
788 | > .api_test_check(Terra(), "Terra")[c("removed_in_use", "updated_in_use")]
789 | $removed_in_use
790 | character(0)
791 |
792 | $updated_in_use
793 | [1] "cloneWorkspace" "entityQuery" "flexibleImportEntities"
794 | ```
795 | requiring manual inspection. Manual inspection means that each use in
796 | the AnVIL R package code is examined and updated to match the new
797 | API. Once the R code is aligned with the new API, `.api_test_write()`
798 | is re-run. The commit consists of the updated API files in
799 | `inst/service`, updated R code, and the updated snapshot.
800 |
801 | Unit tests (in `test_api.R`) are implemented to fail when the
802 | `removed_in_use` or `updated_in_use` fields are not zero-length.
803 |
804 | [httr]: https://cran.r-project.org/package=httr
805 |
806 | ## Service implementations
807 |
808 | The AnVIL package implements and has made extensive use of the
809 | following services:
810 |
811 | - _Terra_ (https://api.firecloud.org/; `Terra()`) provides access to
812 | terra account and workspace management, and is meant as the primary
813 | user-facing 'orchestration' API.
814 |
815 | [Terra service]: https://api.firecloud.org
816 |
817 | - _Leonardo_ (https://leonardo.dev.anvilproject.org/; `Leonardo()`)
818 | implements an interface to the AnVIL container deployment service,
819 | useful for managing Jupyter notebook and RStudio sessions running
820 | in the AnVIL compute cloud.
821 |
822 | - _Rawls_ (https://rawls.dsde-prod.broadinstitute.org; `Rawls()`)
823 | implements functionality that often overlaps with (and is delegated
824 | to) the _Terra_ interface; the _Rawls_ interface implements
825 | lower-level functionality, and some operations (e.g., populating a
826 | DATA TABLE) are more difficult to accomplish with _Rawls_.
827 |
828 | The _Dockstore_ service (https://dockstore.org/swagger.json,
829 | `Dockstore()`) is available but has received limited
830 | testing. _Dockstore_ is used to run CWL- or WDL-based work flows,
831 | including workflows using _R_ / _Bioconductor_. See the separate
832 | vignette 'Dockstore and _Bioconductor_ for AnVIL' for initial
833 | documentation.
834 |
835 | ## Extending the `Service` class to implement your own RESTful interface
836 |
837 | The AnVIL package provides useful functionality for exposing other
838 | RESTful services represented in Swagger. To use this in other
839 | packages,
840 |
841 | - Add to the package DESCRIPTION file
842 |
843 | ```
844 | Imports: AnVIL
845 | ```
846 |
847 | - Arrange (e.g., via roxygen2 `@importFrom`, etc.) for the NAMESPACE
848 | file to contain
849 |
850 | ```
851 | importFrom AnVIL, Service
852 | importMethodsFrom AnVIL, "$" # perhaps also `tags()`, etc
853 | importClassesFrom AnVIL, Service
854 | ```
855 |
856 | - Implement your own class definition and constructor. Use `?Service`
857 | to provide guidance on argument specification. For instance, to
858 | re-implement the terra service.
859 |
860 | ```{r my-service-class}
861 | .MyService <- setClass("MyService", contains = "Service")
862 |
863 | ## constructor: create the underlying Service, then wrap it as 'MyService'
864 | MyService <- function() {
865 |     service <- Service(
866 |         "myservice",
867 |         host = "api.firecloud.org",
868 |         api_url = "https://api.firecloud.org/api-docs.yaml",
869 |         authenticate = FALSE
870 |     )
871 |     .MyService(service)
872 | }
873 | ```
874 |
875 | Use `api_reference_url` and `api_reference_md5sum` of `Service()` as a
876 | mechanism to provide some confidence that the service created by the
877 | user at runtime is consistent with the service intended by the
878 | developer.
879 |
880 | # Support, bug reports, and source code availability
881 |
882 | For user support, please ask for help on the _Bioconductor_ [support
883 | site][]. Remember to tag your question with 'AnVIL', so that the
884 | maintainer is notified. Ask for developer support on the
885 | [bioc-devel][] mailing list.
886 |
887 | Please report bugs as 'issues' on [GitHub][].
888 |
889 | Retrieve the source code for this package from its canonical location.
890 |
891 | ```
892 | git clone https://git.bioconductor.org/packages/AnVIL
893 | ```
894 |
895 | The package source code is also available on [GitHub][]
896 |
897 | [support site]: https://support.bioconductor.org
898 | [bioc-devel]: https://stat.ethz.ch/mailman/listinfo/bioc-devel
899 | [GitHub]: https://github.com/Bioconductor/AnVIL
900 |
901 | # Appendix {.unnumbered}
902 |
903 | ## Acknowledgments {.unnumbered}
904 |
905 | Research reported in this software package was supported by the US
906 | National Human Genome Research Institute of the National Institutes
907 | of Health under award number [U24HG010263][]. The content is solely
908 | the responsibility of the authors and does not necessarily represent
909 | the official views of the National Institutes of Health.
910 |
911 | [U24HG010263]: https://projectreporter.nih.gov/project_info_description.cfm?aid=9789931&icde=49694078
912 |
913 | ## Session info {.unnumbered}
914 |
915 | ```{r sessionInfo, echo=FALSE}
916 | sessionInfo()
917 | ```
918 |
--------------------------------------------------------------------------------