├── .gitignore ├── .github ├── .gitignore └── workflows │ └── pkgdown.yaml ├── .Rbuildignore ├── NAMESPACE ├── pkgdown └── extra.css ├── vignettes ├── images │ ├── ccdgAFF.png │ ├── favicon.ico │ ├── anvil_logo.jpg │ ├── logo_nhgri_w.png │ ├── logo_bioconductor.gif │ └── teampics │ │ ├── SehyunOh.jpg │ │ ├── VinceCarey.jpg │ │ ├── LeviWaldron.jpg │ │ ├── LoriShepherd.jpg │ │ ├── MarcelRamos.jpg │ │ ├── MartinMorgan.jpg │ │ └── NiteshTuraga.jpeg ├── updating.Rmd ├── binrepo.Rmd ├── team.Rmd ├── overview.Rmd ├── packages.Rmd ├── images.Rmd └── training.Rmd ├── inst ├── legacy │ ├── project_management.md │ ├── team.md │ ├── about.md │ ├── FaceToFace_Jan2019.md │ ├── training.md │ └── index.md └── resources │ └── basicData.Rmd ├── DESCRIPTION ├── _pkgdown.yml └── index.md /.gitignore: -------------------------------------------------------------------------------- 1 | .Rhistory 2 | -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^codecov\.yml$ 2 | ^\.github$ 3 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | -------------------------------------------------------------------------------- /pkgdown/extra.css: -------------------------------------------------------------------------------- 1 | // extra.css 2 | // Apply any additional CSS here 3 | -------------------------------------------------------------------------------- /vignettes/images/ccdgAFF.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Bioconductor/BiocAnVIL/main/vignettes/images/ccdgAFF.png -------------------------------------------------------------------------------- /vignettes/images/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/BiocAnVIL/main/vignettes/images/favicon.ico -------------------------------------------------------------------------------- /vignettes/images/anvil_logo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/BiocAnVIL/main/vignettes/images/anvil_logo.jpg -------------------------------------------------------------------------------- /vignettes/images/logo_nhgri_w.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/BiocAnVIL/main/vignettes/images/logo_nhgri_w.png -------------------------------------------------------------------------------- /vignettes/images/logo_bioconductor.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/BiocAnVIL/main/vignettes/images/logo_bioconductor.gif -------------------------------------------------------------------------------- /vignettes/images/teampics/SehyunOh.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/BiocAnVIL/main/vignettes/images/teampics/SehyunOh.jpg -------------------------------------------------------------------------------- /vignettes/images/teampics/VinceCarey.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/BiocAnVIL/main/vignettes/images/teampics/VinceCarey.jpg -------------------------------------------------------------------------------- /vignettes/images/teampics/LeviWaldron.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/BiocAnVIL/main/vignettes/images/teampics/LeviWaldron.jpg -------------------------------------------------------------------------------- /vignettes/images/teampics/LoriShepherd.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/BiocAnVIL/main/vignettes/images/teampics/LoriShepherd.jpg -------------------------------------------------------------------------------- /vignettes/images/teampics/MarcelRamos.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/BiocAnVIL/main/vignettes/images/teampics/MarcelRamos.jpg -------------------------------------------------------------------------------- /vignettes/images/teampics/MartinMorgan.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/BiocAnVIL/main/vignettes/images/teampics/MartinMorgan.jpg -------------------------------------------------------------------------------- /vignettes/images/teampics/NiteshTuraga.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/BiocAnVIL/main/vignettes/images/teampics/NiteshTuraga.jpeg -------------------------------------------------------------------------------- /inst/legacy/project_management.md: -------------------------------------------------------------------------------- 1 | # Bioconductor / AnVIL 2 | 3 | ## Project management 4 | 5 | Some visibility into _Bioconductor_-specfic project management, 6 | including [current developement][] effort and [AnVIL Year 2 Plan][] are 7 | available as github projects. [Legacy][] project boards are also available. 8 |
9 |
10 | For quick glance of project updates: [Project Activities][] 11 | 12 | [current developement]: https://github.com/Bioconductor/AnVIL_Admin/projects/5 13 | [AnVIL Year 2 Plan]: https://github.com/Bioconductor/AnVIL_Admin/projects/6 14 | [Legacy]: https://github.com/Bioconductor/AnVIL_Admin/projects?query=is%3Aclosed 15 | [Project Activities]: index -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: BiocAnVIL 2 | Title: Bioconductor's contributions to [NHGRI's Analysis and VIsualization Laboratory (AnVIL)](https://anvilproject.org) 3 | are many and are evolving. 4 | Version: 0.0.10 5 | Authors@R: 6 | person(given = "Bioc-AnVIL", 7 | family = "Team", 8 | role = c("aut", "cre"), 9 | email = "stvjc@channing.harvard.edu") 10 | Description: Bioconductor's contributions to [NHGRI's Analysis and VIsualization Laboratory (AnVIL)](https://anvilproject.org) 11 | are many and are evolving. 12 | Depends: R (>= 4.0.0), tools, utils 13 | Suggests: 14 | knitr, 15 | BiocStyle, 16 | testthat, 17 | rmarkdown, 18 | pkgdown 19 | License: Artistic-2.0 20 | VignetteBuilder: knitr 21 | biocViews: Software 22 | RoxygenNote: 7.1.1 23 | BugReports: https://github.com/Bioconductor/BiocAnVIL/issues 24 | -------------------------------------------------------------------------------- /vignettes/updating.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Updating the Website" 3 | author: "Bioc/Anvil Team" 4 | vignette: > 5 | %\VignetteEngine{knitr::rmarkdown} 6 | %\VignetteIndexEntry{updating the website} 7 | %\VignetteEncoding{UTF-8} 8 | output: 9 | BiocStyle::html_document: 10 | highlight: pygments 11 | number_sections: yes 12 | theme: united 13 | toc: yes 14 | --- 15 | 16 | ## Updating the Website 17 | 18 | 1. `git clone` the repository. 19 | 2. 
If editing content, edit the `.Rmd` file in `vignettes` directory. Or to 20 | make a new article, add the new `.Rmd` file in `vignettes` directory. `pkgdown` 21 | will automatically generate a link to the article in the navigation. To edit 22 | the navigation, edit the `_pkgdown.yml` file. 23 | 3. `git push` your new branch to the repository. 24 | 4. Push to `main` or open a pull request to the `main` branch to trigger the 25 | pkgdown action to rebuild the site. 26 | 27 | ## Customizations 28 | 29 | The navigation can be customized by editing the `_pkgdown.yml` and custom css 30 | may be added by editing `pkgdown/extra.css`. 31 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | pkgdown: 2 | as_is: false 3 | 4 | template: 5 | params: 6 | bootswatch: yeti 7 | ganalytics: UA-93043521-1 8 | 9 | 10 | navbar: 11 | left: 12 | - text: "Home" 13 | href: index.html 14 | - text: Project 15 | menu: 16 | - text: Overview 17 | href: articles/overview.html 18 | - text: Docker Images 19 | href: articles/images.html 20 | - text: Binary Repository Generation 21 | href: articles/binrepo.html 22 | - text: Bioconductor Packages 23 | href: articles/packages.html 24 | - text: Training 25 | href: articles/training.html 26 | - text: Team 27 | href: articles/team.html 28 | - text: Updating the Website 29 | href: articles/updating.html 30 | right: 31 | - icon: fab fa-github fa-lg 32 | href: https://github.com/Bioconductor/BiocAnVIL 33 | 34 | home: 35 | title: Bioconductor AnVIL Projects 36 | description: > 37 | Bioconductor contributes R / Bioconductor Jupyter notebooks, RStudio / 38 | Bioconductor docker images, AnVIL packages, fast binary installation 39 | packages, workshops, and training materials to AnVIL. 
40 | -------------------------------------------------------------------------------- /.github/workflows/pkgdown.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/master/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | pull_request: 5 | branches: main 6 | push: 7 | branches: main 8 | tags: ['*'] 9 | 10 | name: pkgdown 11 | 12 | jobs: 13 | pkgdown: 14 | runs-on: ubuntu-latest 15 | env: 16 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 17 | steps: 18 | - name: Checkout Repository 19 | uses: actions/checkout@v2 20 | 21 | - name: Set up Pandoc 22 | uses: r-lib/actions/setup-pandoc@v1 23 | 24 | - name: Set up R 25 | uses: r-lib/actions/setup-r@v1 26 | with: 27 | use-public-rspm: true 28 | 29 | - name: Set up R dependencies 30 | uses: r-lib/actions/setup-r-dependencies@v1 31 | with: 32 | extra-packages: pkgdown 33 | needs: website 34 | 35 | - name: Deploy package 36 | run: | 37 | git config --local user.name "$GITHUB_ACTOR" 38 | git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com" 39 | Rscript -e 'pkgdown::deploy_to_branch(new_process = FALSE)' 40 | -------------------------------------------------------------------------------- /inst/legacy/team.md: -------------------------------------------------------------------------------- 1 | # Bioconductor / AnVIL 2 | 3 | ## Team 4 | 5 | ### Current Developers 6 | 7 | Vincent Carey 9 |
Vincent Carey 10 | 11 | Martin Morgan 13 |
Martin Morgan (PI) 14 | 15 | Sehyun Oh 17 |
Sehyun Oh 18 | 19 | Marcel Ramos 21 |
Marcel Ramos 22 | 23 | Lori Shepherd 25 |
Lori Ann Shepherd (PM) 26 | 27 | BJ Stubbs 29 |
BJ Stubbs 30 | 31 | Nitesh Turaga 33 |
Nitesh Turaga 34 | 35 | Levi Waldron 37 |
Levi Waldron 38 | 39 | ### Past Developers 40 | 41 | -------------------------------------------------------------------------------- /index.md: -------------------------------------------------------------------------------- 1 | ## Bioconductor for AnVIL 2 | 3 | Bioconductor is available in [NHGRI's Analysis and VIsualization Laboratory 4 | (AnVIL)](https://anvilproject.org). [AnVIL](https://anvil.terra.bio) is a US 5 | National Institutes of Health / National Human Genome Research Institute 6 | initiative powered by Terra to provide a secure cloud environment for genomic 7 | data science analysis, visualization, and informatics. AnVIL provides the 8 | ability to launch RStudio, Jupyter notebook, and other R / Bioconductor 9 | resources in a computational cloud. 10 | 11 | Bioconductor contributes R / Bioconductor Jupyter notebooks, RStudio / 12 | Bioconductor docker images, AnVIL packages, fast binary installation packages, 13 | workshops, and training materials to AnVIL. 14 | 15 | ### Bioconductor's work in AnVIL 16 | - [Docker images](articles/images.html) for use in Terra 17 | - [Binary repositories](articles/binrepo.html) for Bioconductor software 18 | - [Bioconductor packages](articles/packages.html) 19 | - [AnVIL Training](articles/training.html) 20 | 21 | ### Use Bioconductor in AnVIL 22 | 23 | Visit [anvilproject.org](https://anvilproject.org/) to learn more about AnVIL or 24 | [get started](https://anvilproject.org/learn/interactive-analysis/getting-started-with-bioconductor) 25 | with Bioconductor in AnVIL. 26 | -------------------------------------------------------------------------------- /inst/legacy/about.md: -------------------------------------------------------------------------------- 1 | # Bioconductor / AnVIL 2 | 3 | ## About _Bioconductor_ and _AnVIL_ 4 | 5 | [_Bioconductor_][] is a successful open-source project for the 6 | statistical analysis and comprehension of high throughput genomic 7 | data. 
_Bioconductor_ is based on the _R_ programming language. It 8 | consists of more than 1600 [_R_ packages][] contributed by more than 9 | 1000 maintainers world-wide. _Bioconductor_ packages are downloaded to 1/2 10 | million IP addresses annually. There are more than 29,000 11 | [PubMedCentral citations][] to _Bioconductor_. _Bioconductor_ has an 12 | active [support site][] and [community slack][]. 13 | 14 | [_AnVIL_][] is a US National Institutes of Health / National Human 15 | Genome Research Institute initiative to develop a Genomic Data Science 16 | Analysis, Visualization, and Informatics Lab-space (AnVIL). 17 | 18 | Ultimately, _AnVIL_ will provide the ability to launch RStudio, 19 | Jupyter notebook, and other _R_ / _Bioconductor_ resources in a 20 | computational cloud. The _AnVIL_ environment will provide secure access 21 | to large-scale as well as individual data resources, and to scalable 22 | cloud-based computational environments. 23 | 24 | [_Bioconductor_]: https://bioconductor.org 25 | [_AnVIL_]: https://www.genome.gov/27569268/genomic-analysis-visualization-and-informatics-labspace-anvil/ 26 | [_R_ packages]: https://bioconductor.org/packages 27 | [PubMedCentral citations]: https://www.ncbi.nlm.nih.gov/pmc/?term=bioconductor&sort=ePubDate 28 | [support site]: https://support.bioconductor.org 29 | [community slack]: https://bioc-community.herokuapp.com/ 30 | -------------------------------------------------------------------------------- /vignettes/binrepo.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Bioc/AnVIL binary generation" 3 | author: "Bioc/AnVIL team" 4 | vignette: > 5 | %\VignetteEngine{knitr::rmarkdown} 6 | %\VignetteIndexEntry{Bioc/AnVIL binary generation} 7 | %\VignetteEncoding{UTF-8} 8 | output: 9 | BiocStyle::html_document: 10 | highlight: pygments 11 | number_sections: yes 12 | theme: united 13 | toc: yes 14 | --- 15 | 16 | Binary packages for fast installation via
`AnVIL::install()` for Bioconductor 17 | and CRAN packages are available through a `CRAN`-style repository at 18 | `https://storage.googleapis.com/bioconductor-packages/3.15/container-binaries/`. The packages are 19 | generated with the internal Bioconductor package, 20 | [BiocKubeInstall](https://github.com/Bioconductor/BiocKubeInstall), which 21 | parallelizes package installation and binary creation on a Kubernetes cluster 22 | using Google Kubernetes Engine. The cluster runs and deploys builds on Tuesdays, 23 | Thursdays, and Saturdays through a scheduled GitHub Action. See the [BiocKubeInstall 24 | Documentation](https://bioconductor.github.io/BiocKubeInstall) for more information. 25 | 26 | BiocKubeInstall works with [mtmorgan/RedisParam](https://github.com/mtmorgan/RedisParam), 27 | which facilitates parallelization over a Kubernetes cluster. Docker images for 28 | `bioc-redis:manager` and a release worker, e.g., `bioc-redis:RELEASE_3_14`, are on DockerHub. 29 | 30 | ## Additional Resources 31 | - [Slides](https://docs.google.com/presentation/d/1Y7g_6X8I6DPaNK84EzWNo1wVpfAwdORGt6kcgcPYOV4/edit?usp=sharing) 32 | - [shwetagopaul92/hgvarByKub](https://github.com/shwetagopaul92/hgvarByKub) - an example of R 33 | and Kubernetes 34 | -------------------------------------------------------------------------------- /vignettes/team.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "The Bioc/AnVIL team" 3 | author: "Bioc/AnVIL team" 4 | vignette: > 5 | %\VignetteEngine{knitr::rmarkdown} 6 | %\VignetteIndexEntry{The Bioc/AnVIL Team} 7 | %\VignetteEncoding{UTF-8} 8 | output: 9 | BiocStyle::html_document: 10 | highlight: pygments 11 | number_sections: yes 12 | theme: united 13 | toc: yes 14 | --- 15 | 16 | ## Current Developers 17 |
18 |
19 |
20 | Vincent Carey 21 |
Vincent Carey 22 |
23 |
24 | Martin Morgan 25 |
Martin Morgan (PI) 26 |
27 |
28 |
29 |
30 | Sehyun Oh 31 |
Sehyun Oh 32 |
33 |
34 | Marcel Ramos 35 |
Marcel Ramos 36 |
37 |
38 |
39 |
40 | Nitesh Turaga 41 |
Nitesh Turaga (PM) 42 |
43 |
44 |
45 |
46 | Levi Waldron 47 |
Levi Waldron 48 |
49 |
50 |
51 | 52 | ## Past Developers 53 | 54 | 60 | -------------------------------------------------------------------------------- /vignettes/overview.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Bioc/AnVIL overview" 3 | author: "Bioc/AnVIL team" 4 | vignette: > 5 | %\VignetteEngine{knitr::rmarkdown} 6 | %\VignetteIndexEntry{Bioconductor / AnVIL Overview} 7 | %\VignetteEncoding{UTF-8} 8 | output: 9 | BiocStyle::html_document: 10 | highlight: pygments 11 | number_sections: yes 12 | theme: united 13 | toc: yes 14 | --- 15 | 16 | ## About _Bioconductor_ and _AnVIL_ 17 | 18 | [_Bioconductor_][] is a successful open-source project for the 19 | statistical analysis and comprehension of high throughput genomic 20 | data. _Bioconductor_ is based on the _R_ programming language. It 21 | consists of more than 2000 [_R_ packages][] contributed by more than 22 | 1000 maintainers world-wide. _Bioconductor_ packages are downloaded to 1/2 23 | million IP addresses annually. There are more than 29,000 24 | [PubMedCentral citations][] to _Bioconductor_. _Bioconductor_ has an 25 | active [support site][] and [community slack][]. 26 | 27 | [_AnVIL_][] is a US National Institutes of Health / National Human 28 | Genome Research Institute initiative to develop a Genomic Data Science 29 | Analysis, Visualization, and Informatics Lab-space (AnVIL). 30 | 31 | _AnVIL_ provides the ability to launch RStudio, Jupyter notebook, and 32 | other _R_ / _Bioconductor_ resources in a computational cloud. The 33 | _AnVIL_ environment provides secure access to large-scale as well as 34 | individual data resources, and to scalable cloud-based computational 35 | environments. 
36 | 37 | [_Bioconductor_]: https://bioconductor.org 38 | [_AnVIL_]: https://www.genome.gov/27569268/genomic-analysis-visualization-and-informatics-labspace-anvil/ 39 | [_R_ packages]: https://bioconductor.org/packages 40 | [PubMedCentral citations]: https://www.ncbi.nlm.nih.gov/pmc/?term=bioconductor&sort=ePubDate 41 | [support site]: https://support.bioconductor.org 42 | [community slack]: https://bioc-community.herokuapp.com/ 43 | -------------------------------------------------------------------------------- /inst/resources/basicData.Rmd: -------------------------------------------------------------------------------- 1 | # Basic dataset sizes in AnVIL workspaces visible to AnVIL_Devs 2 | 3 | VJ Carey, BJ Stubbs 4 | 5 | 6 | 7 | ```{r setup, echo=FALSE, results="hide"} 8 | suppressMessages({ 9 | library(AnVIL) 10 | library(rjson) 11 | library(httr) 12 | library(tibble) 13 | library(dplyr) 14 | library(magrittr) 15 | }) 16 | # participantCount: participant count from getEntityTypes for one "anvil-datastorage" workspace; NULL when no participant entry is returned 17 | participantCount = function(studyTitle) { 18 | types = httr::content( 19 | terra$getEntityTypes("anvil-datastorage", 20 | studyTitle)) 21 | types$participant$count 22 | } 23 | # sampleAtts: parsed getEntities response for the "sample" entity type of one "anvil-datastorage" workspace 24 | sampleAtts = function(studyTitle) { 25 | atts = httr::content( 26 | terra$getEntities( 27 | "anvil-datastorage", 28 | studyTitle, 29 | "sample")) 30 | atts 31 | } 32 | # basicDataFrame: one row per workspace whose name matches "AnVIL"; tokens 2-6 of the "_"-split name become study/site/organ/addit/ext, N is the participant count (NA when missing) 33 | basicDataFrame = function() { 34 | ws = AnVIL::terra$listWorkspaces() 35 | wscon = content(ws, type="text/json") 36 | ww = fromJSON(wscon) 37 | alln = sapply(ww, function(x) x$workspace$name) 38 | anvnames = grep("AnVIL", alln, value=TRUE) # NOTE(review): matches "AnVIL" anywhere in the name, while the text below says "leading substring" -- confirm whether "^AnVIL" is intended 39 | toks = strsplit(anvnames, "_") 40 | stud = sapply(toks, "[", 2) 41 | site = sapply(toks, "[", 3) 42 | org = sapply(toks, "[", 4) 43 | t5 = sapply(toks, "[", 5) 44 | t6 = sapply(toks, "[", 6) 45 | nums = lapply(anvnames, participantCount) 46 | bad = which(sapply(nums, is.null)) 47 | if (length(bad)>0) { 48 | nums[bad]=NA 49 | } 50 | nums = unlist(nums) # always flatten: a list passed as N= would be expanded by data.frame() into several columns 51 | data.frame(study=stud, site=site, organ=org, 52 | N=nums, addit=t5, ext=t6,
name=anvnames, 53 | stringsAsFactors=FALSE) 54 | } 55 | # AnVILStudies: S4 wrapper around the basic data.frame; its show method prints the table as a tibble 56 | setClass("AnVILStudies", 57 | representation(basic="data.frame")) 58 | AnVILStudies = function(dataframe) { 59 | new("AnVILStudies", basic=dataframe) } 60 | setMethod("show", "AnVILStudies", function(object) { 61 | print(as_tibble(object@basic)) 62 | }) 63 | #sa = sampleAtts("AnVIL_CCDG_WashU_CVD_EOCAD_Emerge_WGS") 64 | 65 | ``` 66 | 67 | ```{r getbasic} 68 | basedf = basicDataFrame() 69 | ``` 70 | We are using the AnVIL package of Bioconductor to 71 | query and summarize the AnVIL workspaces. There are 72 | `r nrow(basedf)` workspaces with 'AnVIL' as the 73 | leading substring of workspace name. 74 | 75 | ```{r baic} 76 | as_tibble(basedf) 77 | ``` 78 | 79 | The number of samples is usually available in an 80 | attribute, but there are `r sum(is.na(basedf$N))` 81 | studies where this is not available. 82 | 83 | It is of interest to tabulate numbers of samples 84 | by anatomic organ. 85 | ```{r byorg} 86 | as_tibble(basedf) %>% 87 | group_by(study, organ) %>% 88 | summarise(N=sum(N, na.rm=TRUE)) 89 | ``` 90 | 91 | For CCDG we can partition a bit more finely.
92 | 93 | ```{r doccd} 94 | basedf %>% filter(study=="CCDG") %>% 95 | group_by(study,organ,addit) %>% 96 | summarise(N=sum(N, na.rm=TRUE)) 97 | ``` 98 | -------------------------------------------------------------------------------- /vignettes/packages.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Bioc/AnVIL Packages" 3 | author: "Bioc/AnVIL team" 4 | vignette: > 5 | %\VignetteEngine{knitr::rmarkdown} 6 | %\VignetteIndexEntry{Bioconductor / AnVIL Packages} 7 | %\VignetteEncoding{UTF-8} 8 | output: 9 | BiocStyle::html_document: 10 | highlight: pygments 11 | number_sections: yes 12 | theme: united 13 | toc: yes 14 | --- 15 | 16 | ## AnVIL Package Tools 17 | 18 | ### AnVIL 19 | 20 | The AnVIL package provides binary package installation, utilities for manipulating 21 | table and data resources, and functions for moving files between AnVIL Terra and 22 | Google cloud storage. It also provides programmatic access with helper functions 23 | to the Terra, Leonardo, Rawls, Dockstore, and Gen3 RESTful programming interface. 24 | For more information, see 25 | 26 | - [AnVIL package at bioconductor.org](https://bioconductor.org/packages/AnVIL) 27 | - [AnVIL package source at github.com](https://github.com/Bioconductor/AnVIL) 28 | 29 | ### AnVILPublish 30 | 31 | Create and update AnVIL workspaces from R / Bioconductor packages. For more 32 | information, see 33 | 34 | * [AnVILPublish at bioconductor.org](https://bioconductor.org/packages/AnVILPublish) 35 | * [AnVILPublish at github.com](https://github.com/Bioconductor/AnVILPublish) 36 | 37 | ### AnVILBilling 38 | 39 | Retrieve and report on the costs related to AnVIL and Terra resources.
For 40 | more information, see 41 | 42 | * [AnVILBilling at bioconductor.org](https://bioconductor.org/packages/AnVILBilling) 43 | * [AnVILBilling at github.com](https://github.com/vjcitn/AnVILBilling) 44 | 45 | ## Metadata access and overview 46 | 47 | AnVIL package tools can be used to discover incompatibilities or ambiguities 48 | in study annotation. BJ’s class worked through metadata survey exercises. An 49 | example of incompatible/ambiguous annotation is present in the Autism workspaces. 50 | 51 | ![Workspace showing an example of incompatible/ambiguous annotation in Autism 52 | workspaces](images/ccdgAFF.png "Incompatible/Ambiguous Annotation in Autism Workspaces") 53 | 54 | We are looking at two studies from NYGC referring to autism; one has substring 55 | ACE2 and the other SSC. What we see above is that AFFECTION_STATUS is coded 1/2 in 56 | the SSC study, and more prosaically in the ACE2 study. It may be that the labels 57 | in the ACE2 study are more problematic as the options seem to be "0", "ASD affected", 58 | "ASD Affected", and "Diagnosis uncertain" – or perhaps it is just a 59 | letter casing issue. 60 | 61 | The ingestion group was notified and replied that "there is no process for the 62 | AnVIL team to retrospectively address existing data". Interest was expressed in 63 | learning more about our metadata survey capabilities.
64 | -------------------------------------------------------------------------------- /inst/legacy/FaceToFace_Jan2019.md: -------------------------------------------------------------------------------- 1 | # Face to Face Meeting: Baltimore Jan 10-11, 2019 2 | 3 | ## Resources (for reference) 4 | 5 | - Previous slides: https://docs.google.com/presentation/d/1rhffdH_uG6tyPZt67ie4P_NKyWJIk6DqMOgGrA8dnyI/edit?usp=sharing 6 | 7 | - Informal notes from first face-to-face meeting https://docs.google.com/document/d/1expHfjCHqgU2FqacQK7R5G_OiZDZ12xvZUhwYUw5emU/edit 8 | 9 | - 'Ideal AnVIL architecture' 10 | 11 | ## Platform and expertise 12 | 13 | - Bioconductor recap -- 'Statistical analysis & comprehension'; interactive; core and contributed packages; typical use with some but far from all packages used. 14 | 15 | - Primary use is INTERACTIVE 16 | - Bioconductor used through Jupyter / RStudio 'front end' 17 | 18 | ## Role in AnVIL 19 | 20 | 1. Initial development 21 | 22 | - Standardized, correct containers for essential Bioconductor packages through RStudio & Jupyter. 'Release' and 'devel' flavors. See https://github.com/Bioconductor/AnVIL_Docker (work in progress) 23 | 24 | - R-based Leonardo REST interface -- primarily to help developers. See https://github.com/Bioconductor/AnVIL 25 | 26 | - Existing Bioconductor cloud-based activities -- Annotation & ExperimentHub, BiocOncoTK, ... 27 | 28 | 2. Single RStudio / Jupyter instance user interface to AnVIL 29 | 30 | - Access to user and protected data using standard R idioms. Maybe data resources are just 'files' on the file system and no special implementation is required; maybe data resource metadata needs to be 'discovered' and presented to the user through some kind of R / Bioconductor based interface. 31 | 32 | - Access to 'standard' (??) Firecloud / Terra services -- discover & run services on user and protected data from within R.
E.g., perform https://software.broadinstitute.org/firecloud/documentation/article?id=11115 but from an R script. 33 | 34 | - Essential to have standard APIs published early to write R-level functions around 35 | 36 | 3. Several R / Bioconductor instances 37 | 38 | - Launch and manage many R / Bioconductor instances in firecloud; interact via BiocParallel-like task distribution 39 | 40 | - Management via CloudMan / Kubernetes / 'native' Firecloud / ... 41 | 42 | - Implement Terra services in R / Bioconductor 43 | - ?? WDL-enabled R (e.g., Sean Davis' [wdlRunR][1]) 44 | - WDL fits well with Bioc formal objects (vs. R's more relaxed approach) 45 | - ?? provided as container. 46 | 47 | - Importance of scalable container infrastructure 48 | - Use Galaxy CloudMan or Firecloud infrastructure to spin up instances 49 | - Communicate & drive from within 'manager' RStudio instance, e.g., via BiocParallel & rabbitmq parallel evaluation 50 | 51 | 4. Training 52 | 53 | 5. What we do right 54 | 55 | [1]: https://github.com/seandavi/wdlRunR 56 | -------------------------------------------------------------------------------- /vignettes/images.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Bioc/AnVIL images" 3 | shorttitle: "testing pkgdown approach" 4 | author: "Bioc/AnVIL images" 5 | vignette: > 6 | %\VignetteEngine{knitr::rmarkdown} 7 | %\VignetteIndexEntry{Bioc/AnVIL images} 8 | %\VignetteEncoding{UTF-8} 9 | output: 10 | BiocStyle::html_document: 11 | highlight: pygments 12 | number_sections: no 13 | theme: united 14 | toc: yes 15 | --- 16 | 17 | 18 | 19 | The latest [Terra-Jupyter-Bioconductor](https://github.com/DataBiosphere/terra-docker/tree/master/terra-jupyter-bioconductor) 20 | and [R / Bioconductor](https://github.com/anvilproject/anvil-docker/tree/master/anvil-rstudio-bioconductor) 21 | Docker images are available in [AnVIL](https://anvil.terra.bio) and the 22 | Google Container Registry. 
They work like the 23 | [bioconductor_docker](https://github.com/Bioconductor/bioconductor_docker) 24 | images, with the capability to install almost all of the Bioconductor packages 25 | along with a pre-installed "core" set of packages. The 26 | terra-jupyter-bioconductor image inherits from the terra-jupyter-r image 27 | which has all the system dependencies installed. This image has been tested on 28 | Leonardo and installs all but a few packages in the Bioconductor release, which 29 | fail due to archived CRAN dependencies. 30 | 31 | For the following images, check the CHANGELOG.md in their respective 32 | repositories for the most recent image URL. 33 | 34 | ## Jupyter notebooks 35 | 36 | The following image is available on the [broad-dsp-gcr-public GCP Container 37 | Registry for 38 | terra-jupyter-bioconductor](https://console.cloud.google.com/gcr/images/broad-dsp-gcr-public/US/terra-jupyter-bioconductor). 39 | 40 | - [Terra-Jupyter-Bioconductor](https://github.com/DataBiosphere/terra-docker/tree/master/terra-jupyter-bioconductor) 41 | is based on R version 4.1 and Bioconductor version 3.14. In Terra, it is listed 42 | with the version number as in 43 | `us.gcr.io/broad-dsp-gcr-public/terra-jupyter-bioconductor:{version}` and can 44 | be used by selecting the "Bioconductor" notebook runtime. See the 45 | [Terra-Jupyter-Bioconductor CHANGELOG.md](https://github.com/DataBiosphere/terra-docker/blob/master/terra-jupyter-bioconductor/CHANGELOG.md). 46 | 47 | ## RStudio / Bioconductor 48 | 49 | The following images are available on the [anvil-gcr-public GCP Container 50 | Registry](https://console.cloud.google.com/gcr/images/anvil-gcr-public/). 51 | 52 | - [R / Bioconductor (release 3.14)](https://github.com/anvilproject/anvil-docker/tree/master/anvil-rstudio-bioconductor) 53 | is based on R version 4.1 and Bioconductor version 3.14.
In Terra, it is listed 54 | with the version number as in 55 | `us.gcr.io/anvil-gcr-public/anvil-rstudio-bioconductor:{version}` and can be 56 | used by selecting a "custom" notebook runtime and selecting the RStudio image 57 | under "Community Maintained RStudio Environment (Verified Partners)". See the 58 | [AnVIL-RStudio-Bioconductor CHANGELOG.md](https://github.com/anvilproject/anvil-docker/blob/master/anvil-rstudio-bioconductor/CHANGELOG.md). 59 | 60 | - [R / Bioconductor (devel)](https://github.com/anvilproject/anvil-docker/tree/master/anvil-rstudio-bioconductor-devel) 61 | In Terra, the image is accessible with the version number as in 62 | `us.gcr.io/anvil-gcr-public/anvil-rstudio-bioconductor-devel:{version}`. 63 | It can be used by selecting "Other Environments". See the 64 | [AnVIL-RStudio-Bioconductor-devel CHANGELOG.md](https://github.com/anvilproject/anvil-docker/blob/master/anvil-rstudio-bioconductor-devel/CHANGELOG.md). 65 | 66 | - [Legacy R / Bioconductor](https://github.com/anvilproject/anvil-docker/tree/master/anvil-rstudio-bioconductor) 67 | The image is based on R version 4.0.5 and Bioconductor version 3.13. 68 | -------------------------------------------------------------------------------- /inst/legacy/training.md: -------------------------------------------------------------------------------- 1 | # _Bioconductor_ / _AnVIL_ 2 | 3 | ## Training material 4 | 5 | The material here is 'in development' and meant to orient the 6 | _Bioconductor_ community to use of _AnVIL_ resources. See also [AnVIL 7 | documentation][AnVIL-docs] resources. 8 | 9 | [AnVIL-docs]: https://support.terra.bio/hc/en-us 10 | 11 | - [Workshops](#workshops) 12 | - [_Terra_ workspaces / workflows](#workspaces) 13 | - [Notebooks](#notebooks) 14 | - [Dockstore](#dockstore) 15 | - [Other](#other) 16 | 17 | 18 | ### Workshops 19 | 20 | - [Terra in the Classroom][] 21 | 22 | - Documentation on running a small course utilizing AnVIL. 
This includes some 23 | set up information as well as the learned positives and negatives as of 24 | February 2020. 25 | 26 | 27 | 28 | ### _Terra_ Workspaces / Workflows 29 | 30 | - (proof-of-principle) A workspace for [pan-cancer transcriptome 31 | surveys][pancanlink]. This workspace includes two workflows, each 32 | devoted to different gene sets. The WDL and associated scripts are 33 | [registered][dockstorelink] at dockstore.org. Unit testing for the 34 | script components is managed in the Bioconductor [BiocOncoTK 35 | package][vjconcohub] (developer repo). 36 | 37 | [pancanlink]: https://app.terra.bio/#workspaces/landmarkanvil2/pancan_tx_public 38 | [dockstorelink]: https://dockstore.org/workflows/github.com/vjcitn/BiocOncoTK/msireg1:master?tab=info 39 | [vjconcohub]: https://github.com/vjcitn/BiocOncoTK/blob/master/tests/testthat/test_dockstore_scripts.R 40 | 41 | 42 | 43 | ### Notebooks 44 | 45 | - (proof-of-principle) [Using Bioconductor's VCF processing stack][vcf stack] 46 | to demonstrate population stratification using a small slice of 47 | chr17 from the [new EBI 1000 genomes VCF][1kvcf]. 48 | 49 | - (proof-of-principle) [Using dockstore+terra for pancancer 50 | transcriptomics][pancantx] to compare relationships between gene 51 | expression and stratified or continuous measures of microsatellite 52 | instability in 33 TCGA tumor types. 
53 | 54 | [vcf stack]: https://nbviewer.jupyter.org/github/vjcitn/terravar/blob/master/Tiny%20population%20stratification%20display.ipynb 55 | [1kvcf]: http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000_genomes_project/release/20190312_biallelic_SNV_and_INDEL/20190312_biallelic_SNV_and_INDEL_README.txt 56 | [pancantx]: https://nbviewer.jupyter.org/github/vjcitn/terravar/blob/master/trimmedMondaySep16.ipynb 57 | 58 | 59 | ### Dockstore 60 | 61 | 62 | ### Other 63 | 64 | - Shiny Apps 65 | 66 | - [TerraPlane][] to help filter dockstore to find methods based on search term 67 | 68 | - Data management utilities 69 | 70 | - _R_ markdown for [using terra to survey CCDG and 71 | CMG](training/basicData.Rmd) 72 | 73 | - Results as of 20 June 2019 74 | ``` 75 | ## # A tibble: 10 x 3 76 | ## # Groups: study [2] 77 | ## study organ N 78 | ## 79 | ## 1 CCDG AI 9031 80 | ## 2 CCDG CVD 25741 81 | ## 3 CCDG NP 19422 82 | ## 4 CMG Blood 277 83 | ## 5 CMG Brain 1844 84 | ## 6 CMG Eye 552 85 | ## 7 CMG Heart 184 86 | ## 8 CMG Kidney 432 87 | ## 9 CMG Muscle 1722 88 | ## 10 CMG Orphan 717 89 | ``` 90 | 91 | - Drilling down on CCDG 92 | ``` 93 | ## # A tibble: 9 x 4 94 | ## # Groups: study, organ [3] 95 | ## study organ addit N 96 | ## 97 | ## 1 CCDG AI Asthma 1171 98 | ## 2 CCDG AI IBD 4694 99 | ## 3 CCDG AI T1D 3166 100 | ## 4 CCDG CVD AFib 3731 101 | ## 5 CCDG CVD EOCAD 20156 102 | ## 6 CCDG CVD HemStroke 1358 103 | ## 7 CCDG CVD Stroke 496 104 | ## 8 CCDG NP Alz 2374 105 | ## 9 CCDG NP Autism 17048 106 | ``` 107 | 108 | [TerraPlane]: https://github.com/shwetagopaul92/TerraPlane 109 | [Terra in the Classroom]: https://docs.google.com/presentation/d/1AvEt6UIIx-G5eTe4hlfkGOYsUcSQrKx8ySlnnfg7XH8/edit?usp=sharing -------------------------------------------------------------------------------- /vignettes/training.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Bioc/AnVIL training" 3 | author: "Bioc/AnVIL team" 4 |
vignette: > 5 | %\VignetteEngine{knitr::rmarkdown} 6 | %\VignetteIndexEntry{training} 7 | %\VignetteEncoding{UTF-8} 8 | output: 9 | BiocStyle::html_document: 10 | highlight: pygments 11 | number_sections: yes 12 | theme: united 13 | toc: yes 14 | --- 15 | 16 | ## Terra / AnVIL R / Bioconductor Workshops 17 | 18 | The following Terra / AnVIL R / Bioconductor Popup Workshops 19 | were held in 2021 from April to June. 20 | 21 | - Week 1: [Using R / Bioconductor in AnVIL](https://anvilproject.org/learn/data-analysts/using-r-bioconductor-in-anvil) 22 | with Martin Morgan 23 | - Week 2: [The R / Bioconductor AnVIL package](https://anvilproject.org/learn/data-analysts/the-r-bioconductor-anvil-package) 24 | with Martin Morgan and Nitesh Turaga 25 | - Week 3: [Running a Workflow](https://anvilproject.org/learn/data-analysts/running-a-workflow) 26 | with Martin Morgan and Kayla Interdonato 27 | - Week 4: [Single-cell RNASeq with 'Orchestrating Single Cell Analysis' in R / Bioconductor](https://anvilproject.org/learn/data-analysts/single-cell-rnaseq-with-orchestrating-single-cell-analysis-in-r-bioconductor) with Vince Carey 28 | - Week 5: [Using AnVIL for Teaching R](https://anvilproject.org/learn/data-analysts/using-anvil-for-teaching-r-bioconductor) 29 | with Levi Waldron 30 | - Week 6: [Reproducible Research with AnVILPublish](https://anvilproject.org/learn/data-analysts/reproducible-research-with-anvilpublish) 31 | with Martin Morgan 32 | - Week 7: [Participant Stories](https://anvilproject.org/learn/data-analysts/participant-stories) 33 | 34 | [Introduction to the Terra AnVIL Cloud-based Genomics Platform](https://youtu.be/N83iDYE2yQM) with 35 | Sehyun Oh and Levi Waldron at Bioc2021 36 | 37 | [Terra in the Classroom](https://docs.google.com/presentation/d/1AvEt6UIIx-G5eTe4hlfkGOYsUcSQrKx8ySlnnfg7XH8/edit?usp=sharing) 38 | documents the experience of running a small course utilizing AnVIL. Includes some set up, 39 | learned positives and negatives from February 2020. 
40 | 41 | ## Workspaces 42 | 43 | See illustrative examples of how you can use Bioconductor in AnVIL workspaces. 44 | 45 | - [The AnVIL package](https://app.terra.bio/#workspaces/bioconductor-rpci-anvil/Bioconductor-Package-AnVIL) (public release pending) 46 | - Simplifies exploration of the Terra data model 47 | - Supports programmatic development of workflows, workspaces, and resource management data 48 | - [Orchestrating Single Cell Analysis](https://app.terra.bio/#workspaces/use-strides/Bioconductor-Workshop-OSCA-3-12) 49 | - Describes how to use a custom container to do all computations in the OSCA book [current to March 2021] 50 | - Note: another workspace processes the book content via AnVILPublish 51 | - [RNA-seq analysis with DESeq2](https://app.terra.bio/#workspaces/bioconductor-rpci-anvil/Bioconductor-Workflow-DESeq2) 52 | - A collection of Jupyter notebooks 53 | - Includes a WDL workflow that runs salmon quantification on FASTQ inputs 54 | - [Pan-cancer Transcriptome Surveys](https://app.terra.bio/#workspaces/landmarkanvil2/pancan_tx_public) (public release pending) 55 | - Two workflows, each devoted to different gene sets 56 | - The WDL and scripts are registered at [dockstore.org](https://dockstore.org/workflows/github.com/vjcitn/BiocOncoTK/msireg1:master?tab=info) 57 | - Unit testing for the script components is managed in [BiocOncoTK](https://github.com/vjcitn/BiocOncoTK/blob/master/tests/testthat/test_dockstore_scripts.R) 58 | 59 | ## Notebooks 60 | 61 | - [Using Bioconductor's VCF processing stack](https://nbviewer.jupyter.org/github/vjcitn/terravar/blob/master/Tiny%20population%20stratification%20display.ipynb) to demonstrate population stratification using a small slice of chr17 from [EBI 1000 genomes VCF](http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000_genomes_project/release/20190312_biallelic_SNV_and_INDEL/20190312_biallelic_SNV_and_INDEL_README.txt) 62 | - [Using Dockstore and Terra for Pan-cancer
Transcriptomics](https://nbviewer.jupyter.org/github/vjcitn/terravar/blob/master/trimmedMondaySep16.ipynb) to compare relationships between gene expression and stratified or continuous measures of microsatellite instability in 33 TCGA tumor types 63 | 64 | ## Miscellaneous 65 | 66 | - Shiny Apps 67 | - [TerraPlane](https://github.com/shwetagopaul92/TerraPlane) filters Dockstore to find methods 68 | based on search terms 69 | - Data Management Utilities 70 | 71 | - Results as of 20 June 2019 72 | 73 | - _R_ markdown for [using terra to survey CCDG and 74 | CMG](resources/basicData.Rmd) 75 | 76 | ``` 77 | ## # A tibble: 10 x 3 78 | ## # Groups: study [2] 79 | ## study organ N 80 | ## 81 | ## 1 CCDG AI 9031 82 | ## 2 CCDG CVD 25741 83 | ## 3 CCDG NP 19422 84 | ## 4 CMG Blood 277 85 | ## 5 CMG Brain 1844 86 | ## 6 CMG Eye 552 87 | ## 7 CMG Heart 184 88 | ## 8 CMG Kidney 432 89 | ## 9 CMG Muscle 1722 90 | ## 10 CMG Orphan 717 91 | ``` 92 | 93 | - Drilling down on CCDG 94 | ``` 95 | ## # A tibble: 9 x 4 96 | ## # Groups: study, organ [3] 97 | ## study organ addit N 98 | ## 99 | ## 1 CCDG AI Asthma 1171 100 | ## 2 CCDG AI IBD 4694 101 | ## 3 CCDG AI T1D 3166 102 | ## 4 CCDG CVD AFib 3731 103 | ## 5 CCDG CVD EOCAD 20156 104 | ## 6 CCDG CVD HemStroke 1358 105 | ## 7 CCDG CVD Stroke 496 106 | ## 8 CCDG NP Alz 2374 107 | ## 9 CCDG NP Autism 17048 108 | ``` 109 | -------------------------------------------------------------------------------- /inst/legacy/index.md: -------------------------------------------------------------------------------- 1 | # _Bioconductor_ / _AnVIL_ 2 | 3 | ## Project Activities 4 | 5 | This site summarizes ongoing [_Bioconductor_][] development activities 6 | related to [_AnVIL_][]. 7 | 8 | Learn more [about][] _Bioconductor_ and _AnVIL_.
9 | 10 | ## Project Activities Overview 11 | 12 | - [Available Now](#now) 13 | - [In Progress](#inprogress) 14 | - [Future](#future) 15 | - [Details](#details) 16 | 17 | 18 | ### Available Now 19 | 20 | - Jupyter _R_ / _Bioconductor_ notebooks 21 | ([image][terra-jupyter-bioconductor:image], 22 | [github][terra-jupyter-bioconductor:github]). Use by choosing the 23 | 'Bioconductor' Notebook Runtime. 24 | 25 | - _RStudio_ _R_ / _Bioconductor_ 26 | ([image][anvil-rstudio-bioconductor:image], 27 | [github][anvil-rstudio-bioconductor:github]). Use by selecting a 28 | 'custom' Notebook Runtime and selecting the RStudio image under 29 | `Community Maintained RStudio Environments (Verified Partners)`. The image 30 | is located at the link `us.gcr.io/anvil-gcr-public/anvil-rstudio-bioconductor:0.0.10`. The 31 | image has R-4.0.3 and the latest stable release Bioc 3.12. 32 | 33 | You can also select 'Other Environments' and use our Bioconductor `devel` image. 34 | The image has the current Bioconductor `devel` version (3.13) and 35 | appropriate R version 4.1. The image is located at 36 | `us.gcr.io/anvil-gcr-public/anvil-rstudio-bioconductor-devel:3.13.0` 37 | 38 | - [AnVIL][anvil:bioconductor] _R_ package for user- and 39 | developer-oriented AnVIL-specific functionality. Install with: 40 | 41 | - `BiocManager::install("Bioconductor/AnVIL")` -- _Bioconductor_ 42 | version 3.10 (_R_ 3.6.*) on the AnVIL. 43 | - `BiocManager::install("AnVIL")` -- _Bioconductor_ 44 | version 3.11 (_R_ 4.0.0) or later. 45 | 46 | - Fast binary package installation of 3212 _Bioconductor_ and _CRAN_ packages 47 | 48 | - Available on the _RStudio_ _R_ / _Bioconductor_ image (_R_ 4.0.0, 49 | _Bioconductor_ 3.11) ONLY. Use `AnVIL::install("Rsamtools")`. 50 | - 'CRAN'-style repository at 51 | `https://storage.googleapis.com/anvil-rstudio-bioconductor/0.99/3.11`; 52 | images created 27 May 2020. 53 | - See binary builds for more details.
54 | 55 | - [AnVILBilling](https://github.com/bjstubbs/AnVILBilling) _R_ package for tracking and detailing billing and usage of the AnVIL and Terra resources 56 | 57 | 58 | ### In Progress 59 | 60 | 61 | - AnVIL / _Bioconductor_ oriented workshops 62 | 63 | - [BCC 2020][]. See R / Bioconductor in the Cloud description [here][] 64 | - [Bioc 2020][]. Tentative [workshop materials][] 65 | 66 | - Continued development of [Training Materials][] 67 | 68 | - User tool for instrumentation analysis. Cost estimation of RAM, disk, time, 69 | etc. 70 | 71 | 72 | ### Future 73 | 74 | - _Kubernetes_ support for _Bioconductor_ in AnVIL 75 | 76 | 77 | ## Project Activities -- Detailed 78 | 79 | This section provides a more detailed description of projects. 80 | 81 | - [Containers](#containers) 82 | - [User and Developer Tools](#tools) 83 | - [Metadata access and overview](#metadata) 84 | - [Other activities](#other-activities) 85 | 86 | 87 | ### Containers 88 | 89 | The latest terra-jupyter-bioconductor docker containers are available 90 | at [AnVIL][] and on the Google Container Registry ([gcr][]). They work 91 | like the [bioconductor_docker][] images, with the capability to 92 | install 'all of' _Bioconductor_ packages along with a few 93 | pre-installed "core" set of packages. The terra-jupyter-bioconductor 94 | image inherits from the terra-jupyter-r image which has all the system 95 | dependencies installed. This image has been tested on Leonardo and 96 | installs all but a few packages in _Bioconductor_ release, which 97 | fail due to archived CRAN dependencies. 98 | 99 | Jupyter notebooks 100 | 101 | The images are based on _R_ version 4.0 and _Bioconductor_ version 102 | 3.12. 103 | 104 | - [image][terra-jupyter-bioconductor:image], 105 | [github][terra-jupyter-bioconductor:github], _Bioconductor_ 106 | [github][terra-jupyter-bioconductor:github] (use for 107 | _Bioconductor_-specific issues).
108 | 109 | RStudio / _Bioconductor_ 110 | 111 | The image now has R-4.0.3 and the latest stable release Bioc 3.12. 112 | 113 | - [image][anvil-rstudio-bioconductor:image], 114 | [github][anvil-rstudio-bioconductor:github]. 115 | 116 | - The terra-rstudio-bioconductor docker container is currently 117 | available as a custom 'bring your own' image to terra while work on 118 | a fully integrated RStudio environment is being developed. 119 | 120 | 121 | ### User and Developer Tools 122 | 123 | AnVIL package ([_Bioconductor_][anvil:bioconductor], 124 | [github][anvil:github]). 125 | 126 | - This _R_ package provides both developer-oriented and user-oriented 127 | AnVIL-specific functionality. 128 | - `av*()` facilities for interacting with workspace data elements. 129 | - `gsutil_*()` facilities for interacting with the Google cloud. 130 | - Developer-oriented access to major AnVIL components (Terra, 131 | Leonardo, Dockstore, and Gen3) REST APIs. Bearer-token 132 | authentication requires gcloud sdk installation. Work on expanding 133 | and implementing additional REST APIs is ongoing. 134 | 135 | Binary package installation (under development) 136 | 137 | - Because the software environment is fixed by the container, packages 138 | can be pre-built and rapidly installed simply by copying from an 139 | online repository. We are developing the tooling to support this 140 | repository (as folders in google buckets) and to facilitate easy 141 | installation (via the `AnVIL::install()` function). 142 | 143 | 144 | Notes on construction of binary package images within AnVIL 145 | 146 | 1. Install AnVIL -- do with Ncpus > 1 147 | 2. Allow updates 148 | 3. NOT YET: set `options(repos=AnVIL::repositories())` to get fast 149 | install of CRAN packages 150 | 4. `BiocManager::install("vjcitn/BiocBBSpack", Ncpus=10)` 151 | 5. `library(BiocBBSpack)` 152 | 6. Retrieve manifest from Bioconductor git `pl = get_bioc_packagelist()` 153 | 7.
`BiocManager::install(pl, Ncpus=50)` -- this gets us 3212 154 | binary packages ... odd situation for affypdnn not available for 155 | 3.11 but why in manifest? These packages do not install: 156 | 157 | ``` 158 | > dput(sort(setdiff(pl, installed))) 159 | c("affypdnn", "anamiR", "BatchQC", "CALIB", "ccfindR", "cellGrowth", 160 | "cellTree", "CHARGE", "chroGPS", "cobindR", "CountClust", "CTDquerier", 161 | "CVE", "debrowser", "DEDS", "Doscheda", "flowFit", "GeneGeneInteR", 162 | "Genominator", "gpuMagic", "IdMappingAnalysis", "IdMappingRetrieval", 163 | "Imetagene", "lol", "lpNet", "LVSmiRNA", "manta", "MCRestimate", 164 | "Melissa", "MoonlightR", "MSGFgui", "MSGFplus", "MTseeker", "nem", 165 | "netbenchmark", "nethet", "PAPi", "PathwaySplice", "pcaGoPromoter", 166 | "pint", "proteoQC", "QUALIFIER", "R3CPET", "readat", "RIPSeeker", 167 | "SANTA", "scAlign", "sparsenetgls", "splicegear", "trena", "waveTiling", 168 | "xps", "YAPSA") 169 | ``` 170 | 171 | 8. Use `dotarmv()` as follows 172 | 173 | ``` 174 | setwd(.libPaths()[1]) 175 | jnk = lapply(dir(), dotarmv) # could probably be done with mclapply or bash 176 | ``` 177 | 178 | binaries will appear in the `dest=` argument of `dotarmv()` 179 | 180 | 181 | 9. Set up https://storage.googleapis.com/anvil-rstudio-bioconductor/0.99/3.11/src/contrib/ 182 | 183 | 10. Use `gsutil -m cp` to copy content of dotarmv to the src/contrib bucket 184 | 185 | 11. Create PACKAGES.gz using `tools::write_PACKAGES(unpacked=TRUE)`, copy to src/contrib 186 | 187 | 188 | ### Metadata access and overview 189 | 190 | 201 | 202 | AnVIL package tools can be used to discover incompatibilities 203 | or ambiguities in study annotation. BJ's class worked through 204 | metadata survey exercises. An example of incompatible/ambiguous 205 | annotation is present in the Autism workspaces. 206 | 207 | 208 | 209 | We are looking at two studies from NYGC referring to autism, one has substring 210 | ACE2 and the other SSC.
What we see above is that AFFECTION_STATUS is coded 1/2 in the SSC study, 211 | and more prosaically in the ACE2 study. It may be that the 212 | labels in ACE2 study are more problematic as the options seem to be "0", "ASD affected", 213 | "ASD Affected", and "Diagnosis uncertain" -- or perhaps it is just a letter casing issue. 214 | 215 | The ingestion group was notified and replied that "there is no process for the 216 | AnVIL team to retrospectively address existing data". Interest was expressed in 217 | learning more about our metadata survey capabilities. 218 | 219 | 220 | ### Other activities 221 | 222 | _Kubernetes_ 223 | 224 | - [Slides](https://docs.google.com/presentation/d/1Y7g_6X8I6DPaNK84EzWNo1wVpfAwdORGt6kcgcPYOV4/edit?usp=sharing) 225 | 226 | - Illustration of working with R and Kubernetes : https://github.com/shwetagopaul92/hgvarByKub 227 | 228 | - [k8sredis][] An alternative approach: k8s with redis work queue and 229 | BiocParallel functionality. Start a number of parallel jobs on k8s, 230 | then an interactive 'manager' (e.g., RStudio session; Jupyter 231 | notebook). Once in R one can 232 | 233 | ``` 234 | library(RedisParam) 235 | fun = function(i, ...) 
{ 236 | Sys.sleep(1) 237 | system("hostname", intern=TRUE) 238 | } 239 | 240 | p <- RedisParam( 241 | workers = 5, jobname = "demo", 242 | is.worker = FALSE 243 | ) 244 | res <- bplapply(1:13, fun, BPPARAM = p) 245 | 246 | table(unlist(res)) 247 | ## five-worker-5ns79 five-worker-8gh5q 248 | ## 2 2 249 | ## five-worker-dvdtv five-worker-wt5jq 250 | ## 3 3 251 | ## five-worker-zwlpw 252 | ## 3 253 | ``` 254 | 255 | [_Bioconductor_]: https://bioconductor.org 256 | [_AnVIL_]: https://anvilproject.org 257 | [about]: about 258 | [Training Materials]: training 259 | [AnVIL]: https://anvil.terra.app 260 | [AnVIL_package]: https://github.com/Bioconductor/AnVIL 261 | [gcr]: https://console.cloud.google.com/gcr/images/broad-dsp-gcr-public/US/terra-jupyter-bioconductor 262 | [bioconductor_docker]: https://github.com/Bioconductor/bioconductor_docker 263 | [k8sredis]: https://github.com/Bioconductor/k8sredis 264 | [BCC 2020]: https://bcc2020.github.io/ 265 | [here]: https://bcc2020.github.io/training/ 266 | [Bioc 2020]: http://bioc2020.bioconductor.org/ 267 | [workshop materials]: https://github.com/waldronlab/AnVILWorkshop 268 | 269 | [anvil:bioconductor]: https://bioconductor.org/packages/AnVIL 270 | [anvil:github]: https://github.com/Bioconductor/AnVIL 271 | 272 | [terra-jupyter-bioconductor:image]: https://us.gcr.io/broad-dsp-gcr-public/terra-jupyter-bioconductor:1.0.11 273 | [terra-jupyter-bioconductor:github]: https://github.com/DataBiosphere/terra-docker/tree/master/terra-jupyter-bioconductor 274 | [terra-jupyter-bioconductor:bioconductor]: https://github.com/Bioconductor/terra-docker 275 | 276 | [anvil-rstudio-bioconductor:image]: https://us.gcr.io/anvil-gcr-public/anvil-rstudio-bioconductor:0.0.10 277 | [anvil-rstudio-bioconductor:github]: https://github.com/anvilproject/anvil-docker 278 | 279 | [Google Container Registry]: https://cloud.google.com/container-registry/docs/pushing-and-pulling 280 | 
--------------------------------------------------------------------------------