├── code └── README ├── docs ├── .gitignore ├── _config.yml ├── Gemfile ├── images │ ├── ccdgAFF.png │ ├── favicon.ico │ ├── anvil_logo.jpg │ ├── logo_nhgri_w.png │ ├── logo_bioconductor.gif │ └── teampics │ │ ├── SehyunOh.jpg │ │ ├── VinceCarey.jpg │ │ ├── LeviWaldron.jpg │ │ ├── LoriShepherd.jpg │ │ ├── MarcelRamos.jpg │ │ ├── MartinMorgan.jpg │ │ └── NiteshTuraga.jpeg ├── project_management.md ├── team.md ├── about.md ├── training │ └── basicData.Rmd ├── FaceToFace_Jan2019.md ├── _layouts │ └── default.html ├── training.md └── index.md ├── README.md └── README_GITHUB_IO.md /code/README: -------------------------------------------------------------------------------- 1 | Code for testing / developing Bioconductor in AnVIL. 2 | -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | .bundle 2 | Gemfile.lock 3 | _site 4 | vendor 5 | -------------------------------------------------------------------------------- /docs/_config.yml: -------------------------------------------------------------------------------- 1 | title: Bioconductor / AnVIL 2 | theme: jekyll-theme-minimal -------------------------------------------------------------------------------- /docs/Gemfile: -------------------------------------------------------------------------------- 1 | source 'https://rubygems.org' 2 | gem 'github-pages', group: :jekyll_plugins 3 | -------------------------------------------------------------------------------- /docs/images/ccdgAFF.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/AnVIL_Admin/master/docs/images/ccdgAFF.png -------------------------------------------------------------------------------- /docs/images/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/AnVIL_Admin/master/docs/images/favicon.ico -------------------------------------------------------------------------------- /docs/images/anvil_logo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/AnVIL_Admin/master/docs/images/anvil_logo.jpg -------------------------------------------------------------------------------- /docs/images/logo_nhgri_w.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/AnVIL_Admin/master/docs/images/logo_nhgri_w.png -------------------------------------------------------------------------------- /docs/images/logo_bioconductor.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/AnVIL_Admin/master/docs/images/logo_bioconductor.gif -------------------------------------------------------------------------------- /docs/images/teampics/SehyunOh.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/AnVIL_Admin/master/docs/images/teampics/SehyunOh.jpg -------------------------------------------------------------------------------- /docs/images/teampics/VinceCarey.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/AnVIL_Admin/master/docs/images/teampics/VinceCarey.jpg -------------------------------------------------------------------------------- /docs/images/teampics/LeviWaldron.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/AnVIL_Admin/master/docs/images/teampics/LeviWaldron.jpg -------------------------------------------------------------------------------- /docs/images/teampics/LoriShepherd.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/AnVIL_Admin/master/docs/images/teampics/LoriShepherd.jpg -------------------------------------------------------------------------------- /docs/images/teampics/MarcelRamos.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/AnVIL_Admin/master/docs/images/teampics/MarcelRamos.jpg -------------------------------------------------------------------------------- /docs/images/teampics/MartinMorgan.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/AnVIL_Admin/master/docs/images/teampics/MartinMorgan.jpg -------------------------------------------------------------------------------- /docs/images/teampics/NiteshTuraga.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bioconductor/AnVIL_Admin/master/docs/images/teampics/NiteshTuraga.jpeg -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Bioconductor / AnVIL Admin Repository 2 | 3 | Visit https://bioconductor.github.io/AnVIL_Admin 4 | 5 | This repository is a place to communicate and track progress by 6 | _Bioconductor_ participants in the AnVIL project. 7 | 8 | See [projects][] for basic project management. 9 | 10 | See [README_GITHUB_IO][] for instructions on editing (including local 11 | previews) the github.io site. 12 | 13 | [projects]: https://github.com/Bioconductor/AnVIL_Admin/projects 14 | [README_GITHUB_IO]: https://github.com/Bioconductor/AnVIL_Admin/blob/master/README_GITHUB_IO.md 15 | -------------------------------------------------------------------------------- /docs/project_management.md: -------------------------------------------------------------------------------- 1 | # Bioconductor / AnVIL 2 | 3 | ## Project management 4 | 5 | Some visibility into _Bioconductor_-specfic project management, 6 | including [current developement][] effort and [AnVIL Year 2 Plan][] are 7 | available as github projects. [Legacy][] project boards are also available. 8 |
9 |
10 | For quick glance of project updates: [Project Activities][] 11 | 12 | [current developement]: https://github.com/Bioconductor/AnVIL_Admin/projects/5 13 | [AnVIL Year 2 Plan]: https://github.com/Bioconductor/AnVIL_Admin/projects/6 14 | [Legacy]: https://github.com/Bioconductor/AnVIL_Admin/projects?query=is%3Aclosed 15 | [Project Activities]: index -------------------------------------------------------------------------------- /docs/team.md: -------------------------------------------------------------------------------- 1 | # Bioconductor / AnVIL 2 | 3 | ## Team 4 | 5 | ### Current Developers 6 | 7 | Vincent Carey 9 |
Vincent Carey 10 | 11 | Martin Morgan 13 |
Martin Morgan (PI) 14 | 15 | Sehyun Oh 17 |
Sehyun Oh 18 | 19 | Marcel Ramos 21 |
Marcel Ramos 22 | 23 | Lori Shepherd 25 |
Lori Ann Shepherd (PM) 26 | 27 | BJ Stubbs 29 |
BJ Stubbs 30 | 31 | Nitesh Turaga 33 |
Nitesh Turaga 34 | 35 | Levi Waldron 37 |
Levi Waldron 38 | 39 | ### Past Developers 40 | 41 | -------------------------------------------------------------------------------- /README_GITHUB_IO.md: -------------------------------------------------------------------------------- 1 | This repository contains material for the _Bioconductor_ / _AnVIL_ 2 | development [web site][1]. 3 | 4 | Edit or add material as markdown files in the docs/ directory. Please 5 | wrap lines to 80 character width and aim for simple markdown rather 6 | than elaborate html or other content. 7 | 8 | Sidebar links to new pages can be added by editing 9 | `docs/_layouts/default.html`. 10 | 11 | Please follow best practices by previewing changes locally. 12 | 13 | 1. Make sure that ruby and bundler are installed, following the 14 | 'Requirements' section of [GitHub's documentation][2]. 15 | 16 | 2. Clone the repository and switch to the `docs/` directory 17 | 18 | cd BioC2019/docs 19 | 20 | 3. Install or update bundler to install the ruby pre-requisities. 21 | 22 | gem install --user-install bundler 23 | # If the installer complains, add the suggested $PATH_TO_RUBY/bin 24 | # directory to your ~/.bash_profile or ~/.bashrc or similar. 25 | 26 | 4. Install ruby pre-requisites. 27 | 28 | bundle install --path vendor/bundle # once only; references Gemfile 29 | 30 | 5. Execute the jekyll server 31 | 32 | bundle exec jekyll serve 33 | 34 | and view the results at https://localhost:4000 35 | 36 | [1]: https://bioconductor.github.io/AnVIL_Admin 37 | [2]: https://help.github.com/articles/setting-up-your-github-pages-site-locally-with-jekyll/#requirements 38 | -------------------------------------------------------------------------------- /docs/about.md: -------------------------------------------------------------------------------- 1 | # Bioconductor / AnVIL 2 | 3 | ## About _Bioconductor_ and _AnVIL_ 4 | 5 | [_Bioconductor_][] is a successful open-source project for the 6 | statistical analysis and comprehension of high throughput genomic 7 | data. _Bioconductor_ is based on the _R_ programming language. It 8 | consists of more than 1600 [_R_ packages][] contributed by more than 9 | 1000 maintainers world-wide. _Bioconductor_ packages are downloaded to 1/2 10 | million IP addresses annualy. There are more than 29,000 11 | [PubMedCentral citations][] to _Bioconductor_. _Bioconductor_ has an 12 | active [support site][] and [community slack][]. 13 | 14 | [_AnVIL_][] is a US National Institutes of Health / National Human 15 | Genome Research Institute initiative to develop a Genomic Data Science 16 | Analysis, Visualization, and Informatics Lab-space (AnVIL). 17 | 18 | Ultimately, _AnVIL_ will provide the ability to launch RStudio, 19 | Jupyter notebook, and other _R_ / _Bioconductor_ resources in a 20 | computational cloud. The _AnVIL_ environment will provide secure access 21 | to large-scale as well as individual data resources, and to scalable 22 | cloud-based computational environments. 23 | 24 | [_Bioconductor_]: https://bioconductor.org 25 | [_AnVIL_]: https://www.genome.gov/27569268/genomic-analysis-visualization-and-informatics-labspace-anvil/ 26 | [_R_ packages]: https://bioconductor.org/packages 27 | [PubMedCentral citations]: https://www.ncbi.nlm.nih.gov/pmc/?term=bioconductor&sort=ePubDate 28 | [support site]: https://support.bioconductor.org 29 | [community slack]: https://bioc-community.herokuapp.com/ 30 | -------------------------------------------------------------------------------- /docs/training/basicData.Rmd: -------------------------------------------------------------------------------- 1 | # Basic dataset sizes in AnVIL workspaces visible to AnVIL_Devs 2 | 3 | VJ Carey, BJ Stubbs 4 | 5 | 6 | 7 | ```{r setup, echo=FALSE, results="hide"} 8 | suppressMessages({ 9 | library(AnVIL) 10 | library(rjson) 11 | library(httr) 12 | library(tibble) 13 | library(dplyr) 14 | library(magrittr) 15 | }) 16 | 17 | participantCount = function(studyTitle) { 18 | types = httr::content( 19 | terra$getEntityTypes("anvil-datastorage", 20 | studyTitle)) 21 | types$participant$count 22 | } 23 | 24 | sampleAtts = function(studyTitle) { 25 | atts = httr::content( 26 | terra$getEntities( 27 | "anvil-datastorage", 28 | studyTitle, 29 | "sample")) 30 | atts 31 | } 32 | 33 | basicDataFrame = function() { 34 | ws = AnVIL::terra$listWorkspaces() 35 | wscon = content(ws, type="text/json") 36 | ww = fromJSON(wscon) 37 | alln = sapply(ww, function(x) x$workspace$name) 38 | anvnames = grep("AnVIL", alln, value=TRUE) -> anvnames 39 | toks = strsplit(anvnames, "_") 40 | stud = sapply(toks, "[", 2) 41 | site = sapply(toks, "[", 3) 42 | org = sapply(toks, "[", 4) 43 | t5 = sapply(toks, "[", 5) 44 | t6 = sapply(toks, "[", 6) 45 | nums = lapply(anvnames, participantCount) 46 | bad = which(sapply(nums, is.null)) 47 | if (length(bad)>0) { 48 | nums[bad]=NA 49 | nums = unlist(nums) 50 | } 51 | data.frame(study=stud, site=site, organ=org, 52 | N=nums, addit=t5, ext=t6, name=anvnames, 53 | stringsAsFactors=FALSE) 54 | } 55 | 56 | setClass("AnVILStudies", 57 | representation(basic="data.frame")) 58 | AnVILStudies = function(dataframe) { 59 | new("AnVILStudies", basic=dataframe) } 60 | setMethod("show", "AnVILStudies", function(object) { 61 | print(as_tibble(object@basic)) 62 | }) 63 | #sa = sampleAtts("AnVIL_CCDG_WashU_CVD_EOCAD_Emerge_WGS") 64 | 65 | ``` 66 | 67 | ```{r getbasic} 68 | basedf = basicDataFrame() 69 | ``` 70 | We are using the AnVIL package of Bioconductor to 71 | query and summarize the AnVIL workspaces. There are 72 | `r nrow(basedf)` workspaces with 'AnVIL' as the 73 | leading substring of workspace name. 74 | 75 | ```{r baic} 76 | as_tibble(basedf) 77 | ``` 78 | 79 | The number of samples is usually available in an 80 | attribute, but there are `r sum(is.na(basedf$N))` 81 | studies where this is not available. 82 | 83 | It is of interest to tabulate numbers of samples 84 | by anatomic organ. 85 | ```{r byorg} 86 | as_tibble(basedf) %>% 87 | group_by(study, organ) %>% 88 | summarise(N=sum(N, na.rm=TRUE)) 89 | ``` 90 | 91 | For CCDG we can partition a bit more finely. 92 | 93 | ```{r doccd} 94 | basedf %>% filter(study=="CCDG") %>% 95 | group_by(study,organ,addit) %>% 96 | summarise(N=sum(N, na.rm=TRUE)) 97 | ``` 98 | -------------------------------------------------------------------------------- /docs/FaceToFace_Jan2019.md: -------------------------------------------------------------------------------- 1 | # Face to Face Meeting: Baltimore Jan 10-11, 2019 2 | 3 | ## Resources (for reference) 4 | 5 | - Previous slides: https://docs.google.com/presentation/d/1rhffdH_uG6tyPZt67ie4P_NKyWJIk6DqMOgGrA8dnyI/edit?usp=sharing 6 | 7 | - Informal notes from first face-to-face meeting https://docs.google.com/document/d/1expHfjCHqgU2FqacQK7R5G_OiZDZ12xvZUhwYUw5emU/edit 8 | 9 | - 'Ideal AnVIL architecture' 10 | 11 | ## Platform and expertise 12 | 13 | - Bioconductor recap -- 'Statistical analysis & comprehension'; interactive; core and contributed packages; typical use with some but far from all packages used. 14 | 15 | - Primary use is INTERACTIVE 16 | - Bioconductor used through Jupyter / RStudio 'front end' 17 | 18 | ## Role in AnVIL 19 | 20 | 1. Initial development 21 | 22 | - Standardized, correct containers for essential Bioconductor packages through RStudio & Jupyter. 'Release' and 'devel' flavors. See https://github.com/Bioconductor/AnVIL_Docker (work in progesss) 23 | 24 | - R-based Leonardo REST interface -- primarily to help developers. See https://github.com/Bioconductor/AnVIL 25 | 26 | - Existing Bioconductor cloud-based activities -- Annotation & ExperimentHub, BiocOncoTK, ... 27 | 28 | 2. Single RStudio / Jupyter instance user interface to AnVIL 29 | 30 | - Access to user and protected data using standard R idioms. Maybe data resources are just 'files' on the file system and no special implementation is required; maybe data resource metadata needs to be 'discovered' and presented to the user through some kind of R / Bioconductor based interface. 31 | 32 | - Access to 'standard' (??) Firecloud / Terra services -- discover & run services on user and protected data from within R. E.g., perform https://software.broadinstitute.org/firecloud/documentation/article?id=11115 but from an R script. 33 | 34 | - Essential to have standard APIs published early to write R-level functions around 35 | 36 | 3. Several R / Bioconductor instances 37 | 38 | - Launch and manage many R / Bioconductor instances in firecloud; interact via BiocParallel-like task distribution 39 | 40 | - Management via CloudMan / Kubernetes / 'native' Firecloud / ... 41 | 42 | - Implement Terra services in R / Bioconductor 43 | - ?? WDL-enabled R (e.g., Sean Davis' [wdlRunR][1]) 44 | - WDL fits well with Bioc formal objects (vs. R's more relaxed approach) 45 | - ?? provided as container. 46 | 47 | - Importance of scalable container infrastructure 48 | - Use Galaxy CloudMan or Firecloud infrastructure to spin up instances 49 | - Communicate & drive from within 'manager' RStudio instance, e.g., via BiocParallel & rabbitmq parallel evaluation 50 | 51 | 4. Training 52 | 53 | 5. What we do right 54 | 55 | [1]: https://github.com/seandavi/wdlRunR 56 | -------------------------------------------------------------------------------- /docs/_layouts/default.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | {% seo %} 8 | 9 | 10 | 11 | 12 | 13 | 16 | 17 | 18 |
19 |
20 |

21 | 22 | Bioconductor 26 | 27 |

28 | 29 |

30 | About
31 |  -Team
32 | Project Activities
33 |  - Available now
34 |  - In progress
35 |  - Future
36 |  - Details
37 | Training
38 | Project management
39 |

40 |
41 |

42 | 43 | NHGRI 48 | 49 |
50 | 51 | NHGRI 56 | 57 |

58 |

59 | Bioconductor https://bioconductor.org
60 | AnVIL_Admin repository
61 | Hosted on GitHub Pages
62 | Theme by orderedlist. 63 |

64 |
65 |
66 | 67 | {{ content }} 68 | 69 |
70 | 72 |
73 | 74 | 75 | {% if site.google_analytics %} 76 | 85 | {% endif %} 86 | 87 | 88 | -------------------------------------------------------------------------------- /docs/training.md: -------------------------------------------------------------------------------- 1 | # _Bioconductor_ / _AnVIL_ 2 | 3 | ## Training material 4 | 5 | The material here is 'in development' and meant to orient the 6 | _Bioconductor_ community to use of _AnVIL_ resources. See also [AnVIL 7 | documentation][AnVIL-docs] resources. 8 | 9 | [AnVIL-docs]: https://support.terra.bio/hc/en-us 10 | 11 | - [Workshops](#workshops) 12 | - [_Terra_ workspaces / workflows](#workspaces) 13 | - [Notebooks](#notebooks) 14 | - [Dockstore](#dockstore) 15 | - [Other](#other) 16 | 17 | 18 | ### Workshops 19 | 20 | - [Terra in the Classroom][] 21 | 22 | - Documentation on running a small course utilizing AnVIL. This includes some 23 | set up information as well as the learned positives and negatives as of 24 | February 2020. 25 | 26 | 27 | 28 | ### _Terra_ Workspaces / Workflows 29 | 30 | - (proof-of-principle) A workspace for [pan-cancer transcriptome 31 | surveys][pancanlink]. This workspace includes two workflows, each 32 | devoted to different gene sets. The WDL and associated scripts are 33 | [registered][dockstorelink] at dockstore.org. Unit testing for the 34 | script components is managed in the Bioconductor [BiocOncoTK 35 | package][vjconcohub] (developer repo). 36 | 37 | [pancanlink]: https://app.terra.bio/#workspaces/landmarkanvil2/pancan_tx_public 38 | [dockstorelink]: https://dockstore.org/workflows/github.com/vjcitn/BiocOncoTK/msireg1:master?tab=info 39 | [vjconcohub]: https://github.com/vjcitn/BiocOncoTK/blob/master/tests/testthat/test_dockstore_scripts.R 40 | 41 | 42 | 43 | ### Notebooks 44 | 45 | - (proof-of-principle) [Using Bioconductor's VCF processing stack][vcf stack] 46 | to demonstrate population stratification using a small slice of 47 | chr17 from the [new EBI 1000 genomes VCF][1kvcf]. 48 | 49 | - (proof-of-principle) [Using dockstore+terra for pancancer 50 | transcriptomics][pancantx] to compare relationships between gene 51 | expression and stratified or continuous measures of microsatellite 52 | instability in 33 TCGA tumor types. 53 | 54 | [vcf stack]: https://nbviewer.jupyter.org/github/vjcitn/terravar/blob/master/Tiny%20population%20stratification%20display.ipynb 55 | [1kvcf]: http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000_genomes_project/release/20190312_biallelic_SNV_and_INDEL/20190312_biallelic_SNV_and_INDEL_README.txt 56 | [pancantx]: https://nbviewer.jupyter.org/github/vjcitn/terravar/blob/master/trimmedMondaySep16.ipynb 57 | 58 | 59 | ### Dockstore 60 | 61 | 62 | ### Other 63 | 64 | - Shiny Apps 65 | 66 | - [TerraPlane][] to help filter dockstore to find methods based on search term 67 | 68 | - Data management utilies 69 | 70 | - _R_ markdown for [using terra to survey CCDG and 71 | CMG](training/basicData.Rmd) 72 | 73 | - Results as of 20 June 2019 74 | ``` 75 | ## # A tibble: 10 x 3 76 | ## # Groups: study [2] 77 | ## study organ N 78 | ## 79 | ## 1 CCDG AI 9031 80 | ## 2 CCDG CVD 25741 81 | ## 3 CCDG NP 19422 82 | ## 4 CMG Blood 277 83 | ## 5 CMG Brain 1844 84 | ## 6 CMG Eye 552 85 | ## 7 CMG Heart 184 86 | ## 8 CMG Kidney 432 87 | ## 9 CMG Muscle 1722 88 | ## 10 CMG Orphan 717 89 | ``` 90 | 91 | - Drilling down on CCDG 92 | ``` 93 | ## # A tibble: 9 x 4 94 | ## # Groups: study, organ [3] 95 | ## study organ addit N 96 | ## 97 | ## 1 CCDG AI Asthma 1171 98 | ## 2 CCDG AI IBD 4694 99 | ## 3 CCDG AI T1D 3166 100 | ## 4 CCDG CVD AFib 3731 101 | ## 5 CCDG CVD EOCAD 20156 102 | ## 6 CCDG CVD HemStroke 1358 103 | ## 7 CCDG CVD Stroke 496 104 | ## 8 CCDG NP Alz 2374 105 | ## 9 CCDG NP Autism 17048 106 | ``` 107 | 108 | [TerraPlane]: https://github.com/shwetagopaul92/TerraPlane 109 | [Terra in the Classroom]: https://docs.google.com/presentation/d/1AvEt6UIIx-G5eTe4hlfkGOYsUcSQrKx8ySlnnfg7XH8/edit?usp=sharing -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | # _Bioconductor_ / _AnVIL_ 2 | 3 | ## Project Activities 4 | 5 | This site summarizes ongoing [_Bioconductor_][] development activities 6 | related to [_AnVIL_][]. 7 | 8 | Learn more [about][] _Bioconductor_ and _AnVIL_. 9 | 10 | ## Project Activities Overview 11 | 12 | - [Available Now](#now) 13 | - [In Progress](#inprogress) 14 | - [Future](#future) 15 | - [Details](#details) 16 | 17 | 18 | ### Available Now 19 | 20 | - Jupyter _R_ / _Bioconductor_ notebooks 21 | ([image][terra-jupyter-bioconductor:image], 22 | [github][terra-jupyter-bioconductor:github]). Use by choosing the 23 | 'Bioconductor' Notebook Runtime. 24 | 25 | - _RStudio_ _R_ / _Bioconductor_ 26 | ([image][anvil-rstudio-bioconductor:image], 27 | [github][anvil-rstudio-bioconductor:github]). Use by selecting a 28 | 'custom' Notebook Runtime and entering the selecting the RStudio image under 29 | `Community Maintained RStudio Environments (Verified Partners)`. The image 30 | is located at the link `us.gcr.io/anvil-gcr-public/anvil-rstudio-bioconductor:0.0.10`. The 31 | image has R-4.0.3 and the latest stable release Bioc 3.12. 32 | 33 | You can also select 'Other Environments' and use our Bioconductor `devel` image. 34 | The image has the current Bioconductor `devel` version (3.13) and 35 | appropriate R version 4.1. The image is located at 36 | `us.gcr.io/anvil-gcr-public/anvil-rstudio-bioconductor-devel:3.13.0` 37 | 38 | - [AnVIL][anvil:bioconductor] _R_ package for user- and 39 | developer-oriented AnVIL-specific functionality. Install with: 40 | 41 | - `BiocManager::install("Bioconductor/AnVIL")` -- _Bioconductor_ 42 | version 3.10 (_R_ 3.6.*) on the AnVIL. 43 | - `BiocManager::install("AnVIL")` -- _Bioconductor_ 44 | version 3.11 (_R_ 4.0.0) or later. 45 | 46 | - Fast binary package installation of 3212 _Bioconductor_ and _CRAN_ package 47 | 48 | - Available on the _RStudio_ _R_ / _Bioconductor_ image (_R_ 4.0.0, 49 | _Bioconductor_ 3.11) ONLY. Use `AnVIL::install("Rsamtools")`. 50 | - 'CRAN'-style repository at 51 | `https://storage.googleapis.com/anvil-rstudio-bioconductor/0.99/3.11`; 52 | images created 27 May 2020. 53 | - See binary builds for more details. 54 | 55 | - [AnVILBilling](https://github.com/bjstubbs/AnVILBilling) _R_ package for tracking and detailing billing and usage of the AnVIL and Terra resources 56 | 57 | 58 | ### In Progress 59 | 60 | 61 | - AnVIL / _Bioconductor_ oriented workshops 62 | 63 | - [BCC 2020][]. See R / Bioconductor in the Cloud description [here][] 64 | - [Bioc 2020][]. Tentative [workshop materials][] 65 | 66 | - Continued development of [Training Materials][] 67 | 68 | - User tool for instrumentation analysis. Cost estimation of RAM, disk, time, 69 | etc. 70 | 71 | 72 | ### Future 73 | 74 | - _Kubernetes_ support for _Bioconductor_ in AnVIL 75 | 76 | 77 | ## Project Activities -- Detailed 78 | 79 | This section provides a more detailed description of projects. 80 | 81 | - [Containers](#containers) 82 | - [User and Developer Tools](#tools) 83 | - [Metadata access and overview](#metadata) 84 | - [Other activities](#other-activities) 85 | 86 | 87 | ### Containers 88 | 89 | The latest terra-jupyter-bioconductor docker containers are available 90 | at [AnVIL][] and on the Google Container Registry ([gcr][]). They work 91 | like the [bioconductor_docker][] images, with the capability to 92 | install 'all of' _Bioconductor_ packages along with a few 93 | pre-installed "core" set of packages. The terra-jupyter-bioconductor 94 | image inherits from the terra-jupyter-r image which has all the system 95 | dependencies installed. This image has been tested on Leonardo and 96 | installs all but a few packages in _Bioconductor_ release,which 97 | fail due to achived CRAN dependencies. 98 | 99 | Jupyter notebooks 100 | 101 | The images are based on _R_ version 4.0 and _Bioconductor_ version 102 | 3.12. 103 | 104 | - [image][terra-jupyter-bioconductor:image], 105 | [github][terra-jupyter-bioconductor:github], _Bioconductor_ 106 | [github][terra-jupyter-bioconductor:github] (use for 107 | _Bioconductor_-specific issues). 108 | 109 | RStudio / _Bioconductor_ 110 | 111 | The image now has R-4.0.3 and the latest stable release Bioc 3.12. 112 | 113 | - [image][anvil-rstudio-bioconductor:image], 114 | [github][anvil-rstudio-bioconductor:github]. 115 | 116 | - The terra-rstudio-bioconductor docker container is currently 117 | available as a custom 'bring your own' image to terra while work on 118 | a fully integrated RStudio environment is being developed. 119 | 120 | 121 | ### User and Developer Tools 122 | 123 | AnVIL package ([_Bioconductor_][anvil:bioconductor], 124 | [github][anvil:github]). 125 | 126 | - This _R_ package provides both developer-oriented and user-oriented 127 | AnVIL-specific functionality. 128 | - `av*()` facilities for interacting with workspace data elements. 129 | - `gsutil_*()` facilities for interacting with the Google cloud. 130 | - Developer-oriented access to major AnVIL components (Terra, 131 | Leonardo, Dockstore, and Gen3) REST APIs. Bearer-token 132 | authentication requires gcloud sdk installation. Work on expanding 133 | and implemented additional REST APIs in on-going. 134 | 135 | Binary package installation (under development) 136 | 137 | - Because the software environment is fixed by the container, packages 138 | can be pre-built and rapidly installed simply by copying from an 139 | online repository. We are developing the tooling to support this 140 | repository (as folders in google buckets) and to facilitate easy 141 | installation (via the `AnVIL::install()` function). 142 | 143 | 144 | Notes on construction of binary package images within AnVIL 145 | 146 | 1. Install AnVIL -- do with Ncpus > 1 147 | 2. Allow updates 148 | 3. NOT YET: set `options(repos=AnVIL::repositories())` to get fast 149 | install of CRAN packages 150 | 4. `BiocManager::install("vjcitn/BiocBBSpack", Ncpus=10)` 151 | 5. `library(BiocBBSpack)` 152 | 6. Retrieve manifest from Bioconductor git `pl = get_bioc_packagelist()` 153 | 7. `BiocManager::install(pl, Ncpus=50)` -- this gets us 3212 packages 154 | binary packages ... odd situation for affypdnn not available for 155 | 3.11 but why in manifest? These packages do not install: 156 | 157 | ``` 158 | > dput(sort(setdiff(pl, installed))) 159 | c("affypdnn", "anamiR", "BatchQC", "CALIB", "ccfindR", "cellGrowth", 160 | "cellTree", "CHARGE", "chroGPS", "cobindR", "CountClust", "CTDquerier", 161 | "CVE", "debrowser", "DEDS", "Doscheda", "flowFit", "GeneGeneInteR", 162 | "Genominator", "gpuMagic", "IdMappingAnalysis", "IdMappingRetrieval", 163 | "Imetagene", "lol", "lpNet", "LVSmiRNA", "manta", "MCRestimate", 164 | "Melissa", "MoonlightR", "MSGFgui", "MSGFplus", "MTseeker", "nem", 165 | "netbenchmark", "nethet", "PAPi", "PathwaySplice", "pcaGoPromoter", 166 | "pint", "proteoQC", "QUALIFIER", "R3CPET", "readat", "RIPSeeker", 167 | "SANTA", "scAlign", "sparsenetgls", "splicegear", "trena", "waveTiling", 168 | "xps", "YAPSA") 169 | ``` 170 | 171 | 8. Use `dotarmv()` as follows 172 | 173 | ``` 174 | setwd(.libPaths()[1]) 175 | jnk = lapply(dir(), dotarmv) # could probably be done with mclapply or bash 176 | ``` 177 | 178 | binaries will appear in the `dest=` argument of `dotarmv()` 179 | 180 | 181 | 9. Set up https://storage.googleapis.com/anvil-rstudio-bioconductor/0.99/3.11/src/contrib/ 182 | 183 | 10. Use `gsutil -m cp` to copy content of dotarmv to the src/contrib bucket 184 | 185 | 11. Create PACKAGES.gz using `tools::write_PACKAGE(unpacked=TRUE)`, copy to src/contrib 186 | 187 | 188 | ### Metadata access and overview 189 | 190 | 201 | 202 | AnVIL package tools can be used to discover incompatibilities 203 | or ambiguities in study annotation. BJ's class worked through 204 | metadata survey exercises. An example of incompatible/ambiguous 205 | annotation is present in the Autism workspaces. 206 | 207 | 208 | 209 | We are looking at two studies from NYGC referring to autism, one has substring 210 | ACE2 and the other SSC. What we see above is that AFFECTION_STATUS is coded 1/2 in the SSC study, 211 | and more prosaically in the ACE2 study. It may be that the 212 | labels in ACE2 study are more problematic as the options seem to be "0", "ASD affected", 213 | "ASD Affected", and "Diagnosis uncertain" -- or perhaps it is just a letter casing issue. 214 | 215 | The ingestion group was notified and replied that "there is no process for the 216 | AnVIL team to retrospectively address existing data". Interest was expressed in 217 | learning more about our metadata survey capabilities. 218 | 219 | 220 | ### Other activities 221 | 222 | _Kubernetes_ 223 | 224 | - [Slides](https://docs.google.com/presentation/d/1Y7g_6X8I6DPaNK84EzWNo1wVpfAwdORGt6kcgcPYOV4/edit?usp=sharing) 225 | 226 | - Illustration of working with R and Kubernetes : https://github.com/shwetagopaul92/hgvarByKub 227 | 228 | - [k8sredis][] An alternative approach: k8s with redis work queue and 229 | BiocParallel functionality. Start a number of parallel jobs on k8s, 230 | then an interactive 'manager' (e.g., RStudio session; Jupyter 231 | notebook). Once in R one can 232 | 233 | ``` 234 | library(RedisParam) 235 | fun = function(i, ...) { 236 | Sys.sleep(1) 237 | system("hostname", intern=TRUE) 238 | } 239 | 240 | p <- RedisParam( 241 | workers = 5, jobname = "demo", 242 | is.worker = FALSE 243 | ) 244 | res <- bplapply(1:13, fun, BPPARAM = p) 245 | 246 | table(unlist(res)) 247 | ## five-worker-5ns79 five-worker-8gh5q 248 | ## 2 2 249 | ## five-worker-dvdtv five-worker-wt5jq 250 | ## 3 3 251 | ## five-worker-zwlpw 252 | ## 3 253 | ``` 254 | 255 | [_Bioconductor_]: https://bioconductor.org 256 | [_AnVIL_]: https://anvilproject.org 257 | [about]: about 258 | [Training Materials]: training 259 | [AnVIL]: https://anvil.terra.app 260 | [AnVIL_package]: https://github.com/Bioconductor/AnVIL 261 | [gcr]: https://console.cloud.google.com/gcr/images/broad-dsp-gcr-public/US/terra-jupyter-bioconductor 262 | [bioconductor_docker]: https://github.com/Bioconductor/bioconductor_docker 263 | [k8sredis]: https://github.com/Bioconductor/k8sredis 264 | [BCC 2020]: https://bcc2020.github.io/ 265 | [here]: https://bcc2020.github.io/training/ 266 | [Bioc 2020]: http://bioc2020.bioconductor.org/ 267 | [workshop materials]: https://github.com/waldronlab/AnVILWorkshop 268 | 269 | [anvil:bioconductor]: https://bioconductor.org/packages/AnVIL 270 | [anvil:github]: https://github.com/Bioconductor/AnVIL 271 | 272 | [terra-jupyter-bioconductor:image]: https://us.gcr.io/broad-dsp-gcr-public/terra-jupyter-bioconductor:1.0.11 273 | [terra-jupyter-bioconductor:github]: https://github.com/DataBiosphere/terra-docker/tree/master/terra-jupyter-bioconductor 274 | [terra-jupyter-bioconductor:bioconductor]: https://github.com/Bioconductor/terra-docker 275 | 276 | [anvil-rstudio-bioconductor:image]: https://us.gcr.io/anvil-gcr-public/anvil-rstudio-bioconductor:0.0.10 277 | [anvil-rstudio-bioconductor:github]: https://github.com/anvilproject/anvil-docker 278 | 279 | [Google Container Registry]: https://cloud.google.com/container-registry/docs/pushing-and-pulling 280 | --------------------------------------------------------------------------------