├── .Rbuildignore ├── .github └── workflows │ └── basic_checks.yaml ├── .gitignore ├── CONTRIBUTING.md ├── DESCRIPTION ├── Dockerfile ├── LICENSE ├── NAMESPACE ├── R ├── aggregate_cells.R └── data.R ├── README.md ├── _pkgdown.yml ├── data-raw ├── seurat_obj.R ├── seurat_obj_UMAP3.R └── theme.R ├── data ├── gate_sce_obj.rda ├── sce_obj.rda ├── sce_obj_UMAP3.rda └── theme_multipanel.rda ├── inst ├── .gitignore ├── bioc2022_tidytranscriptomics.pdf └── vignettes │ ├── .gitignore │ ├── ScreenShot2.png │ ├── ScreenShot3.png │ ├── blog_screenshot.PNG │ ├── bulk_RNAseq_pipeline.png │ ├── bulk_vs_single.jpg │ ├── nesting.png │ ├── plotly_2.png │ ├── roadmap_integration.png │ ├── rstudio_cloud.png │ ├── single_cell_RNAseq_pipeline.png │ ├── tidybulk_logo.png │ ├── tidydata_1.jpg │ ├── tidytranscriptomics.bib │ ├── tidyverse.png │ └── transcriptomics.jpg ├── man ├── drop_class.Rd ├── figures │ ├── BioC2022_logo.png │ └── new_SE_usage-01.png ├── gate_sce_obj.Rd ├── quo_names.Rd ├── sce_obj.Rd ├── sce_obj_UMAP3.Rd └── theme_multipanel.Rd └── vignettes ├── solutions.Rmd ├── supplementary.Rmd └── tidytranscriptomics_case_study.Rmd /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^doc$ 4 | ^Meta$ 5 | ^LICENSE\.md$ 6 | .github 7 | Dockerfile 8 | _pkgdown.yml 9 | ^data-raw$ 10 | dev$ 11 | CONTRIBUTING.md 12 | ^bioc2022tidytranscriptomics\.Rcheck$ 13 | ^bioc2022tidytranscriptomics.*\.tar\.gz$ 14 | ^bioc2022tidytranscriptomics.*\.tgz$ 15 | -------------------------------------------------------------------------------- /.github/workflows/basic_checks.yaml: -------------------------------------------------------------------------------- 1 | on: [push] 2 | 3 | env: 4 | cache-version: v4 5 | repo-name: tidytranscriptomics-workshops/bioc2022_tidytranscriptomics 6 | 7 | jobs: 8 | r-build-and-check: 9 | runs-on: ubuntu-latest 10 | container: bioconductor/bioconductor_docker:devel 11 | env: 12 | 
R_REMOTES_NO_ERRORS_FROM_WARNINGS: true 13 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 14 | steps: 15 | - uses: actions/checkout@v2 16 | 17 | - name: Query dependencies and update old packages 18 | run: | 19 | # We'll install the specific versions needed 20 | BiocManager::install("stemangiola/tidySingleCellExperiment@v1.7.4") 21 | BiocManager::install("stemangiola/tidySummarizedExperiment@v1.7.3") 22 | BiocManager::install("stemangiola/tidybulk@v1.9.2") 23 | 24 | # Then install the other dependencies in the usual way 25 | BiocManager::install(ask=FALSE) 26 | saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) 27 | shell: Rscript {0} 28 | 29 | - name: Cache R packages 30 | if: runner.os != 'Windows' 31 | uses: actions/cache@v1 32 | with: 33 | path: /usr/local/lib/R/site-library 34 | key: ${{ env.cache-version }}-${{ runner.os }}-r-${{ hashFiles('.github/depends.Rds') }} 35 | restore-keys: ${{ env.cache-version }}-${{ runner.os }}-r- 36 | 37 | # This lets us augment with additional dependencies 38 | - name: Install system dependencies 39 | if: runner.os == 'Linux' 40 | env: 41 | RHUB_PLATFORM: linux-x86_64-ubuntu-gcc 42 | run: | 43 | Rscript -e "remotes::install_github('r-hub/sysreqs')" 44 | sysreqs=$(Rscript -e "cat(sysreqs::sysreq_commands('DESCRIPTION'))") 45 | sudo -s eval "$sysreqs" 46 | 47 | - name: Install dependencies 48 | run: | 49 | options(repos = c(CRAN = "https://cran.r-project.org")) 50 | BiocManager::repositories() 51 | remotes::install_deps(dependencies = TRUE, repos = BiocManager::repositories()) 52 | remotes::install_cran("rcmdcheck") 53 | shell: Rscript {0} 54 | 55 | - name: Check 56 | env: 57 | _R_CHECK_CRAN_INCOMING_REMOTE_: false 58 | run: rcmdcheck::rcmdcheck(args = c("--no-manual"), error_on = "error", check_dir = "check") 59 | shell: Rscript {0} 60 | 61 | - name: Build pkgdown 62 | if: github.ref == 'refs/heads/master' 63 | run: | 64 | PATH=$PATH:$HOME/bin/ Rscript -e 'pkgdown::build_site(".")' 65 | 66 | 
# deploy needs rsync? Seems so. 67 | - name: Install deploy dependencies 68 | if: github.ref == 'refs/heads/master' 69 | run: | 70 | apt-get update 71 | apt-get -y install rsync 72 | 73 | - name: Deploy 🚀 74 | uses: JamesIves/github-pages-deploy-action@releases/v4 75 | if: github.ref == 'refs/heads/master' 76 | with: 77 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 78 | BRANCH: gh-pages # The branch the action should deploy to. 79 | FOLDER: docs # The folder the action should deploy 80 | 81 | docker-build-and-push: 82 | #needs: r-build-and-check 83 | runs-on: ubuntu-latest 84 | permissions: 85 | contents: read 86 | packages: write 87 | # This is used to complete the identity challenge 88 | # with sigstore/fulcio when running outside of PRs. 89 | id-token: write 90 | steps: 91 | - name: Checkout repository 92 | uses: actions/checkout@v2 93 | 94 | - name: Set Environment Variables 95 | run: | 96 | REPO_LOWER="$(echo "${{ github.repository }}" | tr '[:upper:]' '[:lower:]')" 97 | REGISTRY=ghcr.io 98 | echo "BUILD_DATE=$(date +'%Y-%m-%d %H:%M:%S')" >> $GITHUB_ENV 99 | echo "GIT_SHA=$(echo ${{ github.sha }} | cut -c1-7)" >> $GITHUB_ENV 100 | echo "REGISTRY=${REGISTRY}" >> $GITHUB_ENV 101 | echo "IMAGE=${REGISTRY}/${REPO_LOWER}" >> $GITHUB_ENV 102 | 103 | - name: Show environment 104 | run: | 105 | env 106 | # Install the cosign tool except on PR 107 | # https://github.com/sigstore/cosign-installer 108 | - name: Install cosign 109 | if: github.event_name != 'pull_request' 110 | uses: sigstore/cosign-installer@1e95c1de343b5b0c23352d6417ee3e48d5bcd422 111 | with: 112 | cosign-release: 'v1.4.0' 113 | # Workaround: https://github.com/docker/build-push-action/issues/461 114 | - name: Setup Docker buildx 115 | uses: docker/setup-buildx-action@79abd3f86f79a9d68a23c75a09a9a85889262adf 116 | # Login against a Docker registry except on PR 117 | # https://github.com/docker/login-action 118 | - name: Log into registry ${{ env.REGISTRY }} 119 | if: github.event_name != 'pull_request' 
120 | uses: docker/login-action@28218f9b04b4f3f62068d7b6ce6ca5b26e35336c 121 | with: 122 | registry: ${{ env.REGISTRY }} 123 | username: ${{ github.actor }} 124 | password: ${{ secrets.GITHUB_TOKEN }} 125 | 126 | # Extract metadata (tags, labels) for Docker 127 | # https://github.com/docker/metadata-action 128 | - name: Extract Docker metadata 129 | id: meta 130 | uses: docker/metadata-action@98669ae865ea3cffbcbaa878cf57c20bbf1c6c38 131 | with: 132 | images: ${{ env.IMAGE }} 133 | # Build and push Docker image with Buildx (don't push on PR) 134 | # https://github.com/docker/build-push-action 135 | - name: Build and push Docker image 136 | id: build-and-push 137 | uses: docker/build-push-action@ad44023a93711e3deb337508980b4b5e9bcdc5dc 138 | with: 139 | context: . 140 | push: ${{ github.event_name != 'pull_request' }} 141 | tags: | 142 | ${{ env.IMAGE }}:latest 143 | ${{ env.IMAGE }}:${{ env.GIT_SHA }} 144 | # Sign the resulting Docker image digest except on PRs. 145 | # This will only write to the public Rekor transparency log when the Docker 146 | # repository is public to avoid leaking data. If you would like to publish 147 | # transparency data even for private images, pass --force to cosign below. 148 | # https://github.com/sigstore/cosign 149 | - name: Sign the published Docker image 150 | if: ${{ github.event_name != 'pull_request' }} 151 | env: 152 | COSIGN_EXPERIMENTAL: "true" 153 | # This step uses the identity token to provision an ephemeral certificate 154 | # against the sigstore community Fulcio instance. 
155 | run: cosign sign ${{ env.IMAGE }}@${{ steps.build-and-push.outputs.digest }} 156 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | doc 3 | Meta 4 | .RData 5 | .Rhistory 6 | *.Rproj 7 | bioc2022tidytranscriptomics.Rcheck/ 8 | bioc2022tidytranscriptomics*.tar.gz 9 | bioc2022tidytranscriptomics*.tgz 10 | dev 11 | /doc/ 12 | /Meta/ 13 | .DS_Store 14 | ._.DS_Store 15 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | Contributing to TidyTranscriptomics Workshop 2 | === 3 | 4 | :+1::tada: First off, thanks for taking the time to contribute! :tada::+1: 5 | 6 | The following is a set of guidelines for contributing to this training material on GitHub. 7 | 8 | # Table of contents 9 | 10 | - [What should I know before I get started?](#what-should-i-know-before-i-get-started) 11 | - [How can I contribute?](#how-can-i-contribute) 12 | - [How do I add new content?](#how-do-i-add-new-content) 13 | - [How is the training material maintained?](#how-is-the-training-material-maintained) 14 | 15 | # What should I know before I get started? 16 | 17 | This repository contains the files for the TidyTranscriptomics workshop. 18 | 19 | By contributing, you agree that we may redistribute your work under [this repository's license](LICENSE). 20 | 21 | We will address your issues and/or assess your change proposal as promptly as we can. 22 | 23 | If you have any questions, you can reach us by creating an [Issue](https://github.com/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics/issues/new/choose) in the workshop repository. 24 | 25 | # How can I contribute? 26 | 27 | You can report mistakes or errors, add suggestions, additions, updates or improvements for content.
Whatever your background, there is probably a way to do it: via the GitHub website or via the command line. If you feel it is too much, you can even write it with any text editor and contact us: we will work together to integrate it. 28 | 29 | # How is the training material maintained? 30 | 31 | ## Maintainers 32 | 33 | The maintainers are listed in the [DESCRIPTION](https://github.com/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics/blob/master/DESCRIPTION) file. 34 | 35 | They are responsible for making sure issues and change requests are looked at. They have the final say over what is included in the training material. 36 | 37 | ## Labels 38 | 39 | This repository is using the following labels for issues, pull requests and project management: 40 | 41 | - Type 42 | - `bug`: errors to be fixed 43 | - `improvement`: enhancement to an existing functionality 44 | - `feature`: new functionality 45 | - `discussion`: discussion threads 46 | - `question`: often turn into discussion threads 47 | - Status 48 | - `help-wanted`: requests for assistance 49 | - `newcomer-friendly`: suitable for people who want to start contributing 50 | - `work-in-progress`: someone is working on this 51 | - `review-needed`: requests for review -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: bioc2022tidytranscriptomics 2 | Title: Tidy Transcriptomics For Single-Cell RNA Sequencing Analyses 3 | Version: 0.13.3 4 | Authors@R: c( 5 | person("Stefano", "Mangiola", email="mangiola.s@wehi.edu.au", 6 | role = c("aut","cre"), 7 | comment = c(ORCID = "0000-0001-7474-836X")), 8 | person("Maria", "Doyle", email="maria.doyle@petermac.org", 9 | role = c("aut"), 10 | comment = c(ORCID = "0000-0003-4847-8436"))) 11 | Maintainer: Stefano Mangiola , Maria Doyle 12 | Description: This workshop will showcase analysis of single-cell RNA sequencing data following the
tidy data paradigm, using the tidySingleCellExperiment, tidySummarizedExperiment, tidybulk and tidyverse packages. 13 | License: CC BY-SA 4.0 + file LICENSE 14 | Encoding: UTF-8 15 | LazyData: true 16 | LazyDataCompression: xz 17 | Roxygen: list(markdown = TRUE) 18 | RoxygenNote: 7.2.0 19 | Depends: 20 | R (>= 4.1.0) 21 | Imports: 22 | tidySingleCellExperiment, 23 | tidySummarizedExperiment, 24 | tidybulk, 25 | tidygate, 26 | scater, 27 | batchelor, 28 | stats, 29 | utils, 30 | tibble, 31 | stringr, 32 | ggplot2, 33 | dplyr, 34 | readr, 35 | tidyr, 36 | purrr, 37 | forcats, 38 | ggrepel, 39 | plotly, 40 | colorspace, 41 | scales, 42 | uwot, 43 | broom, 44 | devtools, 45 | rlang, 46 | magrittr, 47 | R.utils, 48 | dittoSeq, 49 | glue, 50 | patchwork 51 | Suggests: 52 | knitr, 53 | rmarkdown, 54 | pkgdown 55 | Biarch: true 56 | biocViews: RNASeq, DifferentialExpression, GeneExpression, Normalization, Clustering, QualityControl, Sequencing, SingleCell, Transcription, Transcriptomics 57 | URL: https://tidytranscriptomics-workshops.github.io/bioc2022_tidytranscriptomics/ 58 | BugReports: https://github.com/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics/issues/new/choose 59 | VignetteBuilder: knitr 60 | DockerImage: ghcr.io/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics:latest 61 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM bioconductor/bioconductor_docker:devel 2 | 3 | WORKDIR /home/rstudio 4 | 5 | COPY --chown=rstudio:rstudio . 
/home/rstudio/ 6 | 7 | RUN Rscript -e "options(repos = c(CRAN = 'https://cran.r-project.org')); BiocManager::install(ask=FALSE)" 8 | 9 | RUN Rscript -e "BiocManager::install(c('stemangiola/tidySingleCellExperiment@v1.7.4', 'stemangiola/tidySummarizedExperiment@v1.7.3', 'stemangiola/tidybulk@v1.9.2'))" 10 | 11 | RUN Rscript -e "options(repos = c(CRAN = 'https://cran.r-project.org')); devtools::install('.', dependencies=TRUE, build_vignettes=TRUE, repos = BiocManager::repositories())" -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2020 Maria Doyle and Stefano Mangiola 2 | 3 | This work is licensed under the Creative Commons Attribution-ShareAlike 4.0 International License. To view a copy of this license, visit https://creativecommons.org/licenses/by-sa/4.0/ or send a letter to Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. 4 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(aggregate_cells) 4 | import(ggplot2) 5 | importFrom(magrittr,equals) 6 | importFrom(purrr,map) 7 | importFrom(purrr,map2) 8 | importFrom(purrr,when) 9 | importFrom(rlang,quo_name) 10 | importFrom(rlang,quo_squash) 11 | -------------------------------------------------------------------------------- /R/aggregate_cells.R: -------------------------------------------------------------------------------- 1 | #' Convert array of quosure (e.g. c(col_a, col_b)) into character vector 2 | #' 3 | #' @keywords internal 4 | #' 5 | #' @importFrom rlang quo_name 6 | #' @importFrom rlang quo_squash 7 | #' @importFrom purrr when map map2 8 | #' @importFrom magrittr equals 9 | #' @import ggplot2 10 | #' 11 | #' @param v A array of quosures (e.g. 
c(col_a, col_b)) 12 | #' 13 | #' @return A character vector 14 | quo_names <- function(v) { 15 | 16 | v = rlang::quo_name(rlang::quo_squash(v)) 17 | gsub('^c\\(|`|\\)$', '', v) %>% 18 | strsplit(', ') %>% 19 | unlist 20 | } 21 | 22 | #' Remove a class from an object 23 | #' 24 | #' 25 | #' @param var A tibble 26 | #' @param name A character name of the class 27 | #' 28 | #' @return `var` with `name` removed from its class vector 29 | drop_class = function(var, name) { 30 | class(var) <- class(var)[!class(var)%in%name] 31 | var 32 | } 33 | 34 | get_specific_annotation_columns = function(.data, .col){ 35 | 36 | 37 | # Comply with CRAN NOTES 38 | . = NULL 39 | 40 | # Capture .col as a quosure 41 | .col = enquo(.col) 42 | 43 | # Number of distinct values of .col 44 | n_x = .data %>% dplyr::distinct_at(vars(!!.col)) %>% nrow 45 | 46 | # Keep the other columns whose distinct (.col, column) row count equals n_x 47 | .data %>% 48 | select(-!!.col) %>% 49 | colnames %>% 50 | map( 51 | ~ 52 | .x %>% 53 | when( 54 | .data %>% 55 | distinct_at(vars(!!.col, .x)) %>% 56 | nrow %>% 57 | magrittr::equals(n_x) ~ (.), 58 | ~ NULL 59 | ) 60 | ) %>% 61 | 62 | # Drop NULL 63 | { (.)[lengths((.)) != 0] } %>% 64 | unlist 65 | 66 | } 67 | 68 | 69 | subset = function(.data, .column) { 70 | # Capture .column as a quosure 71 | .column = enquo(.column) 72 | 73 | # Check that every requested column is present 74 | if(quo_names(.column) %in% colnames(.data) %>% all %>% `!`) 75 | stop("nanny says: some of the .column specified do not exist in the input data frame.") 76 | 77 | .data %>% 78 | 79 | # Selecting the right columns 80 | select( !!.column, get_specific_annotation_columns(.data, !!.column) ) %>% 81 | distinct() 82 | 83 | } 84 | 85 | #' @export 86 | aggregate_cells = function(.data, .sample = NULL, slot = "data", assays = NULL, aggregation_function = Matrix::rowSums) { 87 | 88 | .sample = enquo(.sample) 89 | 90 | # Subset only wanted assays 91 | if(!is.null(assays)){ 92 | .data@assays@data = .data@assays@data[assays] 93 | } 94 | 95 | .data %>% 96 | 97 | tidySingleCellExperiment::nest(data = -!!.sample) %>% 98 |
mutate(.aggregated_cells = map_int(data, ~ ncol(.x))) %>% 99 | mutate(data = map(data, ~ 100 | # Loop over assays, pairing each assay matrix with its name 101 | map2( 102 | as.list(assays(.x)), names(.x@assays), 103 | 104 | # Apply aggregation_function to the assay matrix and name the value column after the assay 105 | ~ .x %>% 106 | aggregation_function(na.rm = T) %>% 107 | tibble::enframe( 108 | name = "feature", 109 | value = sprintf("%s", .y) 110 | ) %>% 111 | mutate(feature = as.character(feature)) 112 | ) %>% 113 | Reduce(function(...) full_join(..., by=c("feature")), .) 114 | 115 | )) %>% 116 | left_join(.data %>% tidySingleCellExperiment::as_tibble() %>% subset(!!.sample), by = quo_names(.sample)) %>% 117 | tidySingleCellExperiment::unnest(data) %>% 118 | 119 | drop_class("tidySingleCellExperiment_nested") |> 120 | 121 | as_SummarizedExperiment(.sample = !!.sample, .transcript = feature, .abundance = !!as.symbol(names(.data@assays))) 122 | 123 | } 124 | -------------------------------------------------------------------------------- /R/data.R: -------------------------------------------------------------------------------- 1 | #' sce_obj 2 | #' 3 | #' A SingleCellExperiment dataset of single cell RNA sequencing data 4 | #' 5 | #' 6 | #' @format A sce object. 7 | #' @usage data(sce_obj) 8 | "sce_obj" 9 | 10 | #' gate_sce_obj 11 | #' 12 | #' Coordinates for a gate interactively drawn using tidygate 13 | #' 14 | #' 15 | #' @format A list containing x,y coordinates for one gate 16 | #' @usage data(gate_sce_obj) 17 | "gate_sce_obj" 18 | 19 | 20 | #' sce_obj_UMAP3 21 | #' 22 | #' A sce dataset of single cell RNA sequencing data with 3 UMAP dimensions 23 | #' 24 | #' 25 | #' @format A sce object. 26 | #' @usage data(sce_obj_UMAP3) 27 | "sce_obj_UMAP3" 28 | 29 | #' theme_multipanel 30 | #' 31 | #' A pretty theme 32 | #' 33 | #' 34 | #' @format A ggplot2 theme object.
35 | #' @usage data(theme_multipanel) 36 | "theme_multipanel" 37 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | [![DOI](https://zenodo.org/badge/496962211.svg)](https://zenodo.org/badge/latestdoi/496962211) 3 | [![.github/workflows/basic_checks.yaml](https://github.com/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics/workflows/.github/workflows/basic_checks.yaml/badge.svg)](https://github.com/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics/actions) 4 | 5 | 6 | # Tidy Transcriptomics For Single-Cell RNA Sequencing Analyses 7 |

8 | BioC2022 9 | tidybulk 10 |

11 | 12 | ## Workshop Description 13 | 14 | This tutorial will showcase analysis of single-cell RNA sequencing data following the tidy data paradigm. The tidy data paradigm provides a standard way to organise data values within a dataset, where each variable is a column, each observation is a row, and data is manipulated using an easy-to-understand vocabulary. Most importantly, the data structure remains consistent across manipulation and analysis functions. 15 | 16 | This can be achieved with the integration of packages present in the R CRAN and Bioconductor ecosystem, including [tidySingleCellExperiment](https://stemangiola.github.io/tidySingleCellExperiment/) and [tidyverse](https://www.tidyverse.org/). These packages are part of the tidytranscriptomics suite that introduces a tidy approach to RNA sequencing data representation and analysis. For more information see the [tidy transcriptomics blog](https://stemangiola.github.io/tidytranscriptomics/). 17 | 18 | ### Pre-requisites 19 | 20 | * Basic familiarity with single-cell transcriptomic analyses 21 | * Basic familiarity with tidyverse 22 | 23 | ## Workshop goals and objectives 24 | 25 | * To approach single-cell data representation and analysis through a tidy data paradigm, integrating tidyverse with tidySingleCellExperiment.
26 | * Compare SingleCellExperiment and tidy representation 27 | * Apply tidy functions to SingleCellExperiment objects 28 | * Reproduce a real-world case study that showcases the power of tidy single-cell methods 29 | 30 | ### What you will learn 31 | 32 | * Basic tidy operations possible with tidySingleCellExperiment 33 | * The differences between SingleCellExperiment representation and tidy representation 34 | * How to interface SingleCellExperiment with tidy manipulation and visualisation 35 | * A real-world case study that will showcase the power of tidy single-cell methods compared with base/ad-hoc methods 36 | 37 | ### What you will not learn 38 | 39 | * The molecular technology of single-cell sequencing 40 | * The fundamentals of single-cell data analysis 41 | * The fundamentals of tidy data analysis 42 | 43 | ### Workshop Participation 44 | 45 | The workshop format is a 1.5 hour session consisting of hands-on demos, exercises and Q&A. 46 | 47 | ## Syllabus 48 | 49 | Material [web page](https://tidytranscriptomics-workshops.github.io/bioc2022_tidytranscriptomics/articles/tidytranscriptomics_case_study.html). More details on the workshop are below. 50 | 51 | ## Workshop package installation 52 | 53 | For the BioC2022 workshop, an RStudio in the cloud will be provided with everything installed, all that participants will need is a web browser. 54 | 55 | If you want to install the packages and material post-workshop, they can be installed using one of the two ways below. The workshop is designed for R `4.2` and Bioconductor 3.16. 56 | 57 | ### Via Docker image 58 | 59 | If you're familiar with [Docker](https://docs.docker.com/get-docker/) you could use the Docker image which has all the software pre-configured to the correct versions. 
60 | 61 | ``` 62 | docker run -e PASSWORD=abc -p 8787:8787 ghcr.io/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics 63 | ``` 64 | 65 | Once running, navigate to http://localhost:8787/ and then log in with 66 | `Username:rstudio` and `Password:abc`. 67 | 68 | You should see the Rmarkdown file with all the workshop code which you can run. 69 | 70 | ### Via GitHub 71 | 72 | Alternatively, you could install the workshop using the commands below in R `4.2`. 73 | 74 | ``` 75 | #install.packages('remotes') 76 | 77 | # Need to set this to prevent installation erroring due to even tiny warnings, similar to here: https://github.com/r-lib/remotes/issues/403#issuecomment-748181946 78 | Sys.setenv("R_REMOTES_NO_ERRORS_FROM_WARNINGS" = "true") 79 | 80 | # Install same versions used in the workshop 81 | remotes::install_github(c("stemangiola/tidySingleCellExperiment@v1.7.4", "stemangiola/tidySummarizedExperiment@v1.7.3", "stemangiola/tidybulk@v1.9.2")) 82 | 83 | # Install workshop package 84 | 85 | remotes::install_github("tidytranscriptomics-workshops/bioc2022_tidytranscriptomics", build_vignettes = TRUE) 86 | 87 | # To view vignettes 88 | library(bioc2022tidytranscriptomics) 89 | browseVignettes("bioc2022tidytranscriptomics") 90 | ``` 91 | 92 | To run the code, you could then copy and paste the code from the workshop vignette or [R markdown file](https://raw.githubusercontent.com/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics/master/vignettes/tidytranscriptomics_case_study.Rmd) into a new R Markdown file on your computer.
93 | 94 | ## Instructor names and contact information 95 | 96 | * Stefano Mangiola 97 | * Maria Doyle 98 | 99 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | url: https://tidytranscriptomics-workshops.github.io/bioc2022_tidytranscriptomics 2 | 3 | template: 4 | params: 5 | bootswatch: flatly 6 | ganalytics: UA-93043521-1 7 | 8 | home: 9 | title: "TidyTranscriptomics" 10 | type: inverse 11 | 12 | toc: 13 | depth: 4 14 | 15 | navbar: 16 | title: ~ 17 | type: default 18 | left: 19 | - text: Workshop 20 | href: articles/tidytranscriptomics_case_study.html 21 | - text: Supplementary 22 | href: articles/supplementary.html 23 | 24 | right: 25 | - icon: fab fa-github 26 | href: https://github.com/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics 27 | - icon: fab fa-docker 28 | href: https://ghcr.io/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics:latest 29 | 30 | -------------------------------------------------------------------------------- /data-raw/seurat_obj.R: -------------------------------------------------------------------------------- 1 | 2 | library(tidyverse) 3 | library(glue) 4 | library(Seurat) 5 | library(tidyseurat) 6 | library(tidySingleCellExperiment) 7 | 8 | seurat_obj <- readRDS("/stornext/Bioinf/data/bioinf-data/Papenfuss_lab/projects/mangiola.s/PostDoc/oligo_breast/expanded_analyses_with_control/cancer_only_analyses/lymphoid/cancer_lymphoid_cell_type_curated.rds") 9 | 10 | set.seed(123) 11 | seurat_obj = seurat_obj |> RunPCA(npcs = 20) |> select(-contains("UMAP")) |> RunUMAP(dims=1:20) 12 | seurat_obj = seurat_obj |> select(.cell, file, 3, 8, 9, S.Score, G2M.Score , Phase , curated_cell_type , contains("UMAP")) 13 | 14 | seurat_obj = seurat_obj %>% filter(.cell %in% (seurat_obj %>% sample_n(3000) %>% pull(.cell) %>% c(seurat_obj %>% filter(grepl("Delta", curated_cell_type)) %>% pull(.cell)) %>%
unique)) 15 | 16 | seurat_obj = seurat_obj %>% FindVariableFeatures(assay="RNA", nfeatures = 500) 17 | seurat_obj = seurat_obj[VariableFeatures(seurat_obj, assay="RNA") %>% c("CD3D", "TRDC", "TRGC1", "TRGC2", "CD8A", "CD8B"),] 18 | seurat_obj = seurat_obj %>% mutate( 19 | file = factor(file), Barcode = factor(Barcode), batch= factor(batch), BCB= factor(BCB), Phase= factor(Phase), curated_cell_type= factor(curated_cell_type), 20 | nCount_RNA = as.integer(nCount_RNA), nFeature_RNA= as.integer(nFeature_RNA), nCount_SCT= as.integer(nCount_SCT), nFeature_SCT= as.integer(nFeature_SCT) 21 | ) 22 | #seurat_obj[["SCT"]]@scale.data = seurat_obj[["SCT"]]@scale.data[c("CD3D", "TRDC", "TRGC1", "TRGC2", "CD8A", "CD8B"),] 23 | #seurat_obj[["SCT"]]@data = seurat_obj[["SCT"]]@data[c("CD3D", "TRDC", "TRGC1", "TRGC2", "CD8A", "CD8B"),] 24 | DefaultAssay(seurat_obj) = "SCT" 25 | seurat_obj[["integrated"]] = NULL 26 | 27 | sce_obj = seurat_obj %>% 28 | as.SingleCellExperiment() |> 29 | 30 | # Add factor of interest 31 | nest(data = -file) |> 32 | mutate(condition = sample(c("treated", "untreated"), n(), replace = TRUE)) |> 33 | unnest(data) 34 | 35 | # Parse 36 | sce_obj = 37 | sce_obj |> 38 | select(-condition) |> 39 | left_join(readRDS("~/metadata_oligo.rds")) |> 40 | rename(treatment = type) |> 41 | mutate(treatment = if_else(treatment=="OMBC", "treated", "untreated")) |> 42 | #select(-file) |> 43 | mutate(sample = glue("S{as.integer(as.factor(sample))}")) |> 44 | rename(cell_type = curated_cell_type) |> 45 | 46 | # filtering because of to few samples per cell types 47 | filter(cell_type !="CD8+_Tem") |> 48 | 49 | # Replace file path 50 | mutate(file = file |> str_replace("bhupinder_10X_260819", "single_cell")) 51 | 52 | # job::job({ 53 | save(sce_obj , file="data/sce_obj.rda", compress = "xz") 54 | # }) 55 | -------------------------------------------------------------------------------- /data-raw/seurat_obj_UMAP3.R: 
-------------------------------------------------------------------------------- 1 | library(tidySingleCellExperiment) 2 | 3 | seurat_obj_UMAP3 = 4 | seurat_obj_for_BioCAsia2021 %>% 5 | RunUMAP(dims = 1:30, n.components = 3L, spread = 0.5,min.dist = 0.01, n.neighbors = 10L) 6 | 7 | seurat_obj_UMAP3[["RNA"]] = NULL 8 | seurat_obj_UMAP3[["SCT"]] = NULL 9 | seurat_obj_UMAP3 = seurat_obj_UMAP3[1,] 10 | 11 | 12 | sce_obj_UMAP3 = sce_obj_UMAP3 |> rename(cell_type = curated_cell_type) 13 | 14 | #seurat_obj_UMAP3 %>% saveRDS("~/PostDoc/workshops/bioc2022_tidytranscriptomics/dev/seurat_obj_UMAP3.rds", compress = "xz") 15 | save(sce_obj_UMAP3, file="data/sce_obj_UMAP3.rda", compress = "xz") 16 | 17 | 18 | seurat_obj_UMAP3 |> 19 | 20 | plot_ly( 21 | x = ~`UMAP_1`, 22 | y = ~`UMAP_2`, 23 | z = ~`UMAP_3`, 24 | color = ~curated_cell_type, 25 | size=0.05 26 | ) 27 | -------------------------------------------------------------------------------- /data-raw/theme.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | # scale_fill_manual(values = friendly_cols) 4 | # scale_color_manual(values = friendly_cols) 5 | 6 | library(ggplot2) 7 | 8 | theme_multipanel = 9 | theme_bw() + 10 | theme( 11 | panel.border = element_blank(), 12 | axis.line = element_line(size=0.1), 13 | panel.grid.major = element_blank(), 14 | panel.grid.minor = element_blank(), 15 | legend.position = "bottom", 16 | strip.background = element_blank(), 17 | axis.title.y = element_text(margin = margin(t = 0, r = 0, b = 0, l = 0), size = 7), 18 | axis.title.x = element_text(margin = margin(t = 0, r = 0, b = 0, l = 0), size = 7), 19 | panel.spacing.x=unit(0.1, "lines"), 20 | axis.text.x = element_text(size=6), 21 | axis.text.y = element_text(size=6), 22 | strip.text.x = element_text(size = 7), 23 | strip.text.y = element_text(size = 7), 24 | 25 | # legend 26 | legend.key.size = unit(5, 'mm'), 27 | legend.title = element_text(size=7), 28 | legend.text = element_text(size=6), 29 | 
30 | # Avoid text clipping for facets. Currently not merged remotes::install_github("tidyverse/ggplot2#4223") 31 | #strip.clip = "off", 32 | 33 | # Title 34 | plot.title = element_text(size=7), 35 | 36 | axis.line.x = element_line(size=0.2), 37 | axis.line.y = element_line(size=0.2), 38 | axis.ticks.x = element_line(size=0.2), 39 | axis.ticks.y = element_line(size=0.2) 40 | ) 41 | 42 | # Patchwork 43 | # + plot_layout(guides = 'collect' ) + plot_annotation(tag_levels = c('A')) & theme( plot.margin = margin(0, 0, 0, 0, "pt"), legend.key.size = unit(0.2, 'cm')) 44 | 45 | save(theme_multipanel, file="data/theme_multipanel.rda", compress = "xz") 46 | -------------------------------------------------------------------------------- /data/gate_sce_obj.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics/d6d8538efc381dedb3391b651d61f451536c73bf/data/gate_sce_obj.rda -------------------------------------------------------------------------------- /data/sce_obj.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics/d6d8538efc381dedb3391b651d61f451536c73bf/data/sce_obj.rda -------------------------------------------------------------------------------- /data/sce_obj_UMAP3.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics/d6d8538efc381dedb3391b651d61f451536c73bf/data/sce_obj_UMAP3.rda -------------------------------------------------------------------------------- /data/theme_multipanel.rda: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics/d6d8538efc381dedb3391b651d61f451536c73bf/data/theme_multipanel.rda -------------------------------------------------------------------------------- /inst/.gitignore: -------------------------------------------------------------------------------- 1 | ._.smbdeleteAAA240c2803 2 | ._bioc2022_tidytranscriptomics.pdf 3 | .smbdeleteAAA240c2803 4 | -------------------------------------------------------------------------------- /inst/bioc2022_tidytranscriptomics.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics/d6d8538efc381dedb3391b651d61f451536c73bf/inst/bioc2022_tidytranscriptomics.pdf -------------------------------------------------------------------------------- /inst/vignettes/.gitignore: -------------------------------------------------------------------------------- 1 | Thumbs.db 2 | -------------------------------------------------------------------------------- /inst/vignettes/ScreenShot2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics/d6d8538efc381dedb3391b651d61f451536c73bf/inst/vignettes/ScreenShot2.png -------------------------------------------------------------------------------- /inst/vignettes/ScreenShot3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics/d6d8538efc381dedb3391b651d61f451536c73bf/inst/vignettes/ScreenShot3.png -------------------------------------------------------------------------------- /inst/vignettes/blog_screenshot.PNG: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics/d6d8538efc381dedb3391b651d61f451536c73bf/inst/vignettes/blog_screenshot.PNG -------------------------------------------------------------------------------- /inst/vignettes/bulk_RNAseq_pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics/d6d8538efc381dedb3391b651d61f451536c73bf/inst/vignettes/bulk_RNAseq_pipeline.png -------------------------------------------------------------------------------- /inst/vignettes/bulk_vs_single.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics/d6d8538efc381dedb3391b651d61f451536c73bf/inst/vignettes/bulk_vs_single.jpg -------------------------------------------------------------------------------- /inst/vignettes/nesting.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics/d6d8538efc381dedb3391b651d61f451536c73bf/inst/vignettes/nesting.png -------------------------------------------------------------------------------- /inst/vignettes/plotly_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics/d6d8538efc381dedb3391b651d61f451536c73bf/inst/vignettes/plotly_2.png -------------------------------------------------------------------------------- /inst/vignettes/roadmap_integration.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics/d6d8538efc381dedb3391b651d61f451536c73bf/inst/vignettes/roadmap_integration.png 
-------------------------------------------------------------------------------- /inst/vignettes/rstudio_cloud.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics/d6d8538efc381dedb3391b651d61f451536c73bf/inst/vignettes/rstudio_cloud.png -------------------------------------------------------------------------------- /inst/vignettes/single_cell_RNAseq_pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics/d6d8538efc381dedb3391b651d61f451536c73bf/inst/vignettes/single_cell_RNAseq_pipeline.png -------------------------------------------------------------------------------- /inst/vignettes/tidybulk_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics/d6d8538efc381dedb3391b651d61f451536c73bf/inst/vignettes/tidybulk_logo.png -------------------------------------------------------------------------------- /inst/vignettes/tidydata_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics/d6d8538efc381dedb3391b651d61f451536c73bf/inst/vignettes/tidydata_1.jpg -------------------------------------------------------------------------------- /inst/vignettes/tidytranscriptomics.bib: -------------------------------------------------------------------------------- 1 | @article{wickham2019welcome, 2 | title={Welcome to the Tidyverse}, 3 | author={Wickham, Hadley and Averick, Mara and Bryan, Jennifer and Chang, Winston and McGowan, Lucy D'Agostino and Fran{\c{c}}ois, Romain and Grolemund, Garrett and Hayes, Alex and Henry, Lionel and Hester, Jim and others}, 4 | journal={Journal 
of Open Source Software}, 5 | volume={4}, 6 | number={43}, 7 | pages={1686}, 8 | year={2019} 9 | } 10 | 11 | @article{butler2018integrating, 12 | title={Integrating single-cell transcriptomic data across different conditions, technologies, and species}, 13 | author={Butler, Andrew and Hoffman, Paul and Smibert, Peter and Papalexi, Efthymia and Satija, Rahul}, 14 | journal={Nature biotechnology}, 15 | volume={36}, 16 | number={5}, 17 | pages={411--420}, 18 | year={2018}, 19 | publisher={Nature Publishing Group} 20 | } 21 | 22 | @article{stuart2019comprehensive, 23 | title={Comprehensive integration of single-cell data}, 24 | author={Stuart, Tim and Butler, Andrew and Hoffman, Paul and Hafemeister, Christoph and Papalexi, Efthymia and Mauck III, William M and Hao, Yuhan and Stoeckius, Marlon and Smibert, Peter and Satija, Rahul}, 25 | journal={Cell}, 26 | volume={177}, 27 | number={7}, 28 | pages={1888--1902}, 29 | year={2019}, 30 | publisher={Elsevier} 31 | } 32 | 33 | @article{Pizzolato2019, 34 | doi = {10.1073/pnas.1818488116}, 35 | url = {https://doi.org/10.1073/pnas.1818488116}, 36 | year = {2019}, 37 | month = may, 38 | publisher = {Proceedings of the National Academy of Sciences}, 39 | pages = {201818488}, 40 | author = {Gabriele Pizzolato and Hannah Kaminski and Marie Tosolini and Don-Marc Franchini and Fr{\'{e}}deric Pont and Fr{\'{e}}deric Martins and Carine Valle and Delphine Labourdette and Sarah Cadot and Anne Quillet-Mary and Mary Poupot and Camille Laurent and Loic Ysebaert and Serena Meraviglia and Francesco Dieli and Pierre Merville and Pierre Milpied and Julie D{\'{e}}chanet-Merville and Jean-Jacques Fourni{\'{e}}}, 41 | title = {Single-cell {RNA} sequencing unveils the shared and the distinct cytotoxic hallmarks of human {TCRV}$\updelta$1 and {TCRV}$\updelta$2 $\upgamma$$\updelta$ T lymphocytes}, 42 | journal = {Proceedings of the National Academy of Sciences} 43 | } 44 | 45 | 
-------------------------------------------------------------------------------- /inst/vignettes/tidyverse.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics/d6d8538efc381dedb3391b651d61f451536c73bf/inst/vignettes/tidyverse.png -------------------------------------------------------------------------------- /inst/vignettes/transcriptomics.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics/d6d8538efc381dedb3391b651d61f451536c73bf/inst/vignettes/transcriptomics.jpg -------------------------------------------------------------------------------- /man/drop_class.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/aggregate_cells.R 3 | \name{drop_class} 4 | \alias{drop_class} 5 | \title{Remove class from object} 6 | \usage{ 7 | drop_class(var, name) 8 | } 9 | \arguments{ 10 | \item{var}{A tibble} 11 | 12 | \item{name}{A character name of the class} 13 | } 14 | \value{ 15 | A tibble with an additional attribute 16 | } 17 | \description{ 18 | Remove class from object 19 | } 20 | -------------------------------------------------------------------------------- /man/figures/BioC2022_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics/d6d8538efc381dedb3391b651d61f451536c73bf/man/figures/BioC2022_logo.png -------------------------------------------------------------------------------- /man/figures/new_SE_usage-01.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics/d6d8538efc381dedb3391b651d61f451536c73bf/man/figures/new_SE_usage-01.png -------------------------------------------------------------------------------- /man/gate_sce_obj.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{gate_sce_obj} 5 | \alias{gate_sce_obj} 6 | \title{gate_sce_obj} 7 | \format{ 8 | A list containing x,y coordinates for one gate 9 | } 10 | \usage{ 11 | data(gate_sce_obj) 12 | } 13 | \description{ 14 | Coordinates for a gate interactively drawn using tidygate 15 | } 16 | \keyword{datasets} 17 | -------------------------------------------------------------------------------- /man/quo_names.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/aggregate_cells.R 3 | \name{quo_names} 4 | \alias{quo_names} 5 | \title{Convert array of quosure (e.g. c(col_a, col_b)) into character vector} 6 | \usage{ 7 | quo_names(v) 8 | } 9 | \arguments{ 10 | \item{v}{A array of quosures (e.g. c(col_a, col_b))} 11 | } 12 | \value{ 13 | A character vector 14 | } 15 | \description{ 16 | Convert array of quosure (e.g. c(col_a, col_b)) into character vector 17 | } 18 | \keyword{internal} 19 | -------------------------------------------------------------------------------- /man/sce_obj.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{sce_obj} 5 | \alias{sce_obj} 6 | \title{"sce_obj"} 7 | \format{ 8 | A sce object. 
9 | } 10 | \usage{ 11 | data(sce_obj) 12 | } 13 | \description{ 14 | A sce dataset of single cell RNA sequencing data 15 | } 16 | \keyword{datasets} 17 | -------------------------------------------------------------------------------- /man/sce_obj_UMAP3.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{sce_obj_UMAP3} 5 | \alias{sce_obj_UMAP3} 6 | \title{sce_obj_UMAP3} 7 | \format{ 8 | A sce object. 9 | } 10 | \usage{ 11 | data(sce_obj_UMAP3) 12 | } 13 | \description{ 14 | A sce dataset of single cell RNA sequencing data with 3 UMAP dimensions 15 | } 16 | \keyword{datasets} 17 | -------------------------------------------------------------------------------- /man/theme_multipanel.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{theme_multipanel} 5 | \alias{theme_multipanel} 6 | \title{theme_multipanel} 7 | \format{ 8 | A theme ggplot2 object. 
9 | } 10 | \usage{ 11 | data(theme_multipanel) 12 | } 13 | \description{ 14 | A pretty theme 15 | } 16 | \keyword{datasets} 17 | -------------------------------------------------------------------------------- /vignettes/solutions.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Solutions" 3 | output: rmarkdown::html_vignette 4 | vignette: > 5 | %\VignetteIndexEntry{Solutions} 6 | %\VignetteEngine{knitr::rmarkdown} 7 | %\VignetteEncoding{UTF-8} 8 | --- 9 | 10 | ```{r message = FALSE} 11 | library(SingleCellExperiment) 12 | library(ggplot2) 13 | library(plotly) 14 | library(dplyr) 15 | library(colorspace) 16 | library(dittoSeq) 17 | library(tidySingleCellExperiment) 18 | 19 | sce_obj <- bioc2022tidytranscriptomics::sce_obj 20 | ``` 21 | 22 | ## Question 1 23 | 24 | What proportion of all cells are gamma-delta T cells? Use signature_score > 0.7 to identify gamma-delta T cells. 25 | 26 | ```{r} 27 | sce_obj |> 28 | # Join the six signature genes to the cell-wise table, one column per gene 29 | join_features( 30 | features = c("CD3D", "TRDC", "TRGC1", "TRGC2", "CD8A", "CD8B"), 31 | shape = "wide" 32 | ) |> 33 | # Score = rescaled sum of the four positive markers minus rescaled sum of CD8A + CD8B 34 | mutate(signature_score = 35 | scales::rescale(CD3D + TRDC + TRGC1 + TRGC2, to=c(0,1)) - 36 | scales::rescale(CD8A + CD8B, to=c(0,1)) 37 | ) |> 38 | # Flag cells above the 0.7 threshold given in the question 39 | mutate(gamma_delta = signature_score > 0.7) |> 40 | # Count cells per flag value; mutate (not summarise) keeps gamma_delta and n next to each proportion 41 | count(gamma_delta) |> 42 | mutate(proportion = n / sum(n)) 43 | ``` 44 | 45 | ## Question 2 46 | 47 | There is a cluster of cells characterised by a low RNA output (nCount_RNA < 100). Identify the cell composition (cell_type) of that cluster. 
48 | 49 | 50 | ```{r} 51 | sce_obj |> 52 | filter(nCount_RNA < 100) |> # cells in the low RNA output cluster 53 | count(cell_type) # number of those cells per cell type 54 | ``` 55 | 56 | -------------------------------------------------------------------------------- /vignettes/supplementary.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Supplementary Material" 3 | output: rmarkdown::html_vignette 4 | vignette: > 5 | %\VignetteIndexEntry{Supplementary Material} 6 | %\VignetteEngine{knitr::rmarkdown} 7 | %\VignetteEncoding{UTF-8} 8 | --- 9 | 10 | ```{r setup, include=FALSE} 11 | knitr::opts_chunk$set(echo = TRUE) 12 | ``` 13 | 14 | 15 | ```{r message = FALSE} 16 | library(ggplot2) 17 | library(plotly) 18 | library(dplyr) 19 | library(colorspace) 20 | library(dittoSeq) 21 | library(tidySingleCellExperiment) 22 | library(tidygate) 23 | 24 | sce_obj <- bioc2022tidytranscriptomics::sce_obj 25 | ``` 26 | 27 | 28 | Instead of filtering using a specified threshold, the gamma delta T cells could be interactively selected from the plot using the tidygate package. 29 | 30 | ```{r eval = FALSE} 31 | sce_obj |> 32 | 33 | join_features( 34 | features = c("CD3D", "TRDC", "TRGC1", "TRGC2", "CD8A", "CD8B" ), shape = "wide" 35 | ) |> 36 | 37 | mutate(signature_score = 38 | scales::rescale(CD3D + TRDC + TRGC1+ TRGC2, to=c(0,1)) - 39 | scales::rescale(CD8A + CD8B, to=c(0,1)) 40 | ) |> 41 | 42 | mutate(gate = gate_int( 43 | UMAP_1, UMAP_2, 44 | .size = 0.1, 45 | .color =signature_score 46 | )) 47 | 48 | ``` 49 | 50 | After the selection we could reload from a file the gate that was drawn, for reproducibility. 
51 | 52 | ```{r} 53 | sce_obj |> 54 | 55 | join_features( 56 | features = c("CD3D", "TRDC", "TRGC1", "TRGC2", "CD8A", "CD8B" ), shape = "wide" 57 | 58 | ) |> 59 | 60 | mutate(signature_score = 61 | scales::rescale(CD3D + TRDC + TRGC1+ TRGC2, to=c(0,1)) - 62 | scales::rescale(CD8A + CD8B, to=c(0,1)) 63 | ) |> 64 | 65 | mutate(gate = gate_int( 66 | UMAP_1, UMAP_2, 67 | .size = 0.1, 68 | .color =signature_score, 69 | gate_list = bioc2022tidytranscriptomics::gate_sce_obj 70 | )) 71 | 72 | ``` 73 | 74 | The dataset can be filtered for just these cells using tidyverse `filter`. 75 | 76 | ```{r} 77 | sce_obj_gamma_delta <- 78 | 79 | sce_obj |> 80 | 81 | join_features( 82 | features = c("CD3D", "TRDC", "TRGC1", "TRGC2", "CD8A", "CD8B" ), shape = "wide" 83 | 84 | ) |> 85 | 86 | mutate(signature_score = 87 | scales::rescale(CD3D + TRDC + TRGC1+ TRGC2, to=c(0,1)) - 88 | scales::rescale(CD8A + CD8B, to=c(0,1)) 89 | ) |> 90 | 91 | mutate(gate = gate_int(UMAP_1, UMAP_2, gate_list = bioc2022tidytranscriptomics::gate_sce_obj)) |> 92 | 93 | filter(gate == 1) 94 | ``` 95 | -------------------------------------------------------------------------------- /vignettes/tidytranscriptomics_case_study.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Tidy Transcriptomics For Single-Cell RNA Sequencing Analyses" 3 | author: 4 | - Stefano Mangiola, Walter and Eliza Hall Institute^[] 5 | - Maria Doyle, Peter MacCallum Cancer Centre^[] 6 | output: rmarkdown::html_vignette 7 | bibliography: "`r file.path(system.file(package='bioc2022tidytranscriptomics', 'vignettes'), 'tidytranscriptomics.bib')`" 8 | vignette: > 9 | %\VignetteIndexEntry{Tidy Transcriptomics For Single-Cell RNA Sequencing Analyses} 10 | %\VignetteEngine{knitr::rmarkdown} 11 | %\VignetteEncoding{UTF-8} 12 | --- 13 | 14 | ```{r setup, include=FALSE} 15 | knitr::opts_chunk$set(echo = TRUE) 16 | ``` 17 | 18 | # Workshop introduction 19 | 20 |

21 | BioC2022 22 | tidybulk 23 |

24 | 25 | ## Instructors 26 | 27 | *Dr. Stefano Mangiola* is currently a Postdoctoral researcher in the laboratory of Prof. Tony Papenfuss at the Walter and Eliza Hall Institute in Melbourne, Australia. His background spans from biotechnology to bioinformatics and biostatistics. His research focuses on prostate and breast tumour microenvironment, the development of statistical models for the analysis of RNA sequencing data, and data analysis and visualisation interfaces. 28 | 29 | *Dr. Maria Doyle* is the Application and Training Specialist for Research Computing at the Peter MacCallum Cancer Centre in Melbourne, Australia. She has a PhD in Molecular Biology and currently works in bioinformatics and data science education and training. She is passionate about supporting researchers, reproducible research, open source and tidy data. 30 | 31 | ## Description 32 | 33 | This tutorial will showcase analysis of single-cell RNA sequencing data following the tidy data paradigm. The tidy data paradigm provides a standard way to organise data values within a dataset, where each variable is a column, each observation is a row, and data is manipulated using an easy-to-understand vocabulary. Most importantly, the data structure remains consistent across manipulation and analysis functions. 34 | 35 | This can be achieved with the integration of packages present in the R CRAN and Bioconductor ecosystem, including [tidySingleCellExperiment](https://stemangiola.github.io/tidySingleCellExperiment/), [tidySummarizedExperiment](https://stemangiola.github.io/tidySummarizedExperiment/), [tidybulk](https://stemangiola.github.io/tidybulk/) and [tidyverse](https://www.tidyverse.org/). These packages are part of the tidytranscriptomics suite that introduces a tidy approach to RNA sequencing data representation and analysis. For more information see the [tidy transcriptomics blog](https://stemangiola.github.io/tidytranscriptomics/). 
36 | 37 | ### Pre-requisites 38 | 39 | * Basic familiarity with single-cell transcriptomic analyses 40 | * Basic familiarity with tidyverse 41 | 42 | ## Goals and objectives 43 | 44 | * To approach single-cell data representation and analysis through a tidy data paradigm, integrating tidyverse with tidySingleCellExperiment. 45 | * Compare SingleCellExperiment and tidy representation 46 | * Apply tidy functions to SingleCellExperiment objects 47 | * Reproduce a real-world case study that showcases the power of tidy single-cell methods 48 | 49 | ### What you will learn 50 | 51 | - Basic `tidy` operations possible with `tidySingleCellExperiment` 52 | - The differences between `SingleCellExperiment` representation and `tidy` representation 53 | - How to interface `SingleCellExperiment` with tidy manipulation and visualisation 54 | - A real-world case study that will showcase the power of `tidy` single-cell methods compared with base/ad-hoc methods 55 | 56 | ### What you will *not* learn 57 | 58 | - The molecular technology of single-cell sequencing 59 | - The fundamentals of single-cell data analysis 60 | - The fundamentals of tidy data analysis 61 | 62 | This workshop will demonstrate a real-world example of using tidy transcriptomics packages to analyse single cell data. This workshop is not a step-by-step introduction in how to perform single-cell analysis. For an overview of single-cell analysis steps performed in a tidy way please see the [ISMB2021 workshop](https://tidytranscriptomics-workshops.github.io/ismb2021_tidytranscriptomics/articles/tidytranscriptomics.html). 63 | 64 | ## Getting started 65 | 66 | ### Cloud 67 | 68 | Easiest way to run this material. We will use the Orchestra Cloud platform during the BioC2022 workshop. 69 | 70 | 1. Go to [Orchestra](http://app.orchestra.cancerdatasci.org/). 71 | 2. Log in. 72 | 3. Find the workshop. 
In the search box type bioc2022, sort by the Created column, and select the most recently created workshop called "BioC2022: Tidy Transcriptomics For Single-Cell RNA Sequencing Analyses". **There are several tidy transcriptomics workshops. Be sure to select the BioC2022 one with the most recent created date**. 73 | 4. Click "Launch" (may take a minute or two). 74 | 5. Follow the instructions. **Do not share your personalized URL for the RStudio session, or use the trainers' URL, as only one browser at a time can be connected.** 75 | 6. Open `tidytranscriptomics_case_study.Rmd` in the `bioc2022_tidytranscriptomics/vignettes` folder. 76 | 77 | ### Local 78 | 79 | We will use the Orchestra Cloud platform during the BioC2022 workshop and this method is available if you want to run the material after the workshop. If you want to install on your own computer, see instructions [here](https://tidytranscriptomics-workshops.github.io/bioc2022_tidytranscriptomics/index.html#workshop-package-installation). 80 | 81 | Alternatively, you can view the material at the workshop webpage [here](https://tidytranscriptomics-workshops.github.io/bioc2022_tidytranscriptomics/articles/tidytranscriptomics_case_study.html). 82 | 83 | ## Slides 84 | 85 | *The embedded slides below may take a minute to appear. You can also view or download [here](https://github.com/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics/blob/master/inst/bioc2022_tidytranscriptomics.pdf).* 86 | 87 | 93 | 94 | 95 | # Part 1 Introduction to tidySingleCellExperiment 96 | 97 | ```{r message = FALSE} 98 | # Load packages 99 | library(SingleCellExperiment) 100 | library(ggplot2) 101 | library(plotly) 102 | library(dplyr) 103 | library(colorspace) 104 | library(dittoSeq) 105 | ``` 106 | 107 | SingleCellExperiment is a very popular analysis toolkit for single cell RNA sequencing data [@butler2018integrating; @stuart2019comprehensive]. 108 | 109 | Here we load single-cell data in SingleCellExperiment object format. 
This data is peripheral blood mononuclear cells (PBMCs) from metastatic breast cancer patients. 110 | 111 | 112 | ```{r} 113 | # load single cell RNA sequencing data 114 | sce_obj <- bioc2022tidytranscriptomics::sce_obj 115 | 116 | # take a look 117 | sce_obj 118 | ``` 119 | 120 | tidySingleCellExperiment provides a bridge between the SingleCellExperiment single-cell package and the tidyverse [@wickham2019welcome]. It creates an invisible layer that enables viewing the SingleCellExperiment object as a tidyverse tibble, and provides SingleCellExperiment-compatible *dplyr*, *tidyr*, *ggplot* and *plotly* functions. 121 | 122 | If we load the *tidySingleCellExperiment* package and then view the single cell data, it now displays as a tibble. 123 | 124 | ```{r message = FALSE} 125 | library(tidySingleCellExperiment) 126 | 127 | sce_obj 128 | ``` 129 | 130 | If we want to revert to the standard SingleCellExperiment view we can do that. 131 | 132 | ```{r} 133 | options("restore_SingleCellExperiment_show" = TRUE) 134 | sce_obj 135 | ``` 136 | 137 | If we want to revert back to tidy SingleCellExperiment view we can. 138 | 139 | ```{r} 140 | options("restore_SingleCellExperiment_show" = FALSE) 141 | sce_obj 142 | ``` 143 | 144 | It can be interacted with using [SingleCellExperiment commands](https://bioconductor.org/packages/devel/bioc/vignettes/SingleCellExperiment/inst/doc/intro.html) such as `assays`. 145 | 146 | ```{r} 147 | assays(sce_obj) 148 | ``` 149 | 150 | We can also interact with our object as we do with any tidyverse tibble. 151 | 152 | ## Tidyverse commands 153 | 154 | We can use tidyverse commands, such as `filter`, `select` and `mutate` to explore the tidySingleCellExperiment object. Some examples are shown below and more can be seen at the tidySingleCellExperiment website [here](https://stemangiola.github.io/tidySingleCellExperiment/articles/introduction.html#tidyverse-commands-1). 
155 | 156 | We can use `filter` to choose rows, for example, to see just the rows for the cells in G1 cell-cycle stage. 157 | 158 | ```{r} 159 | sce_obj |> filter(Phase == "G1") 160 | ``` 161 | 162 | We can use `select` to view columns, for example, to see the filename, total cellular RNA abundance and cell phase. 163 | 164 | * If we use `select` we will also get any view-only columns returned, such as the UMAP columns generated during the preprocessing. 165 | 166 | ```{r} 167 | sce_obj |> select(.cell, file, nCount_RNA, Phase) 168 | ``` 169 | 170 | We can use `mutate` to create a column. For example, we could create a new `Phase_l` column that contains a lower-case version of `Phase`. 171 | 172 | ```{r message=FALSE} 173 | sce_obj |> 174 | mutate(Phase_l = tolower(Phase)) |> 175 | select(.cell, Phase, Phase_l) 176 | ``` 177 | 178 | We can use tidyverse commands to polish an annotation column. We will extract the sample, and group information from the file name column into separate columns. 179 | 180 | ```{r message=FALSE} 181 | # First take a look at the file column 182 | sce_obj |> select(.cell, file) 183 | ``` 184 | 185 | ```{r} 186 | # Create column for sample 187 | sce_obj <- sce_obj |> 188 | # Extract sample 189 | extract(file, "sample", "../data/.*/([a-zA-Z0-9_-]+)/outs.+", remove = FALSE) 190 | 191 | # Take a look 192 | sce_obj |> select(.cell, sample, everything()) 193 | ``` 194 | 195 | We could use tidyverse `unite` to combine columns, for example to create a new column for sample id combining the sample and patient id (BCB) columns. 196 | 197 | ```{r message=FALSE} 198 | sce_obj <- sce_obj |> unite("sample_id", sample, BCB, remove = FALSE) 199 | 200 | # Take a look 201 | sce_obj |> select(.cell, sample_id, sample, BCB) 202 | ``` 203 | 204 | 205 | # Part 2 Signature visualisation 206 | 207 | ## Data pre-processing 208 | 209 | The object `sce_obj` we've been using was created as part of a study on breast cancer systemic immune response. 
Peripheral blood mononuclear cells have been sequenced for RNA at the single-cell level. The steps used to generate the object are summarised below. 210 | 211 | - `scran`, `scater`, and `DropletUtils` packages have been used to eliminate empty droplets and dead cells. Samples were individually quality checked and cells were filtered for good gene coverage. 212 | 213 | - Variable features were identified using `modelGeneVar`. 214 | 215 | - Read counts were scaled and normalised using logNormCounts from `scuttle`. 216 | 217 | - Data integration was performed using `fastMNN` with default parameters. 218 | 219 | - PCA was performed to reduce feature dimensionality. 220 | 221 | - Nearest-neighbor cell networks were calculated using 30 principal components. 222 | 223 | - 2 UMAP dimensions were calculated using 30 principal components. 224 | 225 | - Cells with similar transcriptome profiles were grouped into clusters using Louvain clustering from `scran`. 226 | 227 | ## Analyse custom signature 228 | 229 | The researcher analysing this dataset wanted to identify gamma delta T cells using a gene signature from a published paper [@Pizzolato2019]. We'll show how that can be done here. 230 | 231 | With tidySingleCellExperiment's `join_features` we can view the counts for genes in the signature as columns joined to our single cell tibble. 232 | 233 | ```{r} 234 | sce_obj |> 235 | join_features(c("CD3D", "TRDC", "TRGC1", "TRGC2", "CD8A", "CD8B"), shape = "wide") 236 | ``` 237 | 238 | We can use tidyverse `mutate` to create a column containing the signature score. To generate the score, we scale the sum of the 4 genes, CD3D, TRDC, TRGC1, TRGC2, and subtract the scaled sum of the 2 genes, CD8A and CD8B. `mutate` is powerful in enabling us to perform complex arithmetic operations easily. 
239 | 240 | ```{r} 241 | sce_obj |> 242 | 243 | join_features(c("CD3D", "TRDC", "TRGC1", "TRGC2", "CD8A", "CD8B"), shape = "wide") |> 244 | 245 | mutate( 246 | signature_score = 247 | scales::rescale(CD3D + TRDC + TRGC1 + TRGC2, to = c(0, 1)) - 248 | scales::rescale(CD8A + CD8B, to = c(0, 1)) 249 | ) |> 250 | 251 | select(.cell, signature_score, everything()) 252 | ``` 253 | 254 | The gamma delta T cells could then be visualised by the signature score using Bioconductor's visualisation functions. 255 | 256 | ```{r} 257 | sce_obj |> 258 | 259 | join_features( 260 | features = c("CD3D", "TRDC", "TRGC1", "TRGC2", "CD8A", "CD8B"), shape = "wide" 261 | ) |> 262 | 263 | mutate( 264 | signature_score = 265 | scales::rescale(CD3D + TRDC + TRGC1 + TRGC2, to = c(0, 1)) - 266 | scales::rescale(CD8A + CD8B, to = c(0, 1)) 267 | ) |> 268 | 269 | scater::plotUMAP(colour_by = "signature_score") 270 | ``` 271 | 272 | The cells could also be visualised using the popular and powerful `ggplot2` package, enabling the researcher to use ggplot functions they were familiar with, and to customise the plot with great flexibility. 273 | 274 | ```{r} 275 | sce_obj |> 276 | 277 | join_features( 278 | features = c("CD3D", "TRDC", "TRGC1", "TRGC2", "CD8A", "CD8B"), shape = "wide" 279 | ) |> 280 | 281 | mutate( 282 | signature_score = 283 | scales::rescale(CD3D + TRDC + TRGC1 + TRGC2, to = c(0, 1)) - 284 | scales::rescale(CD8A + CD8B, to = c(0, 1)) 285 | ) |> 286 | 287 | # plot cells with high score last so they're not obscured by other cells 288 | arrange(signature_score) |> 289 | 290 | ggplot(aes(UMAP_1, UMAP_2, color = signature_score)) + 291 | geom_point() + 292 | scale_color_distiller(palette = "Spectral") + 293 | bioc2022tidytranscriptomics::theme_multipanel 294 | ``` 295 | 296 | For exploratory analyses, we can select the gamma delta T cells, the red cluster on the left with high signature score. We'll filter for cells with a signature score > 0.7. 
297 | 298 | ```{r} 299 | sce_obj_gamma_delta <- 300 | 301 | sce_obj |> 302 | 303 | join_features( 304 | features = c("CD3D", "TRDC", "TRGC1", "TRGC2", "CD8A", "CD8B"), shape = "wide" 305 | ) |> 306 | 307 | mutate( 308 | signature_score = 309 | scales::rescale(CD3D + TRDC + TRGC1 + TRGC2, to = c(0, 1)) - 310 | scales::rescale(CD8A + CD8B, to = c(0, 1)) 311 | ) |> 312 | 313 | # Proper cluster selection should be used instead (see supplementary material) 314 | filter(signature_score > 0.7) 315 | ``` 316 | 317 | For comparison, we show the alternative using base R and SingleCellExperiment. Note that the code contains more redundancy and intermediate objects. 318 | 319 | ```{r eval=FALSE} 320 | counts_positive <- 321 | assay(sce_obj, "logcounts")[c("CD3D", "TRDC", "TRGC1", "TRGC2"), ] |> 322 | colSums() |> 323 | scales::rescale(to = c(0, 1)) 324 | 325 | counts_negative <- 326 | assay(sce_obj, "logcounts")[c("CD8A", "CD8B"), ] |> 327 | colSums() |> 328 | scales::rescale(to = c(0, 1)) 329 | 330 | sce_obj$signature_score <- counts_positive - counts_negative 331 | 332 | sce_obj_gamma_delta <- sce_obj[, sce_obj$signature_score > 0.7] 333 | ``` 334 | 335 | We can then focus on just these gamma delta T cells and chain Bioconductor and tidyverse commands together to analyse. 336 | 337 | ```{r warning=FALSE, message=FALSE} 338 | library(batchelor) 339 | library(scater) 340 | 341 | sce_obj_gamma_delta <- 342 | 343 | sce_obj_gamma_delta |> 344 | 345 | # Integrate - using batchelor. 346 | multiBatchNorm(batch = colData(sce_obj_gamma_delta)$sample) |> 347 | fastMNN(batch = colData(sce_obj_gamma_delta)$sample) |> 348 | 349 | # Join metadata removed by fastMNN - using tidyverse 350 | left_join(as_tibble(sce_obj_gamma_delta)) |> 351 | 352 | # Dimension reduction - using scater 353 | runUMAP(ncomponents = 2, dimred = "corrected") 354 | ``` 355 | 356 | Visualise gamma delta T cells. As we have used rough threshold we are left with only few cells. 
Proper cluster selection should be used instead (see supplementary material). 357 | 358 | ```{r} 359 | sce_obj_gamma_delta |> plotUMAP() 360 | ``` 361 | 362 | 363 | It is also possible to visualise the cells as a 3D plot using plotly. 364 | The example data used here only contains a few genes, for the sake of time and size in this demonstration, but below is how you could generate the 3 dimensions needed for 3D plot with a full dataset. 365 | 366 | ```{r eval = FALSE} 367 | single_cell_object |> 368 | RunUMAP(dims = 1:30, n.components = 3L, spread = 0.5, min.dist = 0.01, n.neighbors = 10L) 369 | ``` 370 | 371 | We'll demonstrate creating a 3D plot using some data that has 3 UMAP dimensions. This is a fantastic way to visualise both reduced dimensions and metadata in the same representation. 372 | 373 | ```{r umap plot 2, message = FALSE, warning = FALSE} 374 | pbmc <- bioc2022tidytranscriptomics::sce_obj_UMAP3 375 | 376 | pbmc |> 377 | plot_ly( 378 | x = ~`UMAP_1`, 379 | y = ~`UMAP_2`, 380 | z = ~`UMAP_3`, 381 | color = ~cell_type, 382 | colors = dittoSeq::dittoColors() 383 | ) |> 384 | add_markers(size = I(1)) # one small marker per cell 385 | ``` 386 | 387 | ## Exercises 388 | 389 | Using the `sce_obj`: 390 | 391 | 1. What proportion of all cells are gamma-delta T cells? Use signature_score > 0.7 to identify gamma-delta T cells. 392 | 393 | 2. There is a cluster of cells characterised by a low RNA output (nCount_RNA < 100). Identify the cell composition (cell_type) of that cluster. 394 | 395 | # Part 3 Pseudobulk analyses 396 | 397 | Next we want to identify genes whose transcription is affected by treatment in this dataset, comparing treated and untreated patients. We can do this with pseudobulk analysis. We aggregate cell-wise transcript abundance into pseudobulk samples and can then perform hypothesis testing using the very well established bulk RNA sequencing tools. For example, we can use edgeR in tidybulk to perform differential expression testing. 
For more details on pseudobulk analysis see [here](https://hbctraining.github.io/scRNA-seq/lessons/pseudobulk_DESeq2_scrnaseq.html). 398 | 399 | We want to do it for each cell type and the tidy transcriptomics ecosystem makes this very easy. 400 | 401 | 402 | ## Create pseudobulk samples 403 | 404 | To create pseudobulk samples from the single cell samples, we will use a helper function called `aggregate_cells`, available in this workshop package. This function will combine the single cells into a group for each cell type for each sample. 405 | 406 | ```{r warning=FALSE, message=FALSE, echo=FALSE} 407 | library(glue) 408 | library(tidyr) 409 | library(purrr) 410 | library(patchwork) 411 | 412 | # Bulk RNA-seq libraries, used below for the pseudobulk analysis 413 | library(tidybulk) 414 | library(tidySummarizedExperiment) 415 | ``` 416 | 417 | ```{r} 418 | pseudo_bulk <- 419 | sce_obj |> 420 | bioc2022tidytranscriptomics::aggregate_cells(c(sample, cell_type), assays = "counts") # one pseudobulk group per sample and cell type, from the "counts" assay 421 | 422 | pseudo_bulk 423 | ``` 424 | 425 | ## Tidybulk and tidySummarizedExperiment 426 | 427 | With `tidySummarizedExperiment` and `tidybulk` it is easy to split the data into groups and perform analyses on each without needing to create separate objects. 428 | 429 | ```{r, echo=FALSE, out.width = "800px"} 430 | knitr::include_graphics("../man/figures/new_SE_usage-01.png") 431 | ``` 432 | 433 | We use tidyverse `nest` to group the data. The command below will create a tibble containing a column with a SummarizedExperiment object for each cell type. `nest` is similar to tidyverse `group_by`, except with `nest` each group is stored in a single row, and can be a complex object such as a plot or SummarizedExperiment. 434 | 435 | ```{r} 436 | pseudo_bulk_nested <- 437 | pseudo_bulk |> 438 | nest(grouped_summarized_experiment = -cell_type) # everything except cell_type goes into the grouped_summarized_experiment column 439 | 440 | pseudo_bulk_nested 441 | ``` 442 | 443 | To explore the grouping, we can use tidyverse `slice` to choose a row (cell_type) and `pull` to extract the values from a column. 
If we pull the `grouped_summarized_experiment` column we can view the SummarizedExperiment object. 444 | 445 | ```{r} 446 | pseudo_bulk_nested |> 447 | slice(1) |> 448 | pull(grouped_summarized_experiment) 449 | ``` 450 | 451 | We can then identify differentially expressed genes for each cell type for our condition of interest, treated versus untreated patients. We use tidyverse `map` to apply differential expression functions to each cell type group in the nested data. The result columns will be added to the SummarizedExperiment objects. 452 | 453 | ```{r message=FALSE, warning=FALSE} 454 | # Differential transcription abundance 455 | pseudo_bulk <- 456 | 457 | pseudo_bulk_nested |> 458 | 459 | # map accepts a data column (.x) and a function. It applies the function to each element of the column. 460 | mutate(grouped_summarized_experiment = map( 461 | grouped_summarized_experiment, 462 | ~ .x |> 463 | 464 | # Removing genes with low expression 465 | identify_abundant(factor_of_interest = treatment) |> 466 | 467 | # Scaling counts for sequencing depth 468 | scale_abundance(method="TMMwsp") |> 469 | 470 | # Testing for differential expression using edgeR quasi likelihood 471 | test_differential_abundance(~treatment, method="edgeR_quasi_likelihood", scaling_method="TMMwsp") 472 | )) 473 | ``` 474 | 475 | The output is again a tibble containing a SummarizedExperiment object for each cell type. 476 | 477 | ```{r} 478 | pseudo_bulk 479 | ``` 480 | 481 | If we pull out the SummarizedExperiment object for the first cell type, as before, we can see it now has columns containing the differential expression results (e.g. logFC, PValue). 482 | 483 | ```{r} 484 | pseudo_bulk |> 485 | slice(1) |> 486 | pull(grouped_summarized_experiment) 487 | ``` 488 | 489 | It is useful to create plots for significant genes to visualise the transcriptional abundance in the groups being compared (treated and untreated). We can do this for each cell type without needing to create multiple objects. 
490 | 491 | ```{r message = FALSE} 492 | pseudo_bulk <- 493 | 494 | pseudo_bulk |> 495 | 496 | # Keep only the significant genes (FDR < 0.5) 497 | # using a high FDR value as this is toy data 498 | mutate(grouped_summarized_experiment = map( 499 | grouped_summarized_experiment, 500 | ~ filter(.x, FDR < 0.5) 501 | )) |> 502 | 503 | # Remove cell types with no differentially abundant gene-transcripts 504 | # map_int is a map that returns an integer vector instead of a list 505 | filter(map_int(grouped_summarized_experiment, ~ nrow(.x)) > 0) |> 506 | 507 | # Plot significant genes for each cell type 508 | # map2 is a map that accepts 2 input columns (.x, .y) and a function 509 | mutate(plot = map2( 510 | grouped_summarized_experiment, cell_type, 511 | ~ .x |> 512 | ggplot(aes(treatment, counts_scaled + 1)) + # + 1 keeps zero counts plottable on the log10 axis 513 | geom_boxplot(aes(fill = treatment)) + 514 | geom_jitter() + 515 | scale_y_log10() + 516 | facet_wrap(~.feature) + 517 | ggtitle(.y) 518 | )) 519 | ``` 520 | 521 | The output is a nested table with a column containing a plot for each cell type. 522 | 523 | ```{r} 524 | pseudo_bulk 525 | ``` 526 | 527 | We'll use slice and pull again to have a look at one of the plots. 528 | 529 | ```{r} 530 | pseudo_bulk |> 531 | slice(1) |> 532 | pull(plot) 533 | ``` 534 | 535 | We can extract all plots and plot with `wrap_plots` from the patchwork package. 536 | 537 | ```{r, fig.height = 8} 538 | pseudo_bulk |> 539 | pull(plot) |> 540 | wrap_plots() & 541 | bioc2022tidytranscriptomics::theme_multipanel 542 | ``` 543 | 544 | # Feedback 545 | 546 | Thank you for attending this workshop. We hope it was an informative session for you. We would be grateful if you could help us by taking a few moments to provide your valuable feedback in the short form below. Your feedback will provide us with an opportunity to further improve the workshop. All the results are anonymous. 
547 | 548 | [Feedback Form Link](https://forms.gle/rVzZo6k8QfYW2jJx6) 549 | 550 | **Session Information** 551 | 552 | ```{r} 553 | sessionInfo() 554 | ``` 555 | 556 | **References** 557 | --------------------------------------------------------------------------------