├── .Rbuildignore
├── .github
    └── workflows
    │   └── basic_checks.yaml
├── .gitignore
├── CONTRIBUTING.md
├── DESCRIPTION
├── Dockerfile
├── LICENSE
├── NAMESPACE
├── R
    ├── aggregate_cells.R
    └── data.R
├── README.md
├── _pkgdown.yml
├── data-raw
    ├── seurat_obj.R
    ├── seurat_obj_UMAP3.R
    └── theme.R
├── data
    ├── gate_sce_obj.rda
    ├── sce_obj.rda
    ├── sce_obj_UMAP3.rda
    └── theme_multipanel.rda
├── inst
    ├── .gitignore
    ├── bioc2022_tidytranscriptomics.pdf
    └── vignettes
    │   ├── .gitignore
    │   ├── ScreenShot2.png
    │   ├── ScreenShot3.png
    │   ├── blog_screenshot.PNG
    │   ├── bulk_RNAseq_pipeline.png
    │   ├── bulk_vs_single.jpg
    │   ├── nesting.png
    │   ├── plotly_2.png
    │   ├── roadmap_integration.png
    │   ├── rstudio_cloud.png
    │   ├── single_cell_RNAseq_pipeline.png
    │   ├── tidybulk_logo.png
    │   ├── tidydata_1.jpg
    │   ├── tidytranscriptomics.bib
    │   ├── tidyverse.png
    │   └── transcriptomics.jpg
├── man
    ├── drop_class.Rd
    ├── figures
    │   ├── BioC2022_logo.png
    │   └── new_SE_usage-01.png
    ├── gate_sce_obj.Rd
    ├── quo_names.Rd
    ├── sce_obj.Rd
    ├── sce_obj_UMAP3.Rd
    └── theme_multipanel.Rd
└── vignettes
    ├── solutions.Rmd
    ├── supplementary.Rmd
    └── tidytranscriptomics_case_study.Rmd


/.Rbuildignore:
--------------------------------------------------------------------------------
 1 | ^.*\.Rproj$
 2 | ^\.Rproj\.user$
 3 | ^doc$
 4 | ^Meta$
 5 | ^LICENSE\.md$
 6 | ^\.github$
 7 | ^Dockerfile$
 8 | ^_pkgdown\.yml$
 9 | ^data-raw$
10 | ^dev$
11 | ^CONTRIBUTING\.md$
12 | ^bioc2022tidytranscriptomics\.Rcheck$
13 | ^bioc2022tidytranscriptomics.*\.tar\.gz$
14 | ^bioc2022tidytranscriptomics.*\.tgz$
15 | 


--------------------------------------------------------------------------------
/.github/workflows/basic_checks.yaml:
--------------------------------------------------------------------------------
  1 | on: [push]  # Run the workflow on every push
  2 | 
  3 | env:  # Workflow-level variables, read in steps as ${{ env.<name> }}
  4 |   cache-version: v4  # Prefix of the R package cache key; change it to invalidate old caches
  5 |   repo-name: tidytranscriptomics-workshops/bioc2022_tidytranscriptomics  # Not referenced by any step in this workflow
  6 | 
  7 | jobs:
  8 |   r-build-and-check:  # Check the package in the Bioconductor devel image; on master also publish the pkgdown site
  9 |     runs-on: ubuntu-latest
 10 |     container: bioconductor/bioconductor_docker:devel
 11 |     env:
 12 |       R_REMOTES_NO_ERRORS_FROM_WARNINGS: "true"
 13 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
 14 |     steps:
 15 |       - uses: actions/checkout@v4
 16 | 
 17 |       - name: Query dependencies and update old packages
 18 |         run: |
 19 |           # We'll install the specific versions needed
 20 |           BiocManager::install("stemangiola/tidySingleCellExperiment@v1.7.4")
 21 |           BiocManager::install("stemangiola/tidySummarizedExperiment@v1.7.3")
 22 |           BiocManager::install("stemangiola/tidybulk@v1.9.2")
 23 | 
 24 |           # Then install the other dependencies in the usual way
 25 |           BiocManager::install(ask=FALSE)
 26 |           saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2)
 27 |         shell: Rscript {0}
 28 | 
 29 |       - name: Cache R packages
 30 |         if: runner.os != 'Windows'
 31 |         uses: actions/cache@v4
 32 |         with:
 33 |           path: /usr/local/lib/R/site-library
 34 |           key: ${{ env.cache-version }}-${{ runner.os }}-r-${{ hashFiles('.github/depends.Rds') }}
 35 |           restore-keys: ${{ env.cache-version }}-${{ runner.os }}-r-
 36 | 
 37 |       # This lets us augment with additional dependencies
 38 |       - name: Install system dependencies
 39 |         if: runner.os == 'Linux'
 40 |         env:
 41 |           RHUB_PLATFORM: linux-x86_64-ubuntu-gcc
 42 |         run: |
 43 |           Rscript -e "remotes::install_github('r-hub/sysreqs')"
 44 |           sysreqs=$(Rscript -e "cat(sysreqs::sysreq_commands('DESCRIPTION'))")
 45 |           sudo -s eval "$sysreqs"
 46 | 
 47 |       - name: Install dependencies
 48 |         run: |
 49 |           options(repos = c(CRAN = "https://cran.r-project.org"))
 50 |           BiocManager::repositories()
 51 |           remotes::install_deps(dependencies = TRUE, repos = BiocManager::repositories())
 52 |           remotes::install_cran("rcmdcheck")
 53 |         shell: Rscript {0}
 54 | 
 55 |       - name: Check
 56 |         env:
 57 |           _R_CHECK_CRAN_INCOMING_REMOTE_: "false"
 58 |         run: rcmdcheck::rcmdcheck(args = c("--no-manual"), error_on = "error", check_dir = "check")
 59 |         shell: Rscript {0}
 60 | 
 61 |       - name: Build pkgdown
 62 |         if: github.ref == 'refs/heads/master'
 63 |         run: |
 64 |           PATH=$PATH:$HOME/bin/ Rscript -e 'pkgdown::build_site(".")'
 65 | 
 66 |       # deploy needs rsync? Seems so.
 67 |       - name: Install deploy dependencies
 68 |         if: github.ref == 'refs/heads/master'
 69 |         run: |
 70 |           apt-get update
 71 |           apt-get -y install rsync
 72 | 
 73 |       - name: Deploy 🚀
 74 |         uses: JamesIves/github-pages-deploy-action@releases/v4
 75 |         if: github.ref == 'refs/heads/master'
 76 |         with:
 77 |           token: ${{ secrets.GITHUB_TOKEN }}  # v4 input name; the v3-style GITHUB_TOKEN input is not read by v4
 78 |           branch: gh-pages  # The branch the action should deploy to.
 79 |           folder: docs  # The folder the action should deploy
 80 | 
 81 |   docker-build-and-push:  # Build the workshop image, push it to ghcr.io and sign it with cosign
 82 |     # needs: r-build-and-check
 83 |     runs-on: ubuntu-latest
 84 |     permissions:
 85 |       contents: read
 86 |       packages: write
 87 |       # This is used to complete the identity challenge
 88 |       # with sigstore/fulcio when running outside of PRs.
 89 |       id-token: write
 90 |     steps:
 91 |       - name: Checkout repository
 92 |         uses: actions/checkout@v4
 93 | 
 94 |       - name: Set Environment Variables  # REPO_LOWER: the image name is built from the lower-cased repository name
 95 |         run: |
 96 |           REPO_LOWER="$(echo "${{ github.repository }}" | tr '[:upper:]' '[:lower:]')"
 97 |           REGISTRY=ghcr.io
 98 |           echo "BUILD_DATE=$(date +'%Y-%m-%d %H:%M:%S')" >> "$GITHUB_ENV"
 99 |           echo "GIT_SHA=$(echo ${{ github.sha }} | cut -c1-7)" >> "$GITHUB_ENV"
100 |           echo "REGISTRY=${REGISTRY}" >> "$GITHUB_ENV"
101 |           echo "IMAGE=${REGISTRY}/${REPO_LOWER}" >> "$GITHUB_ENV"
102 | 
103 |       - name: Show environment
104 |         run: |
105 |           env
106 |       # Install the cosign tool except on PR
107 |       # https://github.com/sigstore/cosign-installer
108 |       - name: Install cosign
109 |         if: github.event_name != 'pull_request'
110 |         uses: sigstore/cosign-installer@1e95c1de343b5b0c23352d6417ee3e48d5bcd422
111 |         with:
112 |           cosign-release: 'v1.4.0'
113 |       # Workaround: https://github.com/docker/build-push-action/issues/461
114 |       - name: Setup Docker buildx
115 |         uses: docker/setup-buildx-action@79abd3f86f79a9d68a23c75a09a9a85889262adf
116 |       # Login against a Docker registry except on PR
117 |       # https://github.com/docker/login-action
118 |       - name: Log into registry ${{ env.REGISTRY }}
119 |         if: github.event_name != 'pull_request'
120 |         uses: docker/login-action@28218f9b04b4f3f62068d7b6ce6ca5b26e35336c
121 |         with:
122 |           registry: ${{ env.REGISTRY }}
123 |           username: ${{ github.actor }}
124 |           password: ${{ secrets.GITHUB_TOKEN }}
125 | 
126 |       # Extract metadata (tags, labels) for Docker
127 |       # https://github.com/docker/metadata-action
128 |       - name: Extract Docker metadata
129 |         id: meta
130 |         uses: docker/metadata-action@98669ae865ea3cffbcbaa878cf57c20bbf1c6c38
131 |         with:
132 |           images: ${{ env.IMAGE }}
133 |       # Build and push Docker image with Buildx (don't push on PR)
134 |       # https://github.com/docker/build-push-action
135 |       - name: Build and push Docker image
136 |         id: build-and-push
137 |         uses: docker/build-push-action@ad44023a93711e3deb337508980b4b5e9bcdc5dc
138 |         with:
139 |           context: .
140 |           push: ${{ github.event_name != 'pull_request' }}
141 |           tags: |
142 |             ${{ env.IMAGE }}:latest
143 |             ${{ env.IMAGE }}:${{ env.GIT_SHA }}
144 |       # Sign the resulting Docker image digest except on PRs.
145 |       # This will only write to the public Rekor transparency log when the Docker
146 |       # repository is public to avoid leaking data.  If you would like to publish
147 |       # transparency data even for private images, pass --force to cosign below.
148 |       # https://github.com/sigstore/cosign
149 |       - name: Sign the published Docker image
150 |         if: ${{ github.event_name != 'pull_request' }}
151 |         env:
152 |           COSIGN_EXPERIMENTAL: "true"
153 |         # This step uses the identity token to provision an ephemeral certificate
154 |         # against the sigstore community Fulcio instance.
155 |         run: cosign sign ${{ env.IMAGE }}@${{ steps.build-and-push.outputs.digest }}
156 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .Rproj.user
 2 | doc
 3 | Meta
 4 | .RData
 5 | .Rhistory
 6 | *.Rproj
 7 | bioc2022tidytranscriptomics.Rcheck/
 8 | bioc2022tidytranscriptomics*.tar.gz
 9 | bioc2022tidytranscriptomics*.tgz
10 | dev
11 | /doc/
12 | /Meta/
13 | .DS_Store
14 | ._.DS_Store
15 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | Contributing to TidyTranscriptomics Workshop
 2 | ===
 3 | 
 4 | :+1::tada: First off, thanks for taking the time to contribute! :tada::+1:
 5 | 
 6 | The following is a set of guidelines for contributing to this training material on GitHub.
 7 | 
 8 | # Table of contents
 9 | 
10 | - [What should I know before I get started?](#what-should-i-know-before-i-get-started)
11 | - [How can I contribute?](#how-can-i-contribute)
12 | - [How do I add new content?](#how-do-i-add-new-content)
13 | - [How is the training material maintained?](#how-is-the-training-material-maintained)
14 | 
15 | # What should I know before I get started?
16 | 
17 | This repository contains the files for the TidyTranscriptomics workshop.
18 | 
19 | By contributing, you agree that we may redistribute your work under [this repository's license](LICENSE).
20 | 
21 | We will address your issues and/or assess your change proposal as promptly as we can.
22 | 
 23 | If you have any questions, you can reach us by creating an [Issue](https://github.com/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics/issues/new/choose) in the workshop repository.
24 | 
25 | # How can I contribute?
26 | 
 27 | You can report mistakes or errors, and add suggestions, additions, updates or improvements for content. Whatever your background, there is probably a way to do it: via the GitHub website or via the command line. If you feel it is too much, you can even write it with any text editor and contact us: we will work together to integrate it.
28 | 
29 | # How is the training material maintained?
30 | 
31 | ## Maintainers
32 | 
 33 | The maintainers are listed in the [DESCRIPTION](https://github.com/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics/blob/master/DESCRIPTION) file.
34 | 
35 | They are responsible for making sure issues and change requests are looked at. They have the final say over what is included in the training material.
36 | 
37 | ## Labels
38 | 
39 | This repository is using the following labels for issues, pull requests and project management:
40 | 
41 | - Type
42 |     - `bug`: errors to be fixed
43 |     - `improvement`: enhancement to an existing functionality
44 |     - `feature`: new functionality
45 |     - `discussion`: discussion threads
46 |     - `question`: often turn into discussion threads
47 | - Status
48 |     - `help-wanted`: requests for assistance
49 |     - `newcomer-friendly`: suitable for people who want to start contributing
50 |     - `work-in-progress`: someone is working on this
51 |     - `review-needed`: requests for review


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: bioc2022tidytranscriptomics
 2 | Title: Tidy Transcriptomics For Single-Cell RNA Sequencing Analyses
 3 | Version: 0.13.3
 4 | Authors@R: c(
 5 |     person("Stefano", "Mangiola", email="mangiola.s@wehi.edu.au",
 6 |     role = c("aut","cre"),
 7 |     comment = c(ORCID = "0000-0001-7474-836X")),
 8 |     person("Maria", "Doyle", email="maria.doyle@petermac.org",
 9 |     role = c("aut"),
10 |     comment = c(ORCID = "0000-0003-4847-8436")))
11 | Maintainer: Stefano Mangiola <mangiola.s@wehi.edu.au>
12 | Description: This workshop will showcase analysis of single-cell RNA sequencing data following the tidy data paradigm, using the tidySingleCellExperiment, tidySummarizedExperiment, tidybulk and tidyverse packages.
13 | License: CC BY-SA 4.0 + file LICENSE
14 | Encoding: UTF-8
15 | LazyData: true
16 | LazyDataCompression: xz
17 | Roxygen: list(markdown = TRUE)
18 | RoxygenNote: 7.2.0
19 | Depends:
20 |     R (>= 4.1.0)
21 | Imports:
22 |     tidySingleCellExperiment,
23 |     tidySummarizedExperiment,
24 |     tidybulk,
25 |     tidygate,
26 |     scater,
27 |     batchelor,
28 |     stats,
29 |     utils,
30 |     tibble,
31 |     stringr,
32 |     ggplot2,
33 |     dplyr,
34 |     readr,
35 |     tidyr,
36 |     purrr,
37 |     forcats,
38 |     ggrepel,
39 |     plotly,
40 |     colorspace,
41 |     scales,
42 |     uwot,
43 |     broom,
44 |     devtools,
45 |     rlang,
46 |     magrittr,
47 |     R.utils,
48 |     dittoSeq,
49 |     glue,
50 |     patchwork,
51 |     Matrix
52 | Suggests:
53 |     knitr,
54 |     rmarkdown,
55 |     pkgdown
56 | Biarch: true
57 | biocViews: RNASeq, DifferentialExpression, GeneExpression, Normalization, Clustering, QualityControl, Sequencing, SingleCell, Transcription, Transcriptomics
58 | URL: https://tidytranscriptomics-workshops.github.io/bioc2022_tidytranscriptomics/
59 | BugReports: https://github.com/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics/issues/new/choose
60 | VignetteBuilder: knitr
61 | DockerImage: ghcr.io/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics:latest
62 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | # Workshop image: Bioconductor devel plus the pinned tidy* packages and this package.
 2 | FROM bioconductor/bioconductor_docker:devel
 3 | 
 4 | WORKDIR /home/rstudio
 5 | 
 6 | # The dependency layers do not need the repository files, so they run before
 7 | # COPY: editing workshop files then no longer invalidates them in the build cache.
 8 | RUN Rscript -e "options(repos = c(CRAN = 'https://cran.r-project.org')); BiocManager::install(ask=FALSE)"
 9 | 
10 | # Pin the package versions used in the workshop.
11 | RUN Rscript -e "BiocManager::install(c('stemangiola/tidySingleCellExperiment@v1.7.4', 'stemangiola/tidySummarizedExperiment@v1.7.3', 'stemangiola/tidybulk@v1.9.2'))"
12 | 
13 | COPY --chown=rstudio:rstudio . /home/rstudio/
14 | 
15 | # Install the workshop package itself, building its vignettes.
16 | RUN Rscript -e "options(repos = c(CRAN = 'https://cran.r-project.org')); devtools::install('.', dependencies=TRUE, build_vignettes=TRUE, repos = BiocManager::repositories())"


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright 2020 Maria Doyle and Stefano Mangiola
2 | 
3 | This work is licensed under the Creative Commons Attribution-ShareAlike 4.0 International License. To view a copy of this license, visit https://creativecommons.org/licenses/by-sa/4.0/ or send a letter to Creative Commons, PO Box 1866, Mountain View, CA 94042, USA.
4 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | export(aggregate_cells)
 4 | import(ggplot2)
 5 | importFrom(magrittr,equals)
 6 | importFrom(purrr,map)
 7 | importFrom(purrr,map2)
 8 | importFrom(purrr,when)
 9 | importFrom(rlang,quo_name)
10 | importFrom(rlang,quo_squash)
11 | 


--------------------------------------------------------------------------------
/R/aggregate_cells.R:
--------------------------------------------------------------------------------
  1 | #' Convert array of quosure (e.g. c(col_a, col_b)) into character vector
  2 | #'
  3 | #' The quosure is squashed and turned into text, the wrapping `c()` and any
  4 | #' backticks are stripped, and the remainder is split on `", "`.
  5 | #'
  6 | #' @keywords internal
  7 | #'
  8 | #' @importFrom rlang quo_name
  9 | #' @importFrom rlang quo_squash
 10 | #' @importFrom purrr when map map2
 11 | #' @importFrom magrittr equals
 12 | #' @import ggplot2
 13 | #'
 14 | #' @param v An array of quosures (e.g. c(col_a, col_b))
 15 | #'
 16 | #' @return A character vector
 17 | quo_names <- function(v) {
 18 |   # Plain nested calls instead of `%>%`: the package NAMESPACE does not import the pipe
 19 |   unlist(strsplit(gsub('^c\\(|`|\\)$', '', rlang::quo_name(rlang::quo_squash(v))), ', '))
 20 | }
 21 | 
 22 | #' Remove a class from an object
 23 | #'
 24 | #' @param var An object, e.g. a tibble
 25 | #' @param name A character vector of class names to remove
 26 | #'
 27 | #' @return `var` with every class listed in `name` dropped from `class(var)`
 28 | drop_class = function(var, name) {
 29 |   is_dropped <- class(var) %in% name
 30 |   class(var) <- class(var)[!is_dropped]
 31 |   var
 32 | }
 33 | 
 34 | get_specific_annotation_columns = function(.data, .col){
 35 |   # Return the names of the columns of `.data` (other than `.col`) that add no
 36 |   # extra distinct rows when combined with `.col`; the result is NULL if none do.
 37 |   # Comply with CRAN NOTES
 38 |   . = NULL
 39 |   
 40 |   # Capture `.col` as a quosure so it can be unquoted with `!!` below
 41 |   .col = enquo(.col)
 42 |   
 43 |   # Number of distinct values (or combinations) of `.col`
 44 |   n_x = .data %>% dplyr::distinct_at(vars(!!.col)) %>% nrow
 45 |   
 46 |   # Test every other column: keep its name if (.col, column) has n_x distinct rows, else NULL
 47 |   .data %>%
 48 |     select(-!!.col) %>%
 49 |     colnames %>%
 50 |     map(
 51 |       ~
 52 |         .x %>%
 53 |         when(
 54 |           .data %>%
 55 |             distinct_at(vars(!!.col, .x)) %>%
 56 |             nrow %>%
 57 |             magrittr::equals(n_x) ~ (.),
 58 |           ~ NULL
 59 |         )
 60 |     ) %>%
 61 |     
 62 |     # Drop the NULL entries, then flatten the list of names into a character vector
 63 |     {	(.)[lengths((.)) != 0]	} %>%
 64 |     unlist
 65 |   
 66 | }
 67 | 
 68 | 
 69 | subset = function(.data, .column) {
 70 |   # Internal helper (masks base::subset inside this package): reduce `.data` to
 71 |   # `.column` plus the columns that are constant within each `.column` value,
 72 |   # with one row per distinct combination.
 73 |   .column = enquo(.column)
 74 |   
 75 |   # Abort unless every requested column exists in `.data`
 76 |   requested = quo_names(.column)
 77 |   if (!all(requested %in% colnames(.data)))
 78 |     stop("nanny says: some of the .column specified do not exist in the input data frame.")
 79 |   
 80 |   # Keep `.column` and its column-specific annotations, then de-duplicate
 81 |   annotated = select(.data, !!.column, get_specific_annotation_columns(.data, !!.column))
 82 |   distinct(annotated)
 83 | }
 84 | 
 85 | #' Aggregate cells by sample
 86 | #'
 87 | #' Nests the cells of `.data` by `.sample`, applies `aggregation_function` to
 88 | #' every assay of each group and joins the per-assay results by feature. The
 89 | #' sample-level annotation columns are then joined back and the table is
 90 | #' converted with `as_SummarizedExperiment()`.
 91 | #'
 92 | #' @param .data A single-cell object with an `assays` slot (the function reads
 93 | #'   `.data@assays@data`), e.g. a SingleCellExperiment.
 94 | #' @param .sample Unquoted column(s) defining the groups to aggregate within.
 95 | #' @param slot Currently not used by the function body.
 96 | #' @param assays Optional index passed to `.data@assays@data[assays]` to keep
 97 | #'   only some assays. `NULL` (default) keeps all of them.
 98 | #' @param aggregation_function Function applied to each assay matrix, called
 99 | #'   with `na.rm = TRUE`. Defaults to `Matrix::rowSums`.
100 | #'
101 | #' @return The value of `as_SummarizedExperiment()`, using `feature` as the
102 | #'   transcript column and the first assay name as the abundance column.
103 | #'
104 | #' @export
105 | aggregate_cells = function(.data, .sample = NULL, slot = "data", assays = NULL, aggregation_function = Matrix::rowSums) {
106 | 
107 |   .sample = enquo(.sample)
108 | 
109 |   # Subset only wanted assays
110 |   if (!is.null(assays)) {
111 |     .data@assays@data = .data@assays@data[assays]
112 |   }
113 | 
114 |   .data %>%
115 | 
116 |     # One row per group of `.sample`, with that group's cells nested in `data`
117 |     tidySingleCellExperiment::nest(data = -!!.sample) %>%
118 |     mutate(.aggregated_cells = map_int(data, ~ ncol(.x))) %>%
119 |     mutate(data = map(data, ~
120 |       # loop over assays
121 |       map2(
122 |         as.list(assays(.x)), names(.x@assays),
123 | 
124 |         # Get counts
125 |         ~ .x %>%
126 |           aggregation_function(na.rm = TRUE) %>%
127 |           tibble::enframe(
128 |             name  = "feature",
129 |             value = sprintf("%s", .y)
130 |           ) %>%
131 |           mutate(feature = as.character(feature))
132 |       ) %>%
133 |         # Merge the per-assay tables into one table keyed by feature
134 |         Reduce(function(...) full_join(..., by=c("feature")), .)
135 | 
136 |     )) %>%
137 |     # Add back the columns that are constant within each `.sample` value
138 |     left_join(.data %>% tidySingleCellExperiment::as_tibble() %>% subset(!!.sample), by = quo_names(.sample)) %>%
139 |     tidySingleCellExperiment::unnest(data) %>%
140 | 
141 |     drop_class("tidySingleCellExperiment_nested") |>
142 | 
143 |     # as.symbol() uses only the first element, i.e. the first assay name
144 |     as_SummarizedExperiment(.sample = !!.sample, .transcript = feature, .abundance = !!as.symbol(names(.data@assays)))
145 | 
146 | }
147 | 


--------------------------------------------------------------------------------
/R/data.R:
--------------------------------------------------------------------------------
 1 | #' sce_obj
 2 | #'
 3 | #' A SingleCellExperiment (sce) dataset of single cell RNA sequencing data
 4 | #'
 5 | #'
 6 | #' @format  A sce object.
 7 | #' @usage data(sce_obj)
 8 | "sce_obj"
 9 | 
10 | #' gate_sce_obj
11 | #'
12 | #' Coordinates for a gate interactively drawn using tidygate
13 | #'
14 | #'
15 | #' @format  A list containing x,y coordinates for one gate
16 | #' @usage data(gate_sce_obj)
17 | "gate_sce_obj"
18 | 
19 | 
20 | #' sce_obj_UMAP3
21 | #'
22 | #' A sce dataset of single cell RNA sequencing data with 3 UMAP dimensions
23 | #'
24 | #'
25 | #' @format  A sce object.
26 | #' @usage data(sce_obj_UMAP3)
27 | "sce_obj_UMAP3"
28 | 
29 | #' theme_multipanel
30 | #'
31 | #' A ggplot2 theme based on theme_bw() with small text, for multi-panel figures
32 | #'
33 | #'
34 | #' @format  A theme ggplot2 object.
35 | #' @usage data(theme_multipanel)
36 | "theme_multipanel"
37 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | <!-- badges: start -->
 2 | [![DOI](https://zenodo.org/badge/496962211.svg)](https://zenodo.org/badge/latestdoi/496962211)
 3 | [![.github/workflows/basic_checks.yaml](https://github.com/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics/workflows/.github/workflows/basic_checks.yaml/badge.svg)](https://github.com/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics/actions) 	
 4 | <!-- badges: end -->
 5 | 
 6 | # Tidy Transcriptomics For Single-Cell RNA Sequencing Analyses
 7 | <p float="left">
 8 | <img style="height:100px;" alt="BioC2022" src="https://bioc2022.bioconductor.org/img/carousel/BioC2022.png"/>
 9 | <img style="height:100px;" alt="tidybulk" src="https://github.com/Bioconductor/BiocStickers/blob/master/tidybulk/tidybulk.png?raw=true"/>
10 | </p>
11 | 
12 | ## Workshop Description
13 | 
14 | This tutorial will showcase analysis of single-cell RNA sequencing data following the tidy data paradigm. The tidy data paradigm provides a standard way to organise data values within a dataset, where each variable is a column, each observation is a row, and data is manipulated using an easy-to-understand vocabulary. Most importantly, the data structure remains consistent across manipulation and analysis functions.
15 | 
16 | This can be achieved with the integration of packages present in the R CRAN and Bioconductor ecosystem, including [tidySingleCellExperiment](https://stemangiola.github.io/tidySingleCellExperiment/) and [tidyverse](https://www.tidyverse.org/). These packages are part of the tidytranscriptomics suite that introduces a tidy approach to RNA sequencing data representation and analysis. For more information see the [tidy transcriptomics blog](https://stemangiola.github.io/tidytranscriptomics/).
17 | 
18 | ### Pre-requisites
19 | 
20 | * Basic familiarity with single-cell transcriptomic analyses
21 | * Basic familiarity with tidyverse
22 | 
23 | ## Workshop goals and objectives
24 | 
 25 | * To approach single-cell data representation and analysis through a tidy data paradigm, integrating tidyverse with tidySingleCellExperiment.
26 | * Compare SingleCellExperiment and tidy representation  
27 | * Apply tidy functions to SingleCellExperiment objects  
28 | * Reproduce a real-world case study that showcases the power of tidy single-cell methods
29 | 
30 | ### What you will learn
31 | 
32 | * Basic tidy operations possible with tidySingleCellExperiment
33 | * The differences between SingleCellExperiment representation and tidy representation
34 | * How to interface SingleCellExperiment with tidy manipulation and visualisation
35 | * A real-world case study that will showcase the power of tidy single-cell methods compared with base/ad-hoc methods
36 | 
37 | ### What you will not learn
38 | 
39 | * The molecular technology of single-cell sequencing
40 | * The fundamentals of single-cell data analysis
41 | * The fundamentals of tidy data analysis
42 | 
43 | ### Workshop Participation
44 | 
45 | The workshop format is a 1.5 hour session consisting of hands-on demos, exercises and Q&A.
46 | 
47 | ## Syllabus
48 | 
49 | Material [web page](https://tidytranscriptomics-workshops.github.io/bioc2022_tidytranscriptomics/articles/tidytranscriptomics_case_study.html). More details on the workshop are below.
50 | 
51 | ## Workshop package installation 
52 | 
 53 | For the BioC2022 workshop, an RStudio in the cloud will be provided with everything installed; all that participants will need is a web browser.
54 | 
55 | If you want to install the packages and material post-workshop, they can be installed using one of the two ways below. The workshop is designed for R `4.2` and Bioconductor 3.16.
56 | 
57 | ### Via Docker image
58 | 
59 | If you're familiar with [Docker](https://docs.docker.com/get-docker/) you could use the Docker image which has all the software pre-configured to the correct versions.
60 | 
61 | ```
62 | docker run -e PASSWORD=abc -p 8787:8787 ghcr.io/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics
63 | ```
64 | 
65 | Once running, navigate to <http://localhost:8787/> and then login with
66 | `Username:rstudio` and `Password:abc`.
67 | 
68 | You should see the Rmarkdown file with all the workshop code which you can run.
69 | 
70 | ### Via GitHub
71 | 
72 | Alternatively, you could install the workshop using the commands below in R `4.2`.
73 | 
74 | ```
75 | #install.packages('remotes')
76 | 
77 | # Need to set this to prevent installation erroring due to even tiny warnings, similar to here: https://github.com/r-lib/remotes/issues/403#issuecomment-748181946
78 | Sys.setenv("R_REMOTES_NO_ERRORS_FROM_WARNINGS" = "true")
79 | 
80 | # Install same versions used in the workshop
81 | remotes::install_github(c("stemangiola/tidySingleCellExperiment@v1.7.4", "stemangiola/tidySummarizedExperiment@v1.7.3", "stemangiola/tidybulk@v1.9.2"))
82 | 
83 | # Install workshop package
84 | 
85 | remotes::install_github("tidytranscriptomics-workshops/bioc2022_tidytranscriptomics", build_vignettes = TRUE)
86 | 
87 | # To view vignettes
88 | library(bioc2022tidytranscriptomics)
89 | browseVignettes("bioc2022tidytranscriptomics")
90 | ```
91 | 
 92 | To run the code, you could then copy and paste the code from the workshop vignette or [R markdown file](https://raw.githubusercontent.com/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics/master/vignettes/tidytranscriptomics_case_study.Rmd) into a new R Markdown file on your computer.
93 | 
94 | ## Instructor names and contact information
95 | 
96 | * Stefano Mangiola <mangiola.s at wehi.edu.au>
97 | * Maria Doyle <Maria.Doyle at petermac.org>  
98 | 
99 | 


--------------------------------------------------------------------------------
/_pkgdown.yml:
--------------------------------------------------------------------------------
 1 | # pkgdown configuration for the workshop website.
 2 | # `url` must be the address the site is published at (same as URL in DESCRIPTION).
 3 | url: https://tidytranscriptomics-workshops.github.io/bioc2022_tidytranscriptomics
 4 | 
 5 | template:
 6 |   params:
 7 |     bootswatch: flatly
 8 |     ganalytics: UA-93043521-1
 9 | 
10 | home:
11 |   title: "TidyTranscriptomics"
12 |   type: inverse
13 | 
14 | toc:
15 |   depth: 4
16 | 
17 | navbar:
18 |   title: ~
19 |   type: default
20 |   # Left side: links to the rendered vignettes
21 |   left:
22 |     - text: Workshop
23 |       href: articles/tidytranscriptomics_case_study.html
24 |     - text: Supplementary
25 |       href: articles/supplementary.html
26 | 
27 |   # Right side: source repository and container image
28 |   right:
29 |     - icon: fab fa-github
30 |       href: https://github.com/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics
31 |     - icon: fab fa-docker
32 |       href: https://ghcr.io/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics:latest
33 | 


--------------------------------------------------------------------------------
/data-raw/seurat_obj.R:
--------------------------------------------------------------------------------
 1 | # Builds the example dataset data/sce_obj.rda from a curated Seurat object.
 2 | library(tidyverse)
 3 | library(glue)
 4 | library(Seurat)
 5 | library(tidyseurat)
 6 | library(tidySingleCellExperiment)
 7 | # NOTE(review): the input is read from an absolute path outside the repository.
 8 | seurat_obj <- readRDS("/stornext/Bioinf/data/bioinf-data/Papenfuss_lab/projects/mangiola.s/PostDoc/oligo_breast/expanded_analyses_with_control/cancer_only_analyses/lymphoid/cancer_lymphoid_cell_type_curated.rds")
 9 | # Recompute PCA and UMAP with a fixed seed, then keep only the columns selected below
10 | set.seed(123)
11 | seurat_obj = seurat_obj |> RunPCA(npcs = 20) |> select(-contains("UMAP")) |> RunUMAP(dims=1:20)
12 | seurat_obj = seurat_obj |> select(.cell, file, 3, 8, 9, S.Score, G2M.Score , Phase , curated_cell_type , contains("UMAP"))
13 | # Keep 3000 randomly sampled cells plus every cell whose curated_cell_type contains "Delta"
14 | seurat_obj = seurat_obj %>% filter(.cell %in% (seurat_obj %>% sample_n(3000) %>%  pull(.cell) %>% c(seurat_obj %>% filter(grepl("Delta", curated_cell_type)) %>% pull(.cell)) %>% unique))
15 | # Restrict the features to the 500 most variable RNA features plus six named genes
16 | seurat_obj = seurat_obj %>% FindVariableFeatures(assay="RNA", nfeatures = 500)
17 | seurat_obj = seurat_obj[VariableFeatures(seurat_obj, assay="RNA") %>% c("CD3D", "TRDC", "TRGC1", "TRGC2", "CD8A", "CD8B"),]
18 | seurat_obj = seurat_obj %>% mutate(
19 | 	file = factor(file), Barcode = factor(Barcode), batch= factor(batch), BCB= factor(BCB), Phase= factor(Phase), curated_cell_type= factor(curated_cell_type),	
20 | 	nCount_RNA = as.integer(nCount_RNA), nFeature_RNA= as.integer(nFeature_RNA), nCount_SCT= as.integer(nCount_SCT), nFeature_SCT= as.integer(nFeature_SCT)
21 | ) 
22 | #seurat_obj[["SCT"]]@scale.data = seurat_obj[["SCT"]]@scale.data[c("CD3D", "TRDC", "TRGC1", "TRGC2", "CD8A", "CD8B"),]
23 | #seurat_obj[["SCT"]]@data = seurat_obj[["SCT"]]@data[c("CD3D", "TRDC", "TRGC1", "TRGC2", "CD8A", "CD8B"),]
24 | DefaultAssay(seurat_obj) = "SCT"
25 | seurat_obj[["integrated"]] = NULL
26 | # Convert to SingleCellExperiment; the random per-file `condition` is dropped again in the next step
27 | sce_obj = seurat_obj %>%
28 | 	as.SingleCellExperiment() |>
29 | 	
30 | 	# Add factor of interest
31 | 	nest(data = -file) |> 
32 | 	mutate(condition = sample(c("treated", "untreated"), n(), replace = TRUE)) |>
33 | 	unnest(data)
34 | 
35 | # Parse: join external metadata, derive `treatment`, relabel samples and rename the cell type column
36 | sce_obj = 
37 | 	sce_obj |> 
38 | 	select(-condition) |>  
39 | 	left_join(readRDS("~/metadata_oligo.rds")) |> 
40 | 	rename(treatment = type) |> 
41 | 	mutate(treatment = if_else(treatment=="OMBC", "treated", "untreated")) |> 
42 | 	#select(-file) |> 
43 | 	mutate(sample = glue("S{as.integer(as.factor(sample))}")) |> 
44 | 	rename(cell_type = curated_cell_type) |> 
45 | 
46 | 	# filtering because of too few samples per cell type
47 | 	filter(cell_type !="CD8+_Tem") |> 
48 | 	
49 | 	# Replace file path
50 | 	mutate(file = file |> str_replace("bhupinder_10X_260819", "single_cell"))
51 | # Save the object as the package dataset
52 | # job::job({
53 | 	save(sce_obj , file="data/sce_obj.rda", compress = "xz")
54 | # })
55 | 


--------------------------------------------------------------------------------
/data-raw/seurat_obj_UMAP3.R:
--------------------------------------------------------------------------------
 1 | library(tidySingleCellExperiment)
 2 | # Builds data/sce_obj_UMAP3.rda. NOTE(review): `seurat_obj_for_BioCAsia2021` is not defined in this script.
 3 | seurat_obj_UMAP3 = 
 4 |   seurat_obj_for_BioCAsia2021 %>% 
 5 |   RunUMAP(dims = 1:30, n.components = 3L, spread    = 0.5,min.dist  = 0.01, n.neighbors = 10L)
 6 | # Remove the RNA and SCT assays and keep only row 1 (presumably to shrink the object - confirm)
 7 | seurat_obj_UMAP3[["RNA"]] = NULL
 8 | seurat_obj_UMAP3[["SCT"]] = NULL
 9 | seurat_obj_UMAP3 = seurat_obj_UMAP3[1,] 
10 | # NOTE(review): `sce_obj_UMAP3` is used on the right-hand side below but is never created in this
11 | # script; a conversion from `seurat_obj_UMAP3` appears to be missing - confirm before re-running.
12 | sce_obj_UMAP3 = sce_obj_UMAP3 |> rename(cell_type = curated_cell_type)
13 | 
14 | #seurat_obj_UMAP3 %>% saveRDS("~/PostDoc/workshops/bioc2022_tidytranscriptomics/dev/seurat_obj_UMAP3.rds", compress = "xz")
15 | save(sce_obj_UMAP3, file="data/sce_obj_UMAP3.rda", compress = "xz")
16 | 
17 | # 3D scatter of the three UMAP components, coloured by curated_cell_type
18 | seurat_obj_UMAP3 |>
19 |   
20 |   plot_ly(
21 |     x = ~`UMAP_1`,
22 |     y = ~`UMAP_2`,
23 |     z = ~`UMAP_3`,
24 |     color = ~curated_cell_type,
25 |     size=0.05
26 |   )
27 | 


--------------------------------------------------------------------------------
/data-raw/theme.R:
--------------------------------------------------------------------------------
 1 | # Build the ggplot2 theme `theme_multipanel` and save it to data/theme_multipanel.rda.
 2 | 
 3 | #   scale_fill_manual(values = friendly_cols)
 4 | #   scale_color_manual(values = friendly_cols)
 5 | 
 6 | library(ggplot2)
 7 | # theme_bw() base with panel border, grid lines and strip background removed, text sizes 6-7 and thin axis lines.
 8 | theme_multipanel =
 9 | 	theme_bw() +
10 | 	theme(
11 | 		panel.border = element_blank(),
12 | 		axis.line = element_line(size=0.1),
13 | 		panel.grid.major = element_blank(), 
14 | 		panel.grid.minor = element_blank(),
15 | 		legend.position = "bottom",
16 | 		strip.background = element_blank(),
17 | 		axis.title.y = element_text(margin = margin(t = 0, r = 0, b = 0, l = 0), size = 7),
18 | 		axis.title.x = element_text(margin = margin(t = 0, r = 0, b = 0, l = 0), size = 7),
19 | 		panel.spacing.x=unit(0.1, "lines"),
20 | 		axis.text.x = element_text(size=6),
21 | 		axis.text.y = element_text(size=6),
22 | 		strip.text.x = element_text(size = 7),
23 | 		strip.text.y = element_text(size = 7),
24 | 		
25 | 		# Legend
26 | 		legend.key.size = unit(5, 'mm'),
27 | 		legend.title = element_text(size=7),
28 | 		legend.text = element_text(size=6),
29 | 		
30 | 		# Avoid text clipping for facets. Currently not merged remotes::install_github("tidyverse/ggplot2#4223")
31 | 		#strip.clip = "off",
32 | 		
33 | 		# Title
34 | 		plot.title = element_text(size=7),
35 | 		# Axis lines and ticks. NOTE(review): `size` in element_line() is deprecated since ggplot2 3.4.0 (use `linewidth`) — update once the required ggplot2 version allows.
36 | 		axis.line.x = element_line(size=0.2),
37 | 		axis.line.y = element_line(size=0.2),
38 | 		axis.ticks.x = element_line(size=0.2),
39 | 		axis.ticks.y = element_line(size=0.2)
40 | 	)
41 | 
42 | # Patchwork usage example
43 | # +  plot_layout(guides = 'collect' ) + plot_annotation(tag_levels = c('A')) & theme( plot.margin = margin(0, 0, 0, 0, "pt"),  legend.key.size = unit(0.2, 'cm'))
44 | # Save the theme object with xz compression.
45 | save(theme_multipanel, file="data/theme_multipanel.rda", compress = "xz")
46 | 


--------------------------------------------------------------------------------
/data/gate_sce_obj.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics/d6d8538efc381dedb3391b651d61f451536c73bf/data/gate_sce_obj.rda


--------------------------------------------------------------------------------
/data/sce_obj.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics/d6d8538efc381dedb3391b651d61f451536c73bf/data/sce_obj.rda


--------------------------------------------------------------------------------
/data/sce_obj_UMAP3.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics/d6d8538efc381dedb3391b651d61f451536c73bf/data/sce_obj_UMAP3.rda


--------------------------------------------------------------------------------
/data/theme_multipanel.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics/d6d8538efc381dedb3391b651d61f451536c73bf/data/theme_multipanel.rda


--------------------------------------------------------------------------------
/inst/.gitignore:
--------------------------------------------------------------------------------
1 | ._.smbdeleteAAA240c2803
2 | ._bioc2022_tidytranscriptomics.pdf
3 | .smbdeleteAAA240c2803
4 | 


--------------------------------------------------------------------------------
/inst/bioc2022_tidytranscriptomics.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics/d6d8538efc381dedb3391b651d61f451536c73bf/inst/bioc2022_tidytranscriptomics.pdf


--------------------------------------------------------------------------------
/inst/vignettes/.gitignore:
--------------------------------------------------------------------------------
1 | Thumbs.db
2 | 


--------------------------------------------------------------------------------
/inst/vignettes/ScreenShot2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics/d6d8538efc381dedb3391b651d61f451536c73bf/inst/vignettes/ScreenShot2.png


--------------------------------------------------------------------------------
/inst/vignettes/ScreenShot3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics/d6d8538efc381dedb3391b651d61f451536c73bf/inst/vignettes/ScreenShot3.png


--------------------------------------------------------------------------------
/inst/vignettes/blog_screenshot.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics/d6d8538efc381dedb3391b651d61f451536c73bf/inst/vignettes/blog_screenshot.PNG


--------------------------------------------------------------------------------
/inst/vignettes/bulk_RNAseq_pipeline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics/d6d8538efc381dedb3391b651d61f451536c73bf/inst/vignettes/bulk_RNAseq_pipeline.png


--------------------------------------------------------------------------------
/inst/vignettes/bulk_vs_single.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics/d6d8538efc381dedb3391b651d61f451536c73bf/inst/vignettes/bulk_vs_single.jpg


--------------------------------------------------------------------------------
/inst/vignettes/nesting.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics/d6d8538efc381dedb3391b651d61f451536c73bf/inst/vignettes/nesting.png


--------------------------------------------------------------------------------
/inst/vignettes/plotly_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics/d6d8538efc381dedb3391b651d61f451536c73bf/inst/vignettes/plotly_2.png


--------------------------------------------------------------------------------
/inst/vignettes/roadmap_integration.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics/d6d8538efc381dedb3391b651d61f451536c73bf/inst/vignettes/roadmap_integration.png


--------------------------------------------------------------------------------
/inst/vignettes/rstudio_cloud.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics/d6d8538efc381dedb3391b651d61f451536c73bf/inst/vignettes/rstudio_cloud.png


--------------------------------------------------------------------------------
/inst/vignettes/single_cell_RNAseq_pipeline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics/d6d8538efc381dedb3391b651d61f451536c73bf/inst/vignettes/single_cell_RNAseq_pipeline.png


--------------------------------------------------------------------------------
/inst/vignettes/tidybulk_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics/d6d8538efc381dedb3391b651d61f451536c73bf/inst/vignettes/tidybulk_logo.png


--------------------------------------------------------------------------------
/inst/vignettes/tidydata_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics/d6d8538efc381dedb3391b651d61f451536c73bf/inst/vignettes/tidydata_1.jpg


--------------------------------------------------------------------------------
/inst/vignettes/tidytranscriptomics.bib:
--------------------------------------------------------------------------------
 1 | @article{wickham2019welcome,
 2 |   title={Welcome to the Tidyverse},
 3 |   author={Wickham, Hadley and Averick, Mara and Bryan, Jennifer and Chang, Winston and McGowan, Lucy D'Agostino and Fran{\c{c}}ois, Romain and Grolemund, Garrett and Hayes, Alex and Henry, Lionel and Hester, Jim and others},
 4 |   journal={Journal of Open Source Software},
 5 |   volume={4},
 6 |   number={43},
 7 |   pages={1686},
 8 |   year={2019}
 9 | }
10 | 
11 | @article{butler2018integrating,
12 |   title={Integrating single-cell transcriptomic data across different conditions, technologies, and species},
13 |   author={Butler, Andrew and Hoffman, Paul and Smibert, Peter and Papalexi, Efthymia and Satija, Rahul},
14 |   journal={Nature biotechnology},
15 |   volume={36},
16 |   number={5},
17 |   pages={411--420},
18 |   year={2018},
19 |   publisher={Nature Publishing Group}
20 | }
21 | 
22 | @article{stuart2019comprehensive,
23 |   title={Comprehensive integration of single-cell data},
24 |   author={Stuart, Tim and Butler, Andrew and Hoffman, Paul and Hafemeister, Christoph and Papalexi, Efthymia and Mauck III, William M and Hao, Yuhan and Stoeckius, Marlon and Smibert, Peter and Satija, Rahul},
25 |   journal={Cell},
26 |   volume={177},
27 |   number={7},
28 |   pages={1888--1902},
29 |   year={2019},
30 |   publisher={Elsevier}
31 | }
32 | 
33 | @article{Pizzolato2019,
34 |   doi = {10.1073/pnas.1818488116},
35 |   url = {https://doi.org/10.1073/pnas.1818488116},
36 |   year = {2019},
37 |   month = may,
38 |   publisher = {Proceedings of the National Academy of Sciences},
39 |   pages = {201818488},
40 |   author = {Gabriele Pizzolato and Hannah Kaminski and Marie Tosolini and Don-Marc Franchini and Fr{\'{e}}deric Pont and Fr{\'{e}}deric Martins and Carine Valle and Delphine Labourdette and Sarah Cadot and Anne Quillet-Mary and Mary Poupot and Camille Laurent and Loic Ysebaert and Serena Meraviglia and Francesco Dieli and Pierre Merville and Pierre Milpied and Julie D{\'{e}}chanet-Merville and Jean-Jacques Fourni{\'{e}}},
41 |   title = {Single-cell {RNA} sequencing unveils the shared and the distinct cytotoxic hallmarks of human {TCRV}$\updelta$1 and {TCRV}$\updelta$2 $\upgamma$$\updelta$ T lymphocytes},
42 |   journal = {Proceedings of the National Academy of Sciences}
43 | }
44 | 
45 | 


--------------------------------------------------------------------------------
/inst/vignettes/tidyverse.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics/d6d8538efc381dedb3391b651d61f451536c73bf/inst/vignettes/tidyverse.png


--------------------------------------------------------------------------------
/inst/vignettes/transcriptomics.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics/d6d8538efc381dedb3391b651d61f451536c73bf/inst/vignettes/transcriptomics.jpg


--------------------------------------------------------------------------------
/man/drop_class.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/aggregate_cells.R
 3 | \name{drop_class}
 4 | \alias{drop_class}
 5 | \title{Remove class from object}
 6 | \usage{
 7 | drop_class(var, name)
 8 | }
 9 | \arguments{
10 | \item{var}{A tibble}
11 | 
12 | \item{name}{A character name of the class}
13 | }
14 | \value{
15 | A tibble with an additional attribute
16 | }
17 | \description{
18 | Remove class from object
19 | }
20 | 


--------------------------------------------------------------------------------
/man/figures/BioC2022_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics/d6d8538efc381dedb3391b651d61f451536c73bf/man/figures/BioC2022_logo.png


--------------------------------------------------------------------------------
/man/figures/new_SE_usage-01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics/d6d8538efc381dedb3391b651d61f451536c73bf/man/figures/new_SE_usage-01.png


--------------------------------------------------------------------------------
/man/gate_sce_obj.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{gate_sce_obj}
 5 | \alias{gate_sce_obj}
 6 | \title{gate_sce_obj}
 7 | \format{
 8 | A list containing x,y coordinates for one gate
 9 | }
10 | \usage{
11 | data(gate_sce_obj)
12 | }
13 | \description{
14 | Coordinates for a gate interactively drawn using tidygate
15 | }
16 | \keyword{datasets}
17 | 


--------------------------------------------------------------------------------
/man/quo_names.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/aggregate_cells.R
 3 | \name{quo_names}
 4 | \alias{quo_names}
 5 | \title{Convert array of quosure (e.g. c(col_a, col_b)) into character vector}
 6 | \usage{
 7 | quo_names(v)
 8 | }
 9 | \arguments{
10 | \item{v}{An array of quosures (e.g. c(col_a, col_b))}
11 | }
12 | \value{
13 | A character vector
14 | }
15 | \description{
16 | Convert array of quosure (e.g. c(col_a, col_b)) into character vector
17 | }
18 | \keyword{internal}
19 | 


--------------------------------------------------------------------------------
/man/sce_obj.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{sce_obj}
 5 | \alias{sce_obj}
 6 | \title{"sce_obj"}
 7 | \format{
 8 | A sce object.
 9 | }
10 | \usage{
11 | data(sce_obj)
12 | }
13 | \description{
14 | A sce dataset of single cell RNA sequencing data
15 | }
16 | \keyword{datasets}
17 | 


--------------------------------------------------------------------------------
/man/sce_obj_UMAP3.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{sce_obj_UMAP3}
 5 | \alias{sce_obj_UMAP3}
 6 | \title{sce_obj_UMAP3}
 7 | \format{
 8 | A sce object.
 9 | }
10 | \usage{
11 | data(sce_obj_UMAP3)
12 | }
13 | \description{
14 | A sce dataset of single cell RNA sequencing data with 3 UMAP dimensions
15 | }
16 | \keyword{datasets}
17 | 


--------------------------------------------------------------------------------
/man/theme_multipanel.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{theme_multipanel}
 5 | \alias{theme_multipanel}
 6 | \title{theme_multipanel}
 7 | \format{
 8 | A theme ggplot2 object.
 9 | }
10 | \usage{
11 | data(theme_multipanel)
12 | }
13 | \description{
14 | A pretty theme
15 | }
16 | \keyword{datasets}
17 | 


--------------------------------------------------------------------------------
/vignettes/solutions.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Solutions"
 3 | output: rmarkdown::html_vignette
 4 | vignette: >
 5 |   %\VignetteIndexEntry{Solutions}
 6 |   %\VignetteEngine{knitr::rmarkdown}
 7 |   %\VignetteEncoding{UTF-8}
 8 | ---
 9 | 
10 | ```{r message = FALSE}
11 | library(SingleCellExperiment)
12 | library(ggplot2)
13 | library(plotly)
14 | library(dplyr)
15 | library(colorspace)
16 | library(dittoSeq)
17 | library(tidySingleCellExperiment)
18 | 
19 | sce_obj <- bioc2022tidytranscriptomics::sce_obj
20 | ```
21 | 
22 | ## Question 1
23 | 
24 | What proportion of all cells are gamma-delta T cells? Use signature_score > 0.7 to identify gamma-delta T cells.
25 | 
26 | ```{r}
27 | sce_obj |>
28 |   
29 |   join_features(
30 |     features = c("CD3D", "TRDC", "TRGC1", "TRGC2", "CD8A", "CD8B"),
31 |     shape = "wide"
32 |   ) |>
33 |   
34 |   mutate(signature_score =
35 |            scales::rescale(CD3D + TRDC + TRGC1 + TRGC2, to=c(0,1)) -
36 |            scales::rescale(CD8A + CD8B, to=c(0,1))
37 |   ) |>
38 | 
39 |   mutate(gamma_delta = signature_score > 0.7) |>
40 |   
41 |   count(gamma_delta) |> 
42 |   summarise(proportion = n/sum(n))
43 | ```
44 | 
45 | ## Question 2
46 | 
47 | There is a cluster of cells characterised by a low RNA output (nCount_RNA < 100). Identify the cell composition (cell_type) of that cluster.
48 | 
49 | 
50 | ```{r}
51 | sce_obj |>
52 |     filter(nCount_RNA < 100) |>  # keep only the low RNA output cells
53 |     count(cell_type)             # number of such cells per cell type
54 | ```
55 | 
56 | 


--------------------------------------------------------------------------------
/vignettes/supplementary.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Supplementary Material"
 3 | output: rmarkdown::html_vignette
 4 | vignette: >
 5 |   %\VignetteIndexEntry{Supplementary Material}
 6 |   %\VignetteEngine{knitr::rmarkdown}
 7 |   %\VignetteEncoding{UTF-8}
 8 | ---
 9 | 
10 | ```{r setup, include=FALSE}
11 | knitr::opts_chunk$set(echo = TRUE)
12 | ```
13 | 
14 | 
15 | ```{r message = FALSE}
16 | library(ggplot2)
17 | library(plotly)
18 | library(dplyr)
19 | library(colorspace)
20 | library(dittoSeq)
21 | library(tidySingleCellExperiment)
22 | library(tidygate)
23 | 
24 | sce_obj <- bioc2022tidytranscriptomics::sce_obj
25 | ```
26 | 
27 | 
28 | Instead of filtering using a specified threshold, the gamma delta T cells could be interactively selected from the plot using the tidygate package.
29 | 
30 | ```{r eval = FALSE}
31 | sce_obj |>
32 | 
33 |   join_features(
34 |     features = c("CD3D", "TRDC", "TRGC1", "TRGC2", "CD8A", "CD8B" ), shape = "wide"
35 |   ) |>
36 | 
37 |   mutate(signature_score =
38 |            scales::rescale(CD3D + TRDC + TRGC1+ TRGC2, to=c(0,1)) -
39 |            scales::rescale(CD8A + CD8B, to=c(0,1))
40 |   ) |>
41 | 
42 |   mutate(gate = gate_int(
43 |     UMAP_1, UMAP_2,
44 |     .size = 0.1,
45 |     .color =signature_score
46 |   ))
47 | 
48 | ```
49 | 
50 | After the selection, the gate that was drawn can be reloaded from a file, for reproducibility.
51 | 
52 | ```{r}
53 | sce_obj |>
54 | 
55 |   join_features(
56 |     features = c("CD3D", "TRDC", "TRGC1", "TRGC2", "CD8A", "CD8B" ), shape = "wide"
57 | 
58 |   ) |>
59 | 
60 |   mutate(signature_score =
61 |            scales::rescale(CD3D + TRDC + TRGC1+ TRGC2, to=c(0,1)) -
62 |            scales::rescale(CD8A + CD8B, to=c(0,1))
63 |   ) |>
64 | 
65 |   mutate(gate = gate_int(
66 |     UMAP_1, UMAP_2,
67 |     .size = 0.1,
68 |     .color =signature_score,
69 |     gate_list = bioc2022tidytranscriptomics::gate_sce_obj
70 |   ))
71 | 
72 | ```
73 | 
74 | The dataset can be filtered for just these cells using tidyverse `filter`.
75 | 
76 | ```{r}
77 | sce_obj_gamma_delta <-
78 |     
79 |   sce_obj |>
80 | 
81 |   join_features(
82 |     features = c("CD3D", "TRDC", "TRGC1", "TRGC2", "CD8A", "CD8B" ), shape = "wide"
83 | 
84 |   ) |>
85 | 
86 |   mutate(signature_score =
87 |            scales::rescale(CD3D + TRDC + TRGC1+ TRGC2, to=c(0,1)) -
88 |            scales::rescale(CD8A + CD8B, to=c(0,1))
89 |   ) |>
90 | 
91 |   mutate(gate = gate_int(UMAP_1, UMAP_2, gate_list = bioc2022tidytranscriptomics::gate_sce_obj)) |>
92 | 
93 |   filter(gate == 1)
94 | ```
95 | 


--------------------------------------------------------------------------------
/vignettes/tidytranscriptomics_case_study.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Tidy Transcriptomics For Single-Cell RNA Sequencing Analyses"
  3 | author:
  4 |   - Stefano Mangiola, Walter and Eliza Hall Institute^[<mangiola.s at wehi.edu.au>]
  5 |   - Maria Doyle, Peter MacCallum Cancer Centre^[<maria.doyle at petermac.org>]
  6 | output: rmarkdown::html_vignette
  7 | bibliography: "`r file.path(system.file(package='bioc2022tidytranscriptomics', 'vignettes'), 'tidytranscriptomics.bib')`"
  8 | vignette: >
  9 |   %\VignetteIndexEntry{Tidy Transcriptomics For Single-Cell RNA Sequencing Analyses}
 10 |   %\VignetteEngine{knitr::rmarkdown}
 11 |   %\VignetteEncoding{UTF-8}
 12 | ---
 13 | 
 14 | ```{r setup, include=FALSE}
 15 | knitr::opts_chunk$set(echo = TRUE)
 16 | ```
 17 | 
 18 | # Workshop introduction
 19 | 
 20 | <p float="left">
 21 | <img style="height:100px;" alt="BioC2022" src="https://bioc2022.bioconductor.org/img/carousel/BioC2022.png"/>
 22 | <img style="height:100px;" alt="tidybulk" src="https://github.com/Bioconductor/BiocStickers/blob/master/tidybulk/tidybulk.png?raw=true"/>
 23 | </p>
 24 | 
 25 | ## Instructors
 26 | 
 27 | *Dr. Stefano Mangiola* is currently a Postdoctoral researcher in the laboratory of Prof. Tony Papenfuss at the Walter and Eliza Hall Institute in Melbourne, Australia. His background spans from biotechnology to bioinformatics and biostatistics. His research focuses on prostate and breast tumour microenvironment, the development of statistical models for the analysis of RNA sequencing data, and data analysis and visualisation interfaces.
 28 | 
 29 | *Dr. Maria Doyle* is the Application and Training Specialist for Research Computing at the Peter MacCallum Cancer Centre in Melbourne, Australia. She has a PhD in Molecular Biology and currently works in bioinformatics and data science education and training. She is passionate about supporting researchers, reproducible research, open source and tidy data.
 30 | 
 31 | ## Description
 32 | 
 33 | This tutorial will showcase analysis of single-cell RNA sequencing data following the tidy data paradigm. The tidy data paradigm provides a standard way to organise data values within a dataset, where each variable is a column, each observation is a row, and data is manipulated using an easy-to-understand vocabulary. Most importantly, the data structure remains consistent across manipulation and analysis functions.
 34 | 
 35 | This can be achieved with the integration of packages present in the R CRAN and Bioconductor ecosystem, including [tidySingleCellExperiment](https://stemangiola.github.io/tidySingleCellExperiment/), [tidySummarizedExperiment](https://stemangiola.github.io/tidySummarizedExperiment/), [tidybulk](https://stemangiola.github.io/tidybulk/) and [tidyverse](https://www.tidyverse.org/). These packages are part of the tidytranscriptomics suite that introduces a tidy approach to RNA sequencing data representation and analysis. For more information see the [tidy transcriptomics blog](https://stemangiola.github.io/tidytranscriptomics/).
 36 | 
 37 | ### Pre-requisites
 38 | 
 39 | * Basic familiarity with single-cell transcriptomic analyses
 40 | * Basic familiarity with tidyverse
 41 | 
 42 | ## Goals and objectives
 43 | 
 44 | * To approach single-cell data representation and analysis through a tidy data paradigm, integrating tidyverse with tidySingleCellExperiment.
 45 | * Compare SingleCellExperiment and tidy representation  
 46 | * Apply tidy functions to SingleCellExperiment objects  
 47 | * Reproduce a real-world case study that showcases the power of tidy single-cell methods
 48 | 
 49 | ### What you will learn
 50 | 
 51 | -   Basic `tidy` operations possible with `tidySingleCellExperiment`
 52 | -   The differences between `SingleCellExperiment` representation and `tidy` representation
 53 | -   How to interface `SingleCellExperiment` with tidy manipulation and visualisation
 54 | -   A real-world case study that will showcase the power of `tidy` single-cell methods compared with base/ad-hoc methods
 55 | 
 56 | ### What you will *not* learn
 57 | 
 58 | -   The molecular technology of single-cell sequencing
 59 | -   The fundamentals of single-cell data analysis
 60 | -   The fundamentals of tidy data analysis
 61 | 
 62 | This workshop will demonstrate a real-world example of using tidy transcriptomics packages to analyse single cell data. This workshop is not a step-by-step introduction in how to perform single-cell analysis. For an overview of single-cell analysis steps performed in a tidy way please see the [ISMB2021 workshop](https://tidytranscriptomics-workshops.github.io/ismb2021_tidytranscriptomics/articles/tidytranscriptomics.html).
 63 | 
 64 | ## Getting started
 65 | 
 66 | ### Cloud
 67 | 
 68 | Easiest way to run this material. We will use the Orchestra Cloud platform during the BioC2022 workshop.
 69 | 
 70 | 1. Go to [Orchestra](http://app.orchestra.cancerdatasci.org/).
 71 | 2. Log in.
 72 | 3. Find the workshop. In the search box type bioc2022, sort by the Created column, and select the most recently created workshop called "BioC2022: Tidy Transcriptomics For Single-Cell RNA Sequencing Analyses". **There are several tidy transcriptomics workshops. Be sure to select the BioC2022 one with the most recent creation date.**
 73 | 4. Click "Launch" (may take a minute or two). 
 74 | 5. Follow the instructions. **Do not share your personalized URL for the RStudio session, or use the trainers' URL, as only one browser at a time can be connected.**
 75 | 6. Open `tidytranscriptomics_case_study.Rmd` in the `bioc2022_tidytranscriptomics/vignettes` folder.
 76 | 
 77 | ### Local
 78 | 
 79 | We will use the Orchestra Cloud platform during the BioC2022 workshop and this method is available if you want to run the material after the workshop. If you want to install on your own computer, see instructions [here](https://tidytranscriptomics-workshops.github.io/bioc2022_tidytranscriptomics/index.html#workshop-package-installation).
 80 | 
 81 | Alternatively, you can view the material at the workshop webpage [here](https://tidytranscriptomics-workshops.github.io/bioc2022_tidytranscriptomics/articles/tidytranscriptomics_case_study.html).
 82 | 
 83 | ## Slides
 84 | 
 85 | *The embedded slides below may take a minute to appear. You can also view or download [here](https://github.com/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics/blob/master/inst/bioc2022_tidytranscriptomics.pdf).*
 86 | 
 87 | <iframe 
 88 |     src="https://docs.google.com/gview?url=https://raw.githubusercontent.com/tidytranscriptomics-workshops/bioc2022_tidytranscriptomics/master/inst/bioc2022_tidytranscriptomics.pdf&embedded=true" 
 89 |     scrolling="yes" 
 90 |     style="width:100%; height:600px;" 
 91 |     frameborder="0">
 92 | </iframe>
 93 | 
 94 | 
 95 | # Part 1 Introduction to tidySingleCellExperiment
 96 | 
 97 | ```{r message = FALSE}
 98 | # Load packages
 99 | library(SingleCellExperiment)
100 | library(ggplot2)
101 | library(plotly)
102 | library(dplyr)
103 | library(colorspace)
104 | library(dittoSeq)
105 | ```
106 | 
107 | SingleCellExperiment is a very popular analysis toolkit for single cell RNA sequencing data [@butler2018integrating; @stuart2019comprehensive].
108 | 
109 | Here we load single-cell data in SingleCellExperiment object format. This data is peripheral blood mononuclear cells (PBMCs) from metastatic breast cancer patients.
110 | 
111 | 
112 | ```{r}
113 | # load single cell RNA sequencing data
114 | sce_obj <- bioc2022tidytranscriptomics::sce_obj
115 | 
116 | # take a look
117 | sce_obj
118 | ```
119 | 
120 | tidySingleCellExperiment provides a bridge between the SingleCellExperiment single-cell package and the tidyverse [@wickham2019welcome]. It creates an invisible layer that enables viewing the SingleCellExperiment object as a tidyverse tibble, and provides SingleCellExperiment-compatible *dplyr*, *tidyr*, *ggplot* and *plotly* functions.
121 | 
122 | If we load the *tidySingleCellExperiment* package and then view the single cell data, it now displays as a tibble.
123 | 
124 | ```{r message = FALSE}
125 | library(tidySingleCellExperiment)
126 | 
127 | sce_obj
128 | ```
129 | 
130 | If we want to revert to the standard SingleCellExperiment view we can do that.
131 | 
132 | ```{r}
133 | options("restore_SingleCellExperiment_show" = TRUE)
134 | sce_obj
135 | ```
136 | 
137 | If we want to return to the tidy SingleCellExperiment view we can do that too.
138 | 
139 | ```{r}
140 | options("restore_SingleCellExperiment_show" = FALSE)
141 | sce_obj
142 | ```
143 | 
144 | It can be interacted with using [SingleCellExperiment commands](https://bioconductor.org/packages/devel/bioc/vignettes/SingleCellExperiment/inst/doc/intro.html) such as `assays`.
145 | 
146 | ```{r}
147 | assays(sce_obj)
148 | ```
149 | 
150 | We can also interact with our object as we do with any tidyverse tibble.
151 | 
152 | ## Tidyverse commands
153 | 
154 | We can use tidyverse commands, such as `filter`, `select` and `mutate` to explore the tidySingleCellExperiment object. Some examples are shown below and more can be seen at the tidySingleCellExperiment website [here](https://stemangiola.github.io/tidySingleCellExperiment/articles/introduction.html#tidyverse-commands-1).
155 | 
156 | We can use `filter` to choose rows, for example, to see just the rows for the cells in G1 cell-cycle stage.
157 | 
158 | ```{r}
159 | sce_obj |> filter(Phase == "G1")
160 | ```
161 | 
162 | We can use `select` to view columns, for example, to see the filename, total cellular RNA abundance and cell phase.
163 | 
164 | * If we use `select` we will also get any view-only columns returned, such as the UMAP columns generated during the preprocessing.
165 | 
166 | ```{r}
167 | sce_obj |> select(.cell, file, nCount_RNA, Phase)
168 | ```
169 | 
170 | We can use `mutate` to create a column. For example, we could create a new `Phase_l` column that contains a lower-case version of `Phase`.
171 | 
172 | ```{r message=FALSE}
173 | sce_obj |>
174 |   mutate(Phase_l = tolower(Phase)) |>
175 |   select(.cell, Phase, Phase_l)
176 | ```
177 | 
178 | We can use tidyverse commands to polish an annotation column. We will extract the sample information from the file name column into a separate column.
179 | 
180 | ```{r message=FALSE}
181 | # First take a look at the file column
182 | sce_obj |> select(.cell, file)
183 | ```
184 | 
185 | ```{r}
186 | # Create column for sample
187 | sce_obj <- sce_obj |>
188 |   # Extract sample
189 |   extract(file, "sample", "../data/.*/([a-zA-Z0-9_-]+)/outs.+", remove = FALSE)
190 | 
191 | # Take a look
192 | sce_obj |> select(.cell, sample, everything())
193 | ```
194 | 
195 | We could use tidyverse `unite` to combine columns, for example to create a new column for sample id combining the sample and patient id (BCB) columns.
196 | 
197 | ```{r message=FALSE}
198 | sce_obj <- sce_obj |> unite("sample_id", sample, BCB, remove = FALSE)
199 | 
200 | # Take a look
201 | sce_obj |> select(.cell, sample_id, sample, BCB)
202 | ```
203 | 
204 | 
205 | # Part 2 Signature visualisation
206 | 
207 | ## Data pre-processing
208 | 
209 | The object `sce_obj` we've been using was created as part of a study on breast cancer systemic immune response. Peripheral blood mononuclear cells have been sequenced for RNA at the single-cell level. The steps used to generate the object are summarised below.
210 | 
211 | -   `scran`, `scater`, and `DropletUtils` packages have been used to eliminate empty droplets and dead cells. Samples were individually quality checked and cells were filtered for good gene coverage.
212 | 
213 | -   Variable features were identified using `modelGeneVar`.
214 | 
215 | -   Read counts were scaled and normalised using `logNormCounts` from `scuttle`.
216 | 
217 | -   Data integration was performed using `fastMNN` with default parameters.
218 | 
219 | -   PCA was performed to reduce feature dimensionality.
220 | 
221 | -   Nearest-neighbor cell networks were calculated using 30 principal components.
222 | 
223 | -   2 UMAP dimensions were calculated using 30 principal components.
224 | 
225 | -   Cells with similar transcriptome profiles were grouped into clusters using Louvain clustering from `scran`.
226 | 
227 | ## Analyse custom signature
228 | 
229 | The researcher analysing this dataset wanted to identify gamma delta T cells using a gene signature from a published paper [@Pizzolato2019]. We'll show how that can be done here.
230 | 
231 | With tidySingleCellExperiment's `join_features` we can view the counts for genes in the signature as columns joined to our single cell tibble.
232 | 
233 | ```{r}
234 | sce_obj |>
235 |   join_features(c("CD3D", "TRDC", "TRGC1", "TRGC2", "CD8A", "CD8B"), shape = "wide")
236 | ```
237 | 
238 | We can use tidyverse `mutate` to create a column containing the signature score. To generate the score, we scale the sum of the 4 genes, CD3D, TRDC, TRGC1, TRGC2, and subtract the scaled sum of the 2 genes, CD8A and CD8B. `mutate` is powerful in enabling us to perform complex arithmetic operations easily.
239 | 
240 | ```{r}
241 | sce_obj |>
242 |     
243 |   join_features(c("CD3D", "TRDC", "TRGC1", "TRGC2", "CD8A", "CD8B"), shape = "wide") |>
244 |     
245 |   mutate(
246 |     signature_score =
247 |       scales::rescale(CD3D + TRDC + TRGC1 + TRGC2, to = c(0, 1)) -
248 |         scales::rescale(CD8A + CD8B, to = c(0, 1))
249 |   ) |>
250 |     
251 |   select(.cell, signature_score, everything())
252 | ```
253 | 
254 | The gamma delta T cells could then be visualised by the signature score using Bioconductor's visualisation functions.
255 | 
256 | ```{r}
257 | sce_obj |>
258 |     
259 |   join_features(
260 |     features = c("CD3D", "TRDC", "TRGC1", "TRGC2", "CD8A", "CD8B"), shape = "wide"
261 |   ) |>
262 |     
263 |   mutate(
264 |     signature_score =
265 |       scales::rescale(CD3D + TRDC + TRGC1 + TRGC2, to = c(0, 1)) -
266 |         scales::rescale(CD8A + CD8B, to = c(0, 1))
267 |   ) |>
268 |     
269 |   scater::plotUMAP(colour_by = "signature_score")
270 | ```
271 | 
272 | The cells could also be visualised using the popular and powerful `ggplot2` package, enabling the researcher to use ggplot functions they were familiar with, and to customise the plot with great flexibility.
273 | 
274 | ```{r}
275 | sce_obj |>
276 |     
277 |   join_features(
278 |     features = c("CD3D", "TRDC", "TRGC1", "TRGC2", "CD8A", "CD8B"), shape = "wide"
279 |   ) |>
280 |     
281 |   mutate(
282 |     signature_score =
283 |       scales::rescale(CD3D + TRDC + TRGC1 + TRGC2, to = c(0, 1)) -
284 |         scales::rescale(CD8A + CD8B, to = c(0, 1))
285 |   ) |>
286 |     
287 |   # plot cells with high score last so they're not obscured by other cells
288 |   arrange(signature_score) |>
289 |     
290 |   ggplot(aes(UMAP_1, UMAP_2, color = signature_score)) +
291 |   geom_point() +
292 |   scale_color_distiller(palette = "Spectral") +
293 |   bioc2022tidytranscriptomics::theme_multipanel
294 | ```
295 | 
296 | For exploratory analyses, we can select the gamma delta T cells, the red cluster on the left with high signature score. We'll filter for cells with a signature score > 0.7.
297 | 
298 | ```{r}
299 | sce_obj_gamma_delta <-
300 |     
301 |   sce_obj |>
302 |     
303 |   join_features(
304 |     features = c("CD3D", "TRDC", "TRGC1", "TRGC2", "CD8A", "CD8B"), shape = "wide"
305 |   ) |>
306 |     
307 |   mutate(
308 |     signature_score =
309 |       scales::rescale(CD3D + TRDC + TRGC1 + TRGC2, to = c(0, 1)) -
310 |         scales::rescale(CD8A + CD8B, to = c(0, 1))
311 |   ) |>
312 |     
313 |   # Rough score threshold for illustration; proper cluster selection should be used instead (see supplementary material)
314 |   filter(signature_score > 0.7)
315 | ```
316 | 
317 | For comparison, we show the alternative using base R and SingleCellExperiment. Note that the code contains more redundancy and intermediate objects.
318 | 
319 | ```{r eval=FALSE}
320 | counts_positive <-
321 |   assay(sce_obj, "logcounts")[c("CD3D", "TRDC", "TRGC1", "TRGC2"), ] |>
322 |   colSums() |>
323 |   scales::rescale(to = c(0, 1))
324 | 
325 | counts_negative <-
326 |   assay(sce_obj, "logcounts")[c("CD8A", "CD8B"), ] |>
327 |   colSums() |>
328 |   scales::rescale(to = c(0, 1))
329 | 
330 | sce_obj$signature_score <- counts_positive - counts_negative
331 | 
332 | sce_obj_gamma_delta <- sce_obj[, sce_obj$signature_score > 0.7]
333 | ```
334 | 
335 | We can then focus on just these gamma delta T cells and chain Bioconductor and tidyverse commands together to analyse.
336 | 
337 | ```{r warning=FALSE, message=FALSE}
338 | library(batchelor)
339 | library(scater)
340 | 
341 | sce_obj_gamma_delta <-
342 |     
343 |   sce_obj_gamma_delta |>
344 |     
345 |   # Integrate - using batchelor.
346 |   multiBatchNorm(batch = colData(sce_obj_gamma_delta)$sample) |>
347 |   fastMNN(batch = colData(sce_obj_gamma_delta)$sample) |>
348 |     
349 |   # Join metadata removed by fastMNN - using tidyverse
350 |   left_join(as_tibble(sce_obj_gamma_delta)) |>
351 |     
352 |   # Dimension reduction - using scater
353 |   runUMAP(ncomponents = 2, dimred = "corrected")
354 | ```
355 | 
356 | Visualise gamma delta T cells. As we have used a rough threshold, we are left with only a few cells. Proper cluster selection should be used instead (see supplementary material).
357 | 
358 | ```{r}	
359 | sce_obj_gamma_delta |> plotUMAP()	
360 | ```
361 | 
362 | 
363 | It is also possible to visualise the cells as a 3D plot using plotly.
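364 | The example data used here only contains a few genes, for the sake of time and size in this demonstration, but below is how you could generate the 3 dimensions needed for a 3D plot with a full dataset. The snippet uses Seurat's `RunUMAP`; for a SingleCellExperiment object, the scater equivalent is `runUMAP(ncomponents = 3)`.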
365 | 
366 | ```{r eval = FALSE}
367 | single_cell_object |>
368 |   RunUMAP(dims = 1:30, n.components = 3L, spread = 0.5, min.dist = 0.01, n.neighbors = 10L)
369 | ```
370 | 
371 | We'll demonstrate creating a 3D plot using some data that has 3 UMAP dimensions. This is a fantastic way to visualise both reduced dimensions and metadata in the same representation. 
372 | 
373 | ```{r umap plot 2, message = FALSE, warning = FALSE}
374 | pbmc <- bioc2022tidytranscriptomics::sce_obj_UMAP3
375 | # 3D scatter of the three UMAP dimensions, one marker per cell, coloured by cell type
376 | pbmc |>
377 |   plot_ly(
378 |     x = ~`UMAP_1`,
379 |     y = ~`UMAP_2`,
380 |     z = ~`UMAP_3`,
381 |     color = ~cell_type,
382 |     colors = dittoSeq::dittoColors()
383 |   ) |>
384 |   add_markers(size = I(1))
385 | ```
386 | 
387 | ## Exercises
388 | 
389 | Using the `sce_obj`:
390 | 
391 | 1. What proportion of all cells are gamma-delta T cells? Use signature_score > 0.7 to identify gamma-delta T cells.
392 | 
393 | 2. There is a cluster of cells characterised by a low RNA output (nCount_RNA < 100). Identify the cell composition (cell_type) of that cluster.
394 | 
395 | # Part 3 Pseudobulk analyses
396 | 
397 | Next we want to identify genes whose transcription is affected by treatment in this dataset, comparing treated and untreated patients. We can do this with pseudobulk analysis. We aggregate cell-wise transcript abundance into pseudobulk samples and can then perform hypothesis testing using the very well established bulk RNA sequencing tools. For example, we can use edgeR in tidybulk to perform differential expression testing. For more details on pseudobulk analysis see [here](https://hbctraining.github.io/scRNA-seq/lessons/pseudobulk_DESeq2_scrnaseq.html).
398 | 
399 | We want to do it for each cell type and the tidy transcriptomics ecosystem makes this very easy. 
400 | 
401 | 
402 | ## Create pseudobulk samples
403 | 
404 | To create pseudobulk samples from the single cell samples, we will use a helper function called `aggregate_cells`, available in this workshop package. This function will combine the single cells into a group for each cell type for each sample.
405 | 
406 | ```{r warning=FALSE, message=FALSE, echo=FALSE}
407 | library(glue)
408 | library(tidyr)
409 | library(purrr)
410 | library(patchwork)
411 | 
412 | # bulk RNA-seq libraries
413 | library(tidybulk)
414 | library(tidySummarizedExperiment)
415 | ```
416 | 
417 | ```{r}
418 | pseudo_bulk <-
419 |   sce_obj |>
420 |   bioc2022tidytranscriptomics::aggregate_cells(c(sample, cell_type), assays = "counts")
421 | 
422 | pseudo_bulk
423 | ```
424 | 
425 | ## Tidybulk and tidySummarizedExperiment
426 | 
427 | With `tidySummarizedExperiment` and `tidybulk` it is easy to split the data into groups and perform analyses on each without needing to create separate objects.
428 | 
429 | ```{r, echo=FALSE, out.width = "800px"}
430 | knitr::include_graphics("../man/figures/new_SE_usage-01.png")
431 | ```
432 | 
433 | We use tidyverse `nest` to group the data. The command below will create a tibble containing a column with a SummarizedExperiment object for each cell type. `nest` is similar to tidyverse `group_by`, except with `nest` each group is stored in a single row, and can be a complex object such as a plot or SummarizedExperiment.
434 | 
435 | ```{r}
436 | pseudo_bulk_nested <- 
437 |   pseudo_bulk |>
438 |   nest(grouped_summarized_experiment = -cell_type)
439 | 
440 | pseudo_bulk_nested
441 | ```
442 | 
443 | To explore the grouping, we can use tidyverse `slice` to choose a row (cell_type) and `pull` to extract the values from a column. If we pull the `grouped_summarized_experiment` column we can view the SummarizedExperiment object. 
444 | 
445 | ```{r}
446 | pseudo_bulk_nested |>
447 |   slice(1) |>
448 |   pull(grouped_summarized_experiment)
449 | ```
450 | 
451 | We can then identify differentially expressed genes for each cell type for our condition of interest, treated versus untreated patients. We use tidyverse `map` to apply differential expression functions to each cell type group in the nested data. The result columns will be added to the SummarizedExperiment objects.
452 | 
453 | ```{r message=FALSE, warning=FALSE}
454 | # Differential transcript abundance, tested separately within each cell type
455 | pseudo_bulk <-
456 |     
457 |   pseudo_bulk_nested |>
458 |     
459 |   # map accepts a data column (.x) and a function. It applies the function to each element of the column.
460 |   mutate(grouped_summarized_experiment = map(
461 |     grouped_summarized_experiment,
462 |     ~ .x |>
463 |         
464 |       # Flagging genes with low expression (tidybulk marks them rather than dropping rows; see ?identify_abundant)
465 |       identify_abundant(factor_of_interest = treatment) |>
466 |         
467 |       # Scaling counts for sequencing depth
468 |       scale_abundance(method="TMMwsp") |>
469 |       
470 |       # Testing for differential expression (treatment effect) using edgeR quasi likelihood
471 |       test_differential_abundance(~treatment, method="edgeR_quasi_likelihood", scaling_method="TMMwsp")
472 |   ))
473 | ```
474 | 
475 | The output is again a tibble containing a SummarizedExperiment object for each cell type.
476 | 
477 | ```{r}
478 | pseudo_bulk
479 | ```
480 | 
481 | If we pull out the SummarizedExperiment object for the first cell type, as before, we can see it now has columns containing the differential expression results (e.g. logFC, PValue).
482 | 
483 | ```{r}
484 | pseudo_bulk |>
485 |   slice(1) |>
486 |   pull(grouped_summarized_experiment)
487 | ```
488 | 
489 | It is useful to create plots for significant genes to visualise the transcriptional abundance in the groups being compared (treated and untreated). We can do this for each cell type without needing to create multiple objects. 
490 | 
491 | ```{r message = FALSE}
492 | pseudo_bulk <-
493 |     
494 |   pseudo_bulk |>
495 |     
496 |   # Keep only the significant genes within each cell type
497 |   # (a lenient FDR threshold is used because this is toy data)
498 |   mutate(grouped_summarized_experiment = map(
499 |     grouped_summarized_experiment,
500 |     ~ filter(.x, FDR < 0.5)
501 |   )) |>
502 |     
503 |   # Drop cell types with no differentially abundant gene-transcripts
504 |   # map_int is map that returns integer instead of list
505 |   filter(map_int(grouped_summarized_experiment, ~ nrow(.x)) > 0) |>
506 | 
507 |   # Plot significant genes for each cell type
508 |   # map2 is map that accepts 2 input columns (.x, .y) and a function
509 |   mutate(plot = map2(
510 |     grouped_summarized_experiment, cell_type,
511 |     ~ .x |>
512 |       ggplot(aes(treatment, counts_scaled + 1)) +
513 |       geom_boxplot(aes(fill = treatment)) +
514 |       geom_jitter() +
515 |       scale_y_log10() +
516 |       facet_wrap(~.feature) +
517 |       ggtitle(.y)
518 |   ))
519 | ```
520 | 
521 | The output is a nested table with a column containing a plot for each cell type.
522 | 
523 | ```{r}
524 | pseudo_bulk
525 | ```
526 | 
527 | We'll use `slice` and `pull` again to have a look at one of the plots.
528 | 
529 | ```{r}
530 | pseudo_bulk |>
531 |   slice(1) |>
532 |   pull(plot)
533 | ```
534 | 
535 | We can extract all plots and plot with `wrap_plots` from the patchwork package. 
536 | 
537 | ```{r, fig.height = 8}
538 | pseudo_bulk |>
539 |   pull(plot) |>
540 |   wrap_plots() &
541 |   bioc2022tidytranscriptomics::theme_multipanel
542 | ```
543 | 
544 | # Feedback
545 | 
546 | Thank you for attending this workshop. We hope it was an informative session for you. We would be grateful if you could help us by taking a few moments to provide your valuable feedback in the short form below. Your feedback will provide us with an opportunity to further improve the workshop. All the results are anonymous. 
547 | 
548 | [Feedback Form Link](https://forms.gle/rVzZo6k8QfYW2jJx6) 
549 | 
550 | **Session Information**
551 | 
552 | ```{r}
553 | sessionInfo()
554 | ```
555 | 
556 | **References**
557 | 


--------------------------------------------------------------------------------