├── .gitignore
├── 10X Datasets UMI per marker.Rmd
├── 10X-Datasets-UMI-per-marker.md
├── 10X-Datasets-UMI-per-marker_files
    └── figure-gfm
    │   └── unnamed-chunk-3-1.png
├── ADT counting methods.Rmd
├── ADT reads in cells vs empty drops.Rmd
├── ADT-counting-methods.md
├── ADT-counting-methods_files
    └── figure-gfm
    │   ├── unnamed-chunk-1-1.png
    │   ├── unnamed-chunk-1-2.png
    │   ├── unnamed-chunk-4-1.png
    │   ├── unnamed-chunk-6-1.png
    │   └── unnamed-chunk-9-1.png
├── ADT-reads-in-cells-vs-empty-drops.md
├── ADT-reads-in-cells-vs-empty-drops_files
    └── figure-gfm
    │   ├── DSBfigure-1.png
    │   ├── alluvial-1.png
    │   ├── barplot-1.png
    │   ├── figure-1.png
    │   ├── plotRatioDSB-1.png
    │   ├── ratio-1.png
    │   ├── unnamed-chunk-2-1.png
    │   └── unnamed-chunk-3-1.png
├── Antibody titration.Rmd
├── Antibody-titration.md
├── Antibody-titration_files
    └── figure-gfm
    │   ├── Figure1-1.png
    │   ├── UMIcountsPerCondition-1.png
    │   ├── UMIinExpressingCells-1.png
    │   ├── plotUMIcountsPerMarker-1.png
    │   ├── suppFig1-1.png
    │   ├── suppFig1-2.png
    │   ├── suppFig1-3.png
    │   ├── suppFig1-4.png
    │   ├── suppFig1-5.png
    │   ├── titrationExamples-1.png
    │   ├── tsnePlots-1.png
    │   └── unnamed-chunk-3-1.png
├── CITE-seq_optimization.Rproj
├── Cell number titration.Rmd
├── Cell-number-titration.md
├── Cell-number-titration_files
    └── figure-gfm
    │   ├── UMIcountsPerCondition-1.png
    │   ├── UMIinExpressingCells-1.png
    │   ├── figure-1.png
    │   ├── plotUMIcountsPerMarker-1.png
    │   ├── suppFig-1.png
    │   ├── suppFig-2.png
    │   ├── suppFig1-1.png
    │   ├── suppFig1-2.png
    │   ├── tsnePlots-1.png
    │   ├── unnamed-chunk-1-1.png
    │   └── unnamed-chunk-2-1.png
├── Demux_Preprocess_Downsample.Rmd
├── Demux_Preprocess_Downsample.md
├── Demux_Preprocess_Downsample_files
    └── figure-gfm
    │   ├── demux-1.png
    │   ├── demux-2.png
    │   ├── downsample-1.png
    │   ├── dsbnorm-1.png
    │   ├── filter-1.png
    │   ├── fineClusters-1.png
    │   ├── fineClusters-2.png
    │   ├── preprocessRNA-1.png
    │   ├── preprocessRNA-2.png
    │   ├── preprocessRNA-3.png
    │   ├── superclustering-1.png
    │   ├── superclustering-2.png
    │   ├── unnamed-chunk-1-1.png
    │   ├── unnamed-chunk-2-1.png
    │   ├── unnamed-chunk-3-1.png
    │   ├── unnamed-chunk-3-2.png
    │   └── unnamed-chunk-5-1.png
├── Load unfiltered data.Rmd
├── Load-unfiltered-data.md
├── Load-unfiltered-data_files
    └── figure-gfm
    │   ├── loadADT-1.png
    │   ├── loadGEX-1.png
    │   ├── loadHTO-1.png
    │   ├── unnamed-chunk-3-1.png
    │   ├── unnamed-chunk-4-1.png
    │   ├── unnamed-chunk-5-1.png
    │   └── unnamed-chunk-5-2.png
├── R
    ├── Utilities.R
    ├── color.R
    ├── feature_rankplot.R
    ├── feature_rankplot_hist.R
    ├── feature_rankplot_hist_custom.R
    └── ggplot_settings.R
├── README.md
├── Sequencing satuation.R
├── Snakemake
    ├── 10X_VDJ
    │   ├── Snakefile
    │   ├── include
    │   │   └── feature-ref.csv
    │   └── runSnakemake.sh
    ├── 10X_v3
    │   ├── Snakefile
    │   ├── include
    │   │   └── feature-ref.csv
    │   └── runSnakemake.sh
    ├── Snakefile
    ├── include
    │   └── feature-ref.csv
    └── runSnakemake.sh
├── Volume and cell number titration.Rmd
├── Volume titration.Rmd
├── Volume-and-cell-number-titration.md
├── Volume-and-cell-number-titration_files
    └── figure-gfm
    │   ├── UMIcountsPerCondition-1.png
    │   ├── UMIinExpressingCells-1.png
    │   ├── figure3-1.png
    │   ├── plotUMIcountsPerMarker-1.png
    │   ├── tsnePlots-1.png
    │   └── unnamed-chunk-1-1.png
├── Volume-titration.md
├── Volume-titration_files
    └── figure-gfm
    │   ├── UMIcountsPerCondition-1.png
    │   ├── UMIinExpressingCells-1.png
    │   ├── figure3-1.png
    │   ├── plotUMIcountsPerMarker-1.png
    │   ├── suppFig1-1.png
    │   ├── suppFig1-2.png
    │   ├── tsnePlots-1.png
    │   ├── unnamed-chunk-1-1.png
    │   └── unnamed-chunk-2-1.png
├── data
    ├── 5P-CITE-seq_Titration.rds
    ├── 5P-CITE-seq_Titration_full.rds
    ├── Supplementary_Table_1.xlsx
    └── markerByClusterStats.tsv
└── figures
    ├── Figure 1.png
    ├── Figure 2.png
    ├── Figure 3.png
    ├── Figure 4.png
    ├── Figure 5 wMule.png
    ├── Figure 5.png
    ├── Figure 6.png
    ├── Supplementary Fig S5.png
    ├── Supplementary Figure 2A.png
    ├── Supplementary Figure 2B.png
    ├── Supplementary Figure 2C.png
    ├── Supplementary Figure 2D.png
    ├── Supplementary Figure 2E.png
    ├── Supplementary Figure 3A.png
    ├── Supplementary Figure 3B.png
    ├── Supplementary Figure 4A.png
    ├── Supplementary Figure 4B.png
    ├── Supplementary Figure S1.png
    ├── Supplementary Figure S5.png
    ├── Supplementary Figure S6.png
    ├── Supplementary Figure S7A.png
    ├── Supplementary Figure S7B.png
    ├── Supplementary Figure S8.png
    ├── review_CD8_protein_rna_correlation.png
    ├── review_protein_rna_correlations.png
    └── review_washing_test.png


/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | .Ruserdata
5 | 


--------------------------------------------------------------------------------
/10X Datasets UMI per marker.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "CITE-seq optimization - 10X datasets: UMI per marker plot"
  3 | author: "Terkild Brink Buus"
  4 | date: "30/3/2020"
  5 | output: github_document
  6 | ---
  7 | 
  8 | ```{r setup, include=FALSE}
  9 | knitr::opts_chunk$set(warning=FALSE, message=FALSE)
 10 | options(stringsAsFactors=FALSE)
 11 | ```
 12 | 
 13 | ## Load utilities
 14 | 
 15 | Including libraries, plotting and color settings and custom utility functions
 16 | 
 17 | ```{r loadLibraries, results='hide', message=FALSE, warning=FALSE}
 18 | set.seed(114)  ## fixed RNG seed
 19 | library("tidyverse", quietly=TRUE)  ## library() stops on a missing package; require() would only warn
 20 | library("Matrix", quietly=TRUE)
 21 | 
 22 | ## Load ggplot theme and defaults
 23 | source("R/ggplot_settings.R")
 24 | 
 25 | ## Load helper functions
 26 | source("R/Utilities.R")
 27 | 
 28 | outdir <- "figures"  ## directory the final figure is written to
 29 | data.10X <- "data/data.10X.datasets.Rdata"  ## .Rdata file loaded in the next chunk
 30 | ```
 31 | 
 32 | ## Load data
 33 | 
 34 | 10X datasets were preprocessed in the [Load unfiltered data vignette](Load-unfiltered-data.md)
 35 | 
 36 | ```{r}
 37 | load(file=data.10X)
 38 | ```
 39 | 
 40 | ## Draw UMI per marker
 41 | 
 42 | These three 10X datasets used the same panel of antibodies under three conditions: 3' V3 chemistry at ~1,000 and ~10,000 cells using TotalSeqB antibodies, or 5' chemistry at 10,000 cells using TotalSeqC antibodies.
 43 | 
 44 | ```{r, fig.height=3, fig.width=7}
 45 | ## Extract per-marker ADT UMI sums from each dataset and combine them into one data.frame.
 46 | ## Building a list and binding once avoids the i == 1 special case of a growing rbind.
 47 | plotData <- do.call(rbind, 
 48 |   lapply(seq_along(data.10X.datasets), function(i){
 49 |     dataset <- data.10X.datasets[i]
 50 | 
 51 |     ## ADT UMI count matrix of this dataset (rows are markers, columns are barcodes)
 52 |     kallisto <- data.10X.datasets.adt.kallisto[[dataset]]
 53 |     ## Barcodes listed in gex.aboveInf that are also present in the ADT matrix
 54 |     cells <- intersect(data.10X.datasets.gex.aboveInf[[dataset]],colnames(kallisto))
 55 | 
 56 |     total <- Matrix::rowSums(kallisto)
 57 |     ## drop=FALSE keeps a matrix even when only a single barcode is selected
 58 |     Cell <- Matrix::rowSums(kallisto[,cells,drop=FALSE])
 59 |     ## Counts in all remaining barcodes are attributed to empty droplets
 60 |     EmptyDrop <- total-Cell
 61 | 
 62 |     ## One row per marker with the UMI sums of both compartments
 63 |     data.frame(Dataset=dataset,Marker=names(Cell),Cell,EmptyDrop)
 64 |   }))
 65 | 
 66 | ## Reshape to "long format" (one row per dataset, marker and compartment) and
 67 | ## prepare the plotting columns in a single pipeline
 68 | plotData <- plotData %>% 
 69 |   pivot_longer(c(-Marker, -Dataset)) %>% 
 70 |   mutate(Marker=gsub("isotype_control_","",Marker),  ## shorter isotype control names
 71 |          subset=factor(as.character(name), levels=c("EmptyDrop","Cell")), 
 72 |          Dataset=factor(as.character(Dataset), levels=data.10X.datasets))
 73 | 
 74 | ## Horizontal dodged bars of UMI counts (in millions) per marker, one panel per dataset
 75 | data.10X.markerBarplot <- ggplot(data=plotData, 
 76 |                                  mapping=aes(x=Marker, y=value/10^6, fill=subset)) + 
 77 |   geom_bar(stat="identity", position="dodge", color="black", width=0.65) + 
 78 |   scale_y_continuous(expand=c(0,0,0.05,0)) + 
 79 |   scale_fill_manual(values=c("lightgrey","black")) + 
 80 |   labs(y=bquote("ADT UMI counts ("~10^6~")")) + 
 81 |   coord_flip() + 
 82 |   facet_wrap(~Dataset, nrow=1, scales="free_x") + 
 83 |   ## Legend without title or background, anchored inside the top-right corner
 84 |   theme(legend.title=element_blank(), legend.background=element_blank(), 
 85 |         legend.position=c(1,0.98), legend.justification=c(1,1), 
 86 |         axis.title.y=element_blank())
 87 | 
 88 | ```
 89 | 
 90 | ## Final figure
 91 | 
 92 | ```{r, fig.height=5, fig.width=7}
 93 | ## Include knee_plots from preprocessing in the figure (one per dataset, in a single row)
 94 | data.10X.GEXrank <- cowplot::plot_grid(plotlist=data.10X.datasets.knee_plots, 
 95 |                                        labels=data.10X.datasets, 
 96 |                                        hjust=-0.65, 
 97 |                                        vjust=1.6, 
 98 |                                        label_size=7, 
 99 |                                        nrow=1)
100 | ## Stack the knee plots (panel A) above the marker barplot (panel B)
101 | p.figure <- cowplot::plot_grid(data.10X.GEXrank, data.10X.markerBarplot, 
102 |                                labels=c("A", "B"), 
103 |                                ncol=1, 
104 |                                rel_heights=c(2,3), 
105 |                                label_size=panel.label_size, 
106 |                                vjust=panel.label_vjust, 
107 |                                hjust=panel.label_hjust)
108 | 
109 | ## Write the figure to a PNG file in outdir
110 | png(file=file.path(outdir,"Supplementary Figure S6.png"), 
111 |     width=figure.width.full, 
112 |     height=5, 
113 |     units = figure.unit, 
114 |     res=figure.resolution, 
115 |     antialias=figure.antialias)
116 | ## Printing the figure draws it on the png device opened above
117 |   p.figure
118 | 
119 | dev.off()
120 | ## Show the figure in the knitted document as well
121 | p.figure
122 | 
123 | ```
124 | 


--------------------------------------------------------------------------------
/10X-Datasets-UMI-per-marker.md:
--------------------------------------------------------------------------------
  1 | CITE-seq optimization - 10X datasets: UMI per marker plot
  2 | ================
  3 | Terkild Brink Buus
  4 | 30/3/2020
  5 | 
  6 | ## Load utilities
  7 | 
  8 | Including libraries, plotting and color settings and custom utility
  9 | functions
 10 | 
 11 | ``` r
 12 | set.seed(114)
 13 | require("tidyverse", quietly=T)
 14 | library("Matrix", quietly=T)
 15 | 
 16 | ## Load ggplot theme and defaults
 17 | source("R/ggplot_settings.R")
 18 | 
 19 | ## Load helper functions
 20 | source("R/Utilities.R")
 21 | 
 22 | outdir <- "figures"
 23 | data.10X <- "data/data.10X.datasets.Rdata"
 24 | ```
 25 | 
 26 | ## Load data
 27 | 
 28 | 10X datasets were preprocessed in the [Load unfiltered data
 29 | vignette](Load-unfiltered-data.md)
 30 | 
 31 | ``` r
 32 | load(file=data.10X)
 33 | ```
 34 | 
 35 | ## Draw UMI per marker
 36 | 
 37 | These three 10X datasets used the same panel of antibodies under three
 38 | conditions: 3’ V3 chemistry at \~1,000 and \~10,000 cells using TotalSeqB
 39 | antibodies, or 5’ chemistry at 10,000 cells using TotalSeqC antibodies.
 40 | 
 41 | ``` r
 42 | ## Extract data from list into a combined data.frame
 43 | for(i in seq_along(data.10X.datasets)){
 44 |   dataset <- data.10X.datasets[i]
 45 |   
 46 |   kallisto <- data.10X.datasets.adt.kallisto[[dataset]]
 47 |   cells <- intersect(data.10X.datasets.gex.aboveInf[[dataset]],colnames(kallisto))
 48 |   
 49 |   
 50 |   total <- Matrix::rowSums(kallisto)
 51 |   Cell <- Matrix::rowSums(kallisto[,cells])
 52 |   EmptyDrop <- total-Cell
 53 | 
 54 |   add <- data.frame(Dataset=dataset,Marker=names(Cell),Cell,EmptyDrop)
 55 |   
 56 |   if(i == 1){
 57 |     plotData <- add
 58 |   } else {
 59 |     plotData <- rbind(plotData,add)
 60 |   }
 61 | }
 62 | 
 63 | ## Convert data into "long format" for plotting with ggplot
 64 | plotData <- plotData %>% pivot_longer(c(-Marker, -Dataset))
 65 | 
 66 | ## Rename isotype controls to get shorter names
 67 | plotData$Marker <- gsub("isotype_control_","",plotData$Marker)
 68 | plotData$subset <- factor(as.character(plotData$name), levels=c("EmptyDrop","Cell"))
 69 | plotData$Dataset <- factor(as.character(plotData$Dataset), levels=data.10X.datasets)
 70 | 
 71 | ## Make plot
 72 | data.10X.markerBarplot <- ggplot(plotData, aes(x=Marker, y=value/10^6, fill=subset)) + 
 73 |   geom_bar(stat="identity", position="dodge", color="black", width=0.65) + 
 74 |   scale_y_continuous(expand=c(0,0,0.05,0)) + 
 75 |   scale_fill_manual(values=c("lightgrey","black")) + 
 76 |   labs(y=bquote("ADT UMI counts ("~10^6~")")) + 
 77 |   coord_flip() + 
 78 |   facet_wrap(~Dataset, nrow=1, scales="free_x") + 
 79 |   theme(axis.title.y=element_blank(), 
 80 |         legend.position=c(1,0.98), 
 81 |         legend.justification=c(1,1), 
 82 |         legend.title=element_blank(),
 83 |         legend.background=element_blank())
 84 | ```
 85 | 
 86 | ## Final figure
 87 | 
 88 | ``` r
 89 | ## Include knee_plots from preprocessing in the figure
 90 | data.10X.GEXrank <- cowplot::plot_grid(plotlist=data.10X.datasets.knee_plots, 
 91 |                                        labels=data.10X.datasets, 
 92 |                                        hjust=-0.65, 
 93 |                                        vjust=1.6, 
 94 |                                        label_size=7, 
 95 |                                        nrow=1)
 96 | 
 97 | p.figure <- cowplot::plot_grid(data.10X.GEXrank, data.10X.markerBarplot, 
 98 |                                labels=c("A", "B"), 
 99 |                                ncol=1, 
100 |                                rel_heights=c(2,3), 
101 |                                label_size=panel.label_size, 
102 |                                vjust=panel.label_vjust, 
103 |                                hjust=panel.label_hjust)
104 | 
105 | 
106 | png(file=file.path(outdir,"Supplementary Figure S6.png"), 
107 |     width=figure.width.full, 
108 |     height=5, 
109 |     units = figure.unit, 
110 |     res=figure.resolution, 
111 |     antialias=figure.antialias)
112 | 
113 |   p.figure
114 | 
115 | dev.off()
116 | ```
117 | 
118 |     ## png 
119 |     ##   2
120 | 
121 | ``` r
122 | p.figure
123 | ```
124 | 
125 | ![](10X-Datasets-UMI-per-marker_files/figure-gfm/unnamed-chunk-3-1.png)<!-- -->
126 | 


--------------------------------------------------------------------------------
/10X-Datasets-UMI-per-marker_files/figure-gfm/unnamed-chunk-3-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/10X-Datasets-UMI-per-marker_files/figure-gfm/unnamed-chunk-3-1.png


--------------------------------------------------------------------------------
/ADT reads in cells vs empty drops.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "CITE-seq optimization - ADT in cell-containing vs empty drops"
  3 | author: "Terkild Brink Buus"
  4 | date: "30/3/2020"
  5 | output: github_document
  6 | ---
  7 | 
  8 | Background signal in CITE-seq has been proposed to be primarily caused by free-floating antibodies and can be assessed by measuring reads from non-cell-containing (empty) droplets (Mulé et al. 2020). In this vignette, we compare UMI counts from cell-containing vs. empty droplets.
  9 | 
 10 | ```{r setup, include=FALSE}
 11 | knitr::opts_chunk$set(warning=FALSE, message=FALSE)
 12 | options(stringsAsFactors=FALSE)
 13 | ```
 14 | 
 15 | ## Load utilities
 16 | 
 17 | Including libraries, plotting and color settings and custom utility functions
 18 | 
 19 | ```{r loadLibraries, results='hide', message=FALSE, warning=FALSE}
 20 | set.seed(114)  ## fixed RNG seed
 21 | library("Seurat", quietly=TRUE)  ## library() stops on a missing package; require() would only warn
 22 | library("tidyverse", quietly=TRUE)
 23 | library("Matrix", quietly=TRUE)
 24 | 
 25 | ## Load ggplot theme and defaults
 26 | source("R/ggplot_settings.R")
 27 | 
 28 | ## Load helper functions
 29 | source("R/Utilities.R")
 30 | 
 31 | ## Load color schemes
 32 | source("R/color.R")
 33 | 
 34 | outdir <- "figures"
 35 | data.drive <- "F:/"  ## NOTE(review): hard-coded drive letter; not referenced in the visible part of this vignette
 36 | data.abpanel <- "data/Supplementary_Table_1.xlsx"
 37 | data.markerStats <- "data/markerByClusterStats.tsv"
 38 | data.Seurat <- "data/5P-CITE-seq_Titration.rds"
 39 | 
 40 | show_tsne_markers <- c("CD4", "CD19", "CD86", "CD279", "TCRgd")
 41 | 
 42 | ## Custom label formatter for the concentration scale: two decimals, e.g. 0.15 -> "0.15"
 43 | scaleFUNformat <- function(x) sprintf("%.2f", x)
 44 | ```
 45 | 
 46 | ## Load the data
 47 | 
 48 | The ADT UMI count data has already been loaded and filtered in the "ADT counting methods" vignette. We'll load it from there. This includes the kallisto.ADT UMI count matrix as well as a list of barcodes that have been filtered to have gene expression UMI counts above the inflection point in the rank-barcode plot (used for calling cell-containing vs. empty droplets).
 49 | 
 50 | ```{r loadADTdata}
 51 | load("data/data.ADT.Rdata")  ## expected to provide kallisto.ADT and gex.aboveInf (both used below)
 52 | 
 53 | ## Preview of the ADT UMI count matrix (first 5 rows and columns)
 54 | kallisto.ADT[1:5,1:5]
 55 | 
 56 | ## Barcodes of cell-containing droplets
 57 | head(gex.aboveInf)
 58 | ```
 59 | 
 60 | ## Load antibody panel data
 61 | 
 62 | Antibody panel concentration data is loaded from the supplementary data excel sheet.
 63 | 
 64 | ```{r abdata}
 65 | abpanel <- data.frame(readxl::read_excel(data.abpanel))  ## plain data.frame so row names can be set
 66 | rownames(abpanel) <- abpanel$Marker  ## allows lookup of panel rows by marker name
 67 | 
 68 | head(abpanel)
 69 | ```
 70 | 
 71 | ## Preprocess data for plotting
 72 | 
 73 | Make sums of ADT UMI counts within cell-containing and empty droplets.
 74 | 
 75 | ```{r preprocess}
 76 | ADT.matrix <- kallisto.ADT
 77 | 
 78 | ## Calculate total UMI count per marker (rowSums avoids the dense coercion done by apply)
 79 | markerUMI <- Matrix::rowSums(ADT.matrix)
 80 | 
 81 | ## Calculate UMI count within cell-containing and empty droplets (drop=FALSE keeps a matrix for a single barcode)
 82 | markerUMI.inCell <- Matrix::rowSums(ADT.matrix[,gex.aboveInf,drop=FALSE])
 83 | markerUMI.inCell.freq <- markerUMI.inCell/sum(markerUMI.inCell)
 84 | markerUMI.inDrop <- markerUMI-markerUMI.inCell
 85 | markerUMI.inDrop.freq <- markerUMI.inDrop/sum(markerUMI.inDrop)
 86 | 
 87 | ## Make DF to allow combination of the data into a "long" format
 88 | df.inCell <- data.frame(count=markerUMI.inCell, freq=markerUMI.inCell.freq, subset="Cell", marker=names(markerUMI.inCell.freq))
 89 | df.inDrop <- data.frame(count=markerUMI.inDrop, freq=markerUMI.inDrop.freq, subset="EmptyDrop", marker=names(markerUMI.inDrop.freq))
 90 | 
 91 | plotData <- rbind(df.inCell, df.inDrop)
 92 | 
 93 | ## Add metadata: antibody concentration looked up by marker name in the panel sheet
 94 | plotData$conc <- abpanel[plotData$marker,"conc_µg_per_mL"]
 95 | 
 96 | plotData$subset <- factor(plotData$subset, levels=c("EmptyDrop","Cell"))
 97 | 
 98 | ## Order markers according to antibody concentration and UMI frequency within empty droplets (by setting levels).
 99 | ## Both the order and the marker names are taken from the EmptyDrop rows, so the indices always refer to the same rows.
100 | plotData$marker <- factor(plotData$marker, 
101 |                           levels=with(plotData[plotData$subset=="EmptyDrop",], marker[order(conc, freq)]))
102 | 
103 | head(plotData)
104 | ```
105 | 
106 | ## Draw cell-containing to empty droplet frequency ratio plot
107 | 
108 | ```{r ratio, fig.width=2, fig.height=5}
109 | data.ratio <- data.frame(ratio=markerUMI.inCell.freq/markerUMI.inDrop.freq) %>% mutate(Marker=rownames(.), conc=abpanel[rownames(.),"conc_µg_per_mL"]) %>% arrange(conc, ratio)
110 | ## Fix the plotting order: markers sorted by concentration, then by ratio
111 | data.ratio$Marker <- factor(data.ratio$Marker, levels=data.ratio$Marker)
112 | ## Horizontal bars of log2(ratio) plus a strip filled by concentration; one facet row per concentration
113 | p.ratio <- ggplot(data.ratio, aes(x=Marker, y=log2(ratio))) + 
114 |   geom_rect(aes(xmin=-Inf,xmax=Inf,ymin=-1,ymax=-1.25,fill=conc), col="black") + 
115 |   scale_fill_viridis_c(trans="log2", labels=scaleFUNformat, breaks=c(0.0375,0.15,0.625,2.5,10)) + 
116 |   ggnewscale::new_scale_fill() + 
117 |   geom_bar(stat="identity", aes(fill=log2(ratio)>0), color="black", width=0.4) +
118 |   geom_hline(yintercept=0) + 
119 |   scale_fill_manual(values=c(`FALSE`="lightgrey",`TRUE`="black")) + 
120 |   scale_x_discrete(expand=c(0, 0.5)) + 
121 |   scale_y_continuous(expand=c(0,0.05,0,0.05)) + 
122 |   coord_flip() + 
123 |   facet_grid(rows="conc", scales="free_y", space="free_y") + 
124 |   labs(title="Cell:Empty ratio", y="log2(Cells:Empty ratio)", fill="µg/mL") + 
125 |   theme(plot.title=element_text(size=7, face="bold", hjust=0.5), 
126 |         panel.spacing=unit(0.5,"mm"),
127 |         axis.line=element_line(), 
128 |         axis.title.y=element_blank(), 
129 |         strip.placement="outside", 
130 |         strip.text=element_blank(), 
131 |         panel.border=element_rect(color=alpha("black",0.25)),
132 |         legend.position="none", 
133 |         legend.justification=c(0,1),
134 |         legend.direction="horizontal",
135 |         legend.text.align=0, 
136 |         legend.key.width=unit(0.3,"cm"), 
137 |         legend.key.height=unit(0.4,"cm"), 
138 |         legend.text=element_text(size=unit(5,"pt")))
139 | 
140 | p.ratio
141 | ```
142 | 
143 | ## Draw barplot of UMI counts in cell-containing and empty-droplets
144 | 
145 | ```{r barplot, fig.height=5, fig.width=3}
146 | plotData$marker <- factor(as.character(plotData$marker), levels=levels(data.ratio$Marker))  ## same marker order as the ratio plot
147 | ## Dodged horizontal bars of UMI counts (in millions) per compartment plus a strip filled by concentration
148 | p.barplot <- ggplot(plotData, aes(x=marker, y=count/10^6)) + 
149 |   geom_rect(aes(xmin=-Inf,xmax=Inf,ymin=-0.050000,ymax=-0.010000,fill=conc), col="black") + 
150 |   scale_fill_viridis_c(trans="log2", labels=scaleFUNformat, breaks=c(0.0375,0.15,0.625,2.5,10)) + 
151 |   ggnewscale::new_scale_fill() + 
152 |   geom_bar(aes(fill=subset),stat="identity", position="dodge", color="black", width=0.65) + 
153 |   geom_hline(yintercept=0, col="black") + 
154 |   scale_fill_manual(values=c("Cell"="black","EmptyDrop"="lightgrey")) + 
155 |   scale_x_discrete(expand=c(0, 0.5)) + 
156 |   scale_y_continuous(expand=c(0,0,0,0.05)) + 
157 |   coord_flip() +
158 |   facet_grid(rows="conc", scales="free_y", space="free_y") +
159 |   guides(fill=guide_legend(reverse=TRUE)) + 
160 |   labs(title="UMI counts", y=bquote("UMI count ("~10^6~")"), fill="Compartment") + 
161 |   theme(plot.title=element_text(size=7, face="bold", hjust=0.5),
162 |         panel.border=element_blank(), 
163 |         panel.grid.major.y=element_blank(), 
164 |         panel.spacing=unit(0.5,"mm"),
165 |         axis.line=element_line(), 
166 |         axis.title.y=element_blank(),
167 |         #axis.text.y=element_blank(), 
168 |         strip.placement="outside", 
169 |         strip.text=element_blank(), 
170 |         legend.position=c(1,1), 
171 |         legend.justification=c(1,1),
172 |         legend.text.align=0, 
173 |         legend.key.width=unit(0.3,"cm"), 
174 |         legend.key.height=unit(0.4,"cm"), 
175 |         legend.text=element_text(size=unit(5,"pt")))
176 | 
177 | p.barplot
178 | ```
179 | 
180 | # Highlight markers
181 | 
182 | Determine which markers should be highlighted due to their differences between cell-containing and empty droplets. 
183 | 
184 | ```{r highlight}
185 | freq.threshold <- 0.05
186 | 
187 | ## Flag (1/0) every marker that reaches the threshold in at least one compartment
188 | plotData$highlight <- as.numeric(plotData$marker %in% plotData$marker[plotData$freq >= freq.threshold])
189 | 
190 | ## For the markers above the threshold, find the compartment with the highest frequency
191 | max.label <- plotData[plotData$freq >= freq.threshold,] %>% 
192 |   group_by(marker) %>% 
193 |   summarize(subset.max=subset[which.max(freq)])
194 | ## Label a marker only in that compartment; all other rows get NA
195 | plotData$label <- ifelse(paste(plotData$marker,plotData$subset) %in% paste(max.label$marker,max.label$subset.max) & plotData$freq >= freq.threshold, as.character(plotData$marker), NA)
196 | ```
197 | 
198 | ## Make alluvial "river" plot of markers in each compartment
199 | 
200 | To allow labelling the markers, we need to calculate the cumulative frequency.
201 | 
202 | ```{r alluvial, fig.height=5, fig.width=1.3}
203 | ## Order the dataframe (marker.conc sets the stacking order of the markers)
204 | plotData$marker.conc <- factor(as.character(plotData$marker), levels=unique(plotData$marker[order(-plotData$conc, plotData$marker, decreasing=TRUE)]))
205 | plotData <- plotData[order(plotData$marker.conc, decreasing=TRUE),]
206 | ## Running sum of the frequencies within each compartment, in the row order set above
207 | plotData$cummulativeFreq <- 0
208 | plotData$cummulativeFreq[plotData$subset=="EmptyDrop"] <- cumsum(plotData$freq[plotData$subset=="EmptyDrop"])
209 | plotData$cummulativeFreq[plotData$subset=="Cell"] <- cumsum(plotData$freq[plotData$subset=="Cell"])
210 | 
211 | ## A bit of a hack to get the columns in order
212 | #plotData$subset.rev <- factor(as.character(plotData$subset), levels=c("Cell","EmptyDrop"))
213 | ## One stratum per marker in each compartment, filled by concentration; labels sit at cummulativeFreq-(freq/2)
214 | p.alluvial <- ggplot(plotData, aes(y=freq, x=subset, fill=conc, stratum = marker.conc, alluvium = marker.conc)) + 
215 |   ggalluvial::geom_flow(width = 1/2, color=alpha("black",0.25), alpha=0.75) + 
216 |   ggalluvial::geom_stratum(width = 1/2) +
217 |   geom_text(aes(y=cummulativeFreq-(freq/2),label=label), na.rm=TRUE, vjust=0.5, hjust=0.5,  angle=30, size=1.5, fontface="bold") + 
218 |   scale_fill_viridis_c(trans="log2", labels=scaleFUNformat, breaks=c(0.0375,0.15,0.625,2.5,10)) + 
219 |   scale_y_continuous(expand=c(0,0)) + 
220 |   scale_x_discrete(expand=c(0,0), limits=rev(levels(plotData$subset))) + 
221 |   labs(title="Frequency", y="UMI frequency", fill="DF1 µg/mL") + 
222 |   theme(plot.title=element_text(size=7, face="bold", hjust=0.5),
223 |         legend.position="none", 
224 |         axis.title.x=element_blank(), 
225 |         panel.grid=element_blank())
226 | 
227 | p.alluvial
228 | ```
229 | 
230 | # Specific signals despite background
231 | 
232 | Despite high background (as assayed by a high number of reads in empty droplets), most markers provide specific signal. However, the number of UMIs needed to achieve this signal is much lower in the markers with high signal-to-noise.
233 | 
234 | ```{r}
235 | ## Read the object stored at data.Seurat
236 | object <- readRDS(file=data.Seurat)
237 | 
238 | ## Show number of cells from each sample
239 | table(object$group)
240 | ## Keep only cells with volume "50µl" and dilution "DF1"
241 | object <- subset(object, subset=volume == "50µl" & dilution == "DF1")
242 | object
243 | ## Make the kallisto ADT assay the default assay
244 | DefaultAssay(object) <- "ADT.kallisto"
245 | ```
246 | 
247 | ## Show "positive" cutoff according to concentration
248 | 
249 | Another way to show this is to show the number of UMIs required to get above the background threshold (defined in Supplementary Figure S1)
250 | 
251 | ```{r, fig.height=5, fig.width=3}
252 | markerStats <- read.table(data.markerStats)
253 | rownames(markerStats) <- paste(markerStats$marker,markerStats$tissue,sep="_")  ## row key: "<marker>_<tissue>"
254 | 
255 | ## Determine which tissue has highest percentage positive cells and use this to set cutoff.
256 | markerStats.max <- markerStats %>% group_by(marker) %>% filter(pct==max(pct))  ## NOTE(review): not referenced again in the visible part of this vignette
257 | ## Raw ADT UMI counts and the tissue annotation of each cell
258 | data.UMI <- GetAssayData(object, assay="ADT.kallisto", slot="counts")
259 | data.meta <- FetchData(object, vars=c("tissue"))
260 | ## Per marker and tissue: UMI count at quantile 1-min(0.95,(pct+20)/100), then join the marker statistics
261 | marker.data <- as.data.frame(data.UMI) %>% 
262 |   mutate(marker=rownames(.)) %>% 
263 |   pivot_longer(-marker) %>% 
264 |   group_by(marker, tissue=data.meta[name,"tissue"]) %>% 
265 |   summarize(pos.cutoff=quantile(value, probs=(1-min(0.95,(markerStats[paste(marker[1],tissue[1],sep="_"),"pct"]+20)/100)))) %>% left_join(markerStats)
266 | ## Use the same marker order as in the ratio plot
267 | marker.data$marker <- factor(as.character(marker.data$marker), levels=levels(data.ratio$Marker))
268 | ## Dodged horizontal bars of the cutoff per tissue; one facet row per antibody concentration
269 | p.UMIcutoff <- ggplot(marker.data, aes(x=marker, y=pos.cutoff, group=tissue, fill=tissue)) + 
270 |   geom_bar(position="dodge", stat="identity", color="black", width=0.65) + 
271 |   scale_fill_manual(values=color.tissue) + 
272 |   scale_x_discrete(expand=c(0, 0.5)) + 
273 |   scale_y_continuous(expand=c(0,0.05,0,0.05)) + 
274 |   coord_flip() + 
275 |   facet_grid(rows="conc_µg_per_mL", scales="free_y", space="free_y") + 
276 |   labs(title="UMI cutoff", y="Above-background cutoff (UMI)", fill="Tissue") + 
277 |   theme(plot.title=element_text(size=7, face="bold", hjust=0.5), 
278 |         panel.border=element_blank(), 
279 |         panel.grid.major.y=element_blank(), 
280 |         panel.spacing=unit(0.5,"mm"),
281 |         axis.line=element_line(), 
282 |         axis.title.y=element_blank(),
283 |         axis.text.y=element_blank(), 
284 |         strip.placement="outside", 
285 |         strip.text=element_blank(), 
286 |         legend.position=c(1,1), 
287 |         legend.justification=c(1,1),
288 |         legend.text.align=0, 
289 |         legend.key.width=unit(0.3,"cm"), 
290 |         legend.key.height=unit(0.4,"cm"), 
291 |         legend.text=element_text(size=unit(5,"pt")))
292 | 
293 | p.UMIcutoff
294 | 
295 | ```
296 | 
297 | Make tSNE plots with raw UMI counts. Use rainbow color scheme to show dynamic range in expression levels.
298 | 
299 | ```{r, fig.height=1.6, fig.width=7}
300 | f.tsne.format <- function(x){  ## adds the shared color scale, y-axis expansion and theme to a single plot x
301 |     x + 
302 |     scale_color_gradientn(colours = c("#000033","#3333FF","#3377FF","#33AAFF","#33CC33","orange","red"), 
303 |                           limits=c(0,NA)) + 
304 |     scale_y_continuous(expand=c(0.15,0,0.05,0)) + 
305 |     theme_get() + 
306 |     theme(plot.title=element_text(size=7, face="bold", hjust=0.5),
307 |           plot.background=element_blank(),
308 |           panel.background=element_blank(),
309 |           axis.title=element_blank(),
310 |           axis.text.x=element_blank(),
311 |           axis.text.y=element_blank(),
312 |           legend.key.width=unit(3,"mm"),
313 |           legend.key.height=unit(2,"mm"),
314 |           legend.position=c(1,-0.03),
315 |           legend.justification=c(1,0),
316 |           legend.background=element_blank(),
317 |           legend.direction="horizontal")
318 | }
319 | ## One tSNE plot of raw counts per marker in show_tsne_markers, each passed through f.tsne.format
320 | p.tsne <- lapply(FeaturePlot(object, reduction="tsne", sort=TRUE,  combine=FALSE,  
321 |                            features=show_tsne_markers, 
322 |                            slot="counts", 
323 |                            max.cutoff='q90', 
324 |                            pt.size=0.1),
325 |                FUN=f.tsne.format)
326 | 
327 | ## Get common y-axis label
328 | p.tsne[[1]] <- p.tsne[[1]] + theme(axis.title.y=element_text())
329 | # a bit of a hack to get a common x-axis label
330 | p.tsne[[3]] <- p.tsne[[3]] + theme(axis.title.x=element_text(hjust=0.5))
331 | ## Arrange the plots side by side in a single row (first plot slightly wider)
332 | p.UMI.tsne <- cowplot::plot_grid(plotlist=p.tsne, 
333 |                                  align="h", 
334 |                                  axis="tb", 
335 |                                  nrow=1, 
336 |                                  rel_widths=c(1.07,1,1,1,1),
337 |                                  labels=c("E","","F","","G"), 
338 |                                  label_size=panel.label_size, 
339 |                                  vjust=panel.label_vjust, 
340 |                                  hjust=panel.label_hjust)
341 | 
342 | p.UMI.tsne
343 | ```
344 | 
345 | Make similar plots for all markers
346 | 
347 | ```{r}
348 | markers <- sort(rownames(object[["ADT.kallisto"]])) 
349 | 
350 | p.tsne.all <- lapply(FeaturePlot(object, reduction="tsne", sort=TRUE,  combine=FALSE,  
351 |                            features=markers, 
352 |                            slot="counts", 
353 |                            max.cutoff='q90', 
354 |                            pt.size=0.1),
355 |                FUN=f.tsne.format)
356 | 
357 | names(p.tsne.all) <- markers
358 | 
359 | p.tsne.all <- lapply(markers, function(x) p.tsne.all[[x]] + ggtitle(paste0(x," (",markerStats[paste0(x,"_PBMC"),"conc_µg_per_mL"]," µg/mL)")))
360 | 
361 | plot.columns <- 5
362 | plot.num <- length(p.tsne.all)
363 | plot.rows <- ceiling(plot.num/plot.columns)
364 | plot.rowSplit <- 6
365 | 
366 | ## Reduce margins
367 | p.tsne.all <- lapply(p.tsne.all, function(x) x + 
368 |                        theme(plot.margin=unit(c(0.1,0.1,0.3,0.1),"mm")))
369 | 
370 | ## Get common y-axis label
371 | p.tsne.all[(c(0:(plot.rows-1))*plot.columns+1)] <- lapply(p.tsne.all[(c(0:(plot.rows-1))*plot.columns+1)], function(x) x + theme(axis.title.y=element_text()))
372 | 
373 | ## Show axis label for the center plot of the last row
374 | p.tsne.all[[(plot.columns*plot.rowSplit-floor(plot.columns/2))]] <- p.tsne.all[[(plot.columns*plot.rowSplit-floor(plot.columns/2))]] + theme(axis.title.x=element_text(hjust=0.5))
375 | # a bit of a hack to get a common x-axis label on the last row: label the last plot (index plot.num) instead of a hardcoded 52; hjust=2 was presumably tuned for 52 markers (two plots in the last row) - adjust if the panel size changes
376 | p.tsne.all[[plot.num]] <- p.tsne.all[[plot.num]] + theme(axis.title.x=element_text(hjust=2))
377 | 
378 | p.UMI.tsne.all.1 <- cowplot::plot_grid(plotlist=p.tsne.all[1:(plot.rowSplit*plot.columns)], align="h", axis="tb", ncol=plot.columns, rel_widths=c(1.1,1,1,1,1))
379 | p.UMI.tsne.all.2 <- cowplot::plot_grid(plotlist=p.tsne.all[(plot.rowSplit*plot.columns+1):plot.num], align="h", axis="tb", ncol=plot.columns, rel_widths=c(1.1,1,1,1,1))
380 | 
381 | png(file=file.path(outdir,paste0("Supplementary Figure S7A.png")), 
382 |       units=figure.unit, 
383 |       res=figure.resolution, 
384 |       width=figure.width.full, 
385 |       height=(figure.width.full/plot.columns*plot.rowSplit)*1.1,
386 |       antialias=figure.antialias)
387 | 
388 |   p.UMI.tsne.all.1
389 |   
390 | dev.off()
391 | 
392 | png(file=file.path(outdir,paste0("Supplementary Figure S7B.png")), 
393 |       units=figure.unit, 
394 |       res=figure.resolution, 
395 |       width=figure.width.full, 
396 |       height=(figure.width.full/plot.columns*(plot.rows-plot.rowSplit))*1.1,
397 |       antialias=figure.antialias)
398 | 
399 |   p.UMI.tsne.all.2
400 |   
401 | dev.off()
402 | ```
403 | 
404 | ## Combine figure
405 | 
406 | ```{r figure, fig.height=5.9, fig.width=figure.width.full}
407 | p.row1 <- cowplot::plot_grid(p.barplot + theme(plot.margin=unit(c(0.02,0,0,0),"npc")), 
408 |                               p.alluvial, 
409 |                               p.ratio + theme(plot.margin=unit(c(0,0,0,0.05),"npc")), 
410 |                               p.UMIcutoff + theme(plot.margin=unit(c(0,0,0,-0.007),"npc")), 
411 |                               nrow=1, 
412 |                               rel_widths=c(1.75,0.75,1.2,1.3), 
413 |                               align="h", 
414 |                               axis="tb", 
415 |                               labels=c("A", "B", "C", "D"), 
416 |                               label_size=panel.label_size, 
417 |                               vjust=panel.label_vjust, 
418 |                               hjust=panel.label_hjust)
419 | 
420 | p.final <- cowplot::plot_grid(p.row1, p.UMI.tsne, 
421 |                               ncol=1, 
422 |                               rel_heights=c(3,1.05))
423 | 
424 | p.final
425 | 
426 | png(file=file.path(outdir,"Figure 5.png"), width=figure.width.full, height=5.9, units=figure.unit, res=figure.resolution, antialias=figure.antialias)
427 | p.final
428 | dev.off()
429 | ```


--------------------------------------------------------------------------------
/ADT-counting-methods_files/figure-gfm/unnamed-chunk-1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/ADT-counting-methods_files/figure-gfm/unnamed-chunk-1-1.png


--------------------------------------------------------------------------------
/ADT-counting-methods_files/figure-gfm/unnamed-chunk-1-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/ADT-counting-methods_files/figure-gfm/unnamed-chunk-1-2.png


--------------------------------------------------------------------------------
/ADT-counting-methods_files/figure-gfm/unnamed-chunk-4-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/ADT-counting-methods_files/figure-gfm/unnamed-chunk-4-1.png


--------------------------------------------------------------------------------
/ADT-counting-methods_files/figure-gfm/unnamed-chunk-6-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/ADT-counting-methods_files/figure-gfm/unnamed-chunk-6-1.png


--------------------------------------------------------------------------------
/ADT-counting-methods_files/figure-gfm/unnamed-chunk-9-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/ADT-counting-methods_files/figure-gfm/unnamed-chunk-9-1.png


--------------------------------------------------------------------------------
/ADT-reads-in-cells-vs-empty-drops_files/figure-gfm/DSBfigure-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/ADT-reads-in-cells-vs-empty-drops_files/figure-gfm/DSBfigure-1.png


--------------------------------------------------------------------------------
/ADT-reads-in-cells-vs-empty-drops_files/figure-gfm/alluvial-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/ADT-reads-in-cells-vs-empty-drops_files/figure-gfm/alluvial-1.png


--------------------------------------------------------------------------------
/ADT-reads-in-cells-vs-empty-drops_files/figure-gfm/barplot-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/ADT-reads-in-cells-vs-empty-drops_files/figure-gfm/barplot-1.png


--------------------------------------------------------------------------------
/ADT-reads-in-cells-vs-empty-drops_files/figure-gfm/figure-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/ADT-reads-in-cells-vs-empty-drops_files/figure-gfm/figure-1.png


--------------------------------------------------------------------------------
/ADT-reads-in-cells-vs-empty-drops_files/figure-gfm/plotRatioDSB-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/ADT-reads-in-cells-vs-empty-drops_files/figure-gfm/plotRatioDSB-1.png


--------------------------------------------------------------------------------
/ADT-reads-in-cells-vs-empty-drops_files/figure-gfm/ratio-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/ADT-reads-in-cells-vs-empty-drops_files/figure-gfm/ratio-1.png


--------------------------------------------------------------------------------
/ADT-reads-in-cells-vs-empty-drops_files/figure-gfm/unnamed-chunk-2-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/ADT-reads-in-cells-vs-empty-drops_files/figure-gfm/unnamed-chunk-2-1.png


--------------------------------------------------------------------------------
/ADT-reads-in-cells-vs-empty-drops_files/figure-gfm/unnamed-chunk-3-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/ADT-reads-in-cells-vs-empty-drops_files/figure-gfm/unnamed-chunk-3-1.png


--------------------------------------------------------------------------------
/Antibody-titration_files/figure-gfm/Figure1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Antibody-titration_files/figure-gfm/Figure1-1.png


--------------------------------------------------------------------------------
/Antibody-titration_files/figure-gfm/UMIcountsPerCondition-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Antibody-titration_files/figure-gfm/UMIcountsPerCondition-1.png


--------------------------------------------------------------------------------
/Antibody-titration_files/figure-gfm/UMIinExpressingCells-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Antibody-titration_files/figure-gfm/UMIinExpressingCells-1.png


--------------------------------------------------------------------------------
/Antibody-titration_files/figure-gfm/plotUMIcountsPerMarker-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Antibody-titration_files/figure-gfm/plotUMIcountsPerMarker-1.png


--------------------------------------------------------------------------------
/Antibody-titration_files/figure-gfm/suppFig1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Antibody-titration_files/figure-gfm/suppFig1-1.png


--------------------------------------------------------------------------------
/Antibody-titration_files/figure-gfm/suppFig1-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Antibody-titration_files/figure-gfm/suppFig1-2.png


--------------------------------------------------------------------------------
/Antibody-titration_files/figure-gfm/suppFig1-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Antibody-titration_files/figure-gfm/suppFig1-3.png


--------------------------------------------------------------------------------
/Antibody-titration_files/figure-gfm/suppFig1-4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Antibody-titration_files/figure-gfm/suppFig1-4.png


--------------------------------------------------------------------------------
/Antibody-titration_files/figure-gfm/suppFig1-5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Antibody-titration_files/figure-gfm/suppFig1-5.png


--------------------------------------------------------------------------------
/Antibody-titration_files/figure-gfm/titrationExamples-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Antibody-titration_files/figure-gfm/titrationExamples-1.png


--------------------------------------------------------------------------------
/Antibody-titration_files/figure-gfm/tsnePlots-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Antibody-titration_files/figure-gfm/tsnePlots-1.png


--------------------------------------------------------------------------------
/Antibody-titration_files/figure-gfm/unnamed-chunk-3-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Antibody-titration_files/figure-gfm/unnamed-chunk-3-1.png


--------------------------------------------------------------------------------
/CITE-seq_optimization.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: Default
 4 | SaveWorkspace: Default
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 


--------------------------------------------------------------------------------
/Cell number titration.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "CITE-seq optimization - Reducing cell number at staining"
  3 | author: "Terkild Brink Buus"
  4 | date: "30/3/2020"
  5 | output: github_document
  6 | ---
  7 | 
  8 | ```{r setup, include=FALSE}
  9 | knitr::opts_chunk$set(warning=FALSE, message=FALSE)
 10 | options(stringsAsFactors=FALSE)
 11 | ```
 12 | 
 13 | ## Load utilities
 14 | 
 15 | Including libraries, plotting and color settings and custom utility functions
 16 | 
 17 | ```{r loadLibraries, results='hide', message=FALSE, warning=FALSE}
 18 | set.seed(114)
 19 | require("Seurat", quietly=T)
 20 | require("tidyverse", quietly=T)
 21 | library("Matrix", quietly=T)
 22 | library("patchwork", quietly=T)
 23 | 
 24 | ## Load ggplot theme and defaults
 25 | source("R/ggplot_settings.R")
 26 | 
 27 | ## Load helper functions
 28 | source("R/Utilities.R")
 29 | 
 30 | ## Load predefined color schemes
 31 | source("R/color.R")
 32 | 
 33 | ## Load feature_rankplot functions
 34 | source("R/feature_rankplot.R")
 35 | source("R/feature_rankplot_hist.R")
 36 | source("R/feature_rankplot_hist_custom.R")
 37 | 
 38 | outdir <- "figures"
 39 | data.Seurat <- "data/5P-CITE-seq_Titration.rds"
 40 | data.abpanel <- "data/Supplementary_Table_1.xlsx"
 41 | data.markerStats <- "data/markerByClusterStats.tsv"
 42 | 
 43 | ## Make a custom function for formatting the concentration scale
 44 | scaleFUNformat <- function(x) sprintf("%.2f", x)
 45 | ```
 46 | 
 47 | ## Load Seurat object
 48 | 
 49 | Subset to only focus on conditions stained in 25µl (dilution factor 4), thus comparing 1000k to 200k cells at staining in PBMCs.
 50 | 
 51 | ```{r loadSeurat}
 52 | object <- readRDS(file=data.Seurat)
 53 | 
 54 | ## Show number of cells from each sample
 55 | table(object$group)
 56 | 
 57 | object <- subset(object, subset=volume == "25µl")
 58 | object
 59 | ```
 60 | 
 61 | ## Load Ab panel annotation and concentrations
 62 | 
 63 | Marker stats are reused in other comparisons and were calculated at the end of the preprocessing vignette.
 64 | 
 65 | ```{r loadABPanel}
 66 | abpanel <- data.frame(readxl::read_excel(data.abpanel))
 67 | rownames(abpanel) <- abpanel$Marker
 68 | 
 69 | ## As we are only working with dilution factor 4 samples here, we want to show labels accordingly
 70 | # a bit of a hack...
 71 | abpanel$conc_µg_per_mL <- abpanel$conc_µg_per_mL/4
 72 | 
 73 | markerStats <- read.table(data.markerStats)
 74 | markerStats.PBMC <- markerStats[markerStats$tissue == "PBMC",]
 75 | rownames(markerStats) <- paste(markerStats$marker,markerStats$tissue,sep="_")
 76 | 
 77 | ## Make an ordering vector, ordering markers by decreasing concentration and then by decreasing total UMI count
 78 | marker.order <- markerStats.PBMC$marker[order(markerStats.PBMC$conc_µg_per_mL, markerStats.PBMC$UMItotal, decreasing=TRUE)]
 79 | 
 80 | head(abpanel)
 81 | head(markerStats)
 82 | ```
 83 | 
 84 | ## Cell type and tissue overview
 85 | 
 86 | Make tSNE plots colored by cell type, cluster and number of cells at staining.
 87 | 
 88 | ```{r tsnePlots, fig.height=3, fig.width=7}
 89 | p.tsne.cellsAtStaining <- DimPlot(object, group.by="cellsAtStaining", reduction="tsne", pt.size=0.1, combine=FALSE)[[1]] + theme_get() + facet_wrap(~"cellsAtStaining") + scale_color_manual(values=color.cellsAtStaining)
 90 | 
 91 | p.tsne.cluster <- DimPlot(object, group.by="supercluster", reduction="tsne", pt.size=0.1, combine=FALSE)[[1]] + theme_get() + scale_color_manual(values=color.supercluster) + facet_wrap(~"Cell types")
 92 | 
 93 | p.tsne.finecluster <- DimPlot(object, label=TRUE, label.size=3, reduction="tsne", group.by="fineCluster", pt.size=0.1, combine=FALSE)[[1]] + theme_get() + facet_wrap(  ~"Clusters") + guides(col=F)
 94 | 
 95 | p.tsne.cluster + p.tsne.finecluster + p.tsne.cellsAtStaining
 96 | ```
 97 | 
 98 | ## Overall ADT counts
 99 | 
100 | Extract UMI data and calculate UMI sum per marker within each condition.
101 | 
102 | ```{r calculateUMIcountsPerMarker}
103 | ## Get the data
104 | ADT.matrix <- data.frame(GetAssayData(object, assay="ADT.kallisto", slot="counts"))
105 | ADT.matrix$marker <- rownames(ADT.matrix)
106 | ADT.matrix$conc <- abpanel[ADT.matrix$marker,"conc_µg_per_mL"]
107 | ADT.matrix <- ADT.matrix %>% pivot_longer(c(-marker,-conc))
108 | 
109 | ## Get cell annotations
110 | cell.annotation <- FetchData(object, vars=c("cellsAtStaining"))
111 | 
112 | ## Calculate marker sum for each number of cells at staining (cellsAtStaining)
113 | ADT.matrix.agg <- ADT.matrix %>% group_by(cellsAtStaining=cell.annotation[name,"cellsAtStaining"], marker, conc) %>% summarise(sum=sum(value))
114 | 
115 | ## Order markers by concentration
116 | ADT.matrix.agg$marker.byConc <- factor(ADT.matrix.agg$marker, levels=marker.order)
117 | 
118 | ## Extract marker annotation
119 | ann.markerConc <- abpanel[marker.order,]
120 | ann.markerConc$Marker <- factor(marker.order, levels=marker.order)
121 | 
122 | ADT.matrix.agg.total <- ADT.matrix.agg
123 | ```
124 | 
125 | ## Plot overall ADT counts by conditions
126 | 
127 | Samples stained with diluted Ab panel have reduced ADT counts.
128 | 
129 | ```{r UMIcountsPerCondition, fig.width=2.5, fig.height=2}
130 | ## Stacked bar of total ADT UMI counts per cellsAtStaining condition; rows are ordered by decreasing concentration, then decreasing UMI sum. The ordering is computed from ADT.matrix.agg.total itself (the table being plotted) so the chunk still works after ADT.matrix.agg is recomputed further down.
131 | p.UMIcountsPerCondition <- ggplot(ADT.matrix.agg.total[order(-ADT.matrix.agg.total$conc, -ADT.matrix.agg.total$sum),], aes(x=cellsAtStaining, y=sum/10^6, fill=conc)) + 
132 |   geom_bar(stat="identity", col=alpha(col="black",alpha=0.05)) + 
133 |   scale_fill_viridis_c(trans="log2", labels=scaleFUNformat, breaks=c(0.0375,0.15,0.625,2.5,10)) + 
134 |   scale_y_continuous(expand=c(0,0,0,0.05)) + 
135 |   labs(fill="DF4\nµg/mL", y=bquote("ADT UMI counts ("~10^6~")")) + 
136 |   guides(fill=guide_colourbar(reverse=T)) + 
137 |   theme(panel.grid.major=element_blank(), axis.title.x=element_blank(), panel.border=element_blank(), axis.line = element_line(), legend.position="right")
138 | 
139 | p.UMIcountsPerCondition
140 | ```
141 | 
142 | ## Compare total UMI counts per marker
143 | 
144 | Plot total UMI counts for each marker at the investigated cell numbers at staining (1000k vs. 200k). To ease readability, we place dashed lines between each concentration.
145 | 
146 | ```{r plotUMIcountsPerMarker, fig.width=4.5, fig.height=5}
147 | ## Calculate "breaks" where the concentration changes.
148 | lines <- length(marker.order)-cumsum(sapply(split(ann.markerConc$Marker,ann.markerConc$conc_µg_per_mL),length))+0.5
149 | lines <- data.frame(breaks=lines[-length(lines)])
150 | 
151 | ## Make a marker by concentration "heatmap"
152 | p.markerByConc <- ggplot(ann.markerConc, aes(x=1, y=Marker, fill=conc_µg_per_mL)) + 
153 |   geom_tile(col=alpha(col="black",alpha=0.2)) + 
154 |   geom_hline(data=lines,aes(yintercept=breaks), linetype="dashed", alpha=0.5) + 
155 |   scale_fill_viridis_c(trans="log2") + 
156 |   labs(fill="µg/mL") + 
157 |   theme_get() + 
158 |   theme(axis.ticks.x=element_blank(), axis.title = element_blank(), axis.text.x=element_blank(), panel.grid=element_blank(), legend.position="right", plot.margin=unit(c(0.1,0.1,0.1,0.1),"mm")) + scale_x_continuous(expand=c(0,0))
159 |   
160 | ## Make UMI counts per Marker plot
161 | p.UMIcountsPerMarker <- ggplot(ADT.matrix.agg, aes(x=marker.byConc,y=log2(sum))) + 
162 |   geom_line(aes(group=marker), size=1.2, color="#666666") + 
163 |   geom_point(aes(group=cellsAtStaining, fill=cellsAtStaining), pch=21, size=0.7) + 
164 |   geom_vline(data=lines,aes(xintercept=breaks), linetype="dashed", alpha=0.5) + 
165 |   scale_fill_manual(values=color.cellsAtStaining) + 
166 |   scale_y_continuous(breaks=c(9:17)) + 
167 |   ylab("log2(UMI sum)") + 
168 |   guides(fill=guide_legend(override.aes=list(size=1.5), reverse=TRUE)) + 
169 |   theme(axis.title.y=element_blank(), axis.text.y=element_blank(), legend.position="bottom", legend.justification="left", legend.title.align=0, legend.key.width=unit(0.2,"cm"), legend.title=element_blank()) + 
170 |   coord_flip()
171 | 
172 | ## Combine plot with markerByConc annotation heatmap
173 | plotUMIcountsPerMarker <- p.markerByConc + guides(fill=F) + p.UMIcountsPerMarker + guides(fill=F) + plot_spacer() + guide_area() + plot_layout(ncol=4, widths=c(1,30,0.1), guides='collect')
174 | 
175 | plotUMIcountsPerMarker
176 | ```
177 | 
178 | ## Compare change in UMI/cell within expressing cluster
179 | 
180 | Using a specific percentile may be prone to outliers in small clusters (e.g. the 90th percentile of a cluster of 30 cells will be the third-highest cell, making it prone to outliers). We thus only use the 90th percentile if the cluster contains more than 100 cells. For smaller clusters, the median is used. The expressing cluster is identified in the "preprocessing" vignette.
181 | 
182 | ```{r UMIinExpressingCells, fig.width=4.5, fig.height=5}
183 | ## Get the data
184 | ADT.matrix <- data.frame(GetAssayData(object, assay="ADT.kallisto", slot="counts"))
185 | ADT.matrix$marker <- rownames(ADT.matrix)
186 | ADT.matrix$conc <- abpanel[ADT.matrix$marker,"conc_µg_per_mL"]
187 | ADT.matrix <- ADT.matrix %>% pivot_longer(c(-marker,-conc))
188 | 
189 | ## Get cell annotations
190 | cell.annotation <- FetchData(object, vars=c("cellsAtStaining", "fineCluster"))
191 | 
192 | ## Calculate marker statistics for each cellsAtStaining condition within each cluster
193 | ADT.matrix.agg <- ADT.matrix %>% group_by(cellsAtStaining=cell.annotation[name,"cellsAtStaining"], fineCluster=cell.annotation[name,"fineCluster"], marker, conc) %>% summarise(sum=sum(value), median=quantile(value, probs=c(0.9)), nth=nth(value))
194 | ADT.matrix.agg$tissue <- "PBMC" # label rows as PBMC to match the PBMC-only markerStats used below (was a stray `==` comparison that only printed a value)
195 | 
196 | ## Use data for the previously determined expressing cluster.
197 | Cluster.max <- markerStats[markerStats$tissue == "PBMC",c("marker","fineCluster")]
198 | Cluster.max$fineCluster <- factor(Cluster.max$fineCluster)
199 | 
200 | ADT.matrix.aggByClusterMax <- Cluster.max %>% left_join(ADT.matrix.agg)
201 | ADT.matrix.aggByClusterMax$marker.byConc <- factor(ADT.matrix.aggByClusterMax$marker, levels=marker.order)
202 | 
203 | p.UMIinExpressingCells <- ggplot(ADT.matrix.aggByClusterMax, aes(x=marker.byConc, y=log2(nth))) + 
204 |   geom_line(aes(group=marker), size=1.2, color="#666666") + 
205 |   geom_point(aes(group=cellsAtStaining, fill=cellsAtStaining), pch=21, size=0.7) + 
206 |   geom_vline(data=lines,aes(xintercept=breaks), linetype="dashed", alpha=0.5) + 
207 |   geom_text(aes(label=paste0(fineCluster," ")), y=Inf, adj=1, size=1.5) + 
208 |   scale_fill_manual(values=color.cellsAtStaining) + 
209 |   scale_y_continuous(breaks=c(0:11), labels=2^c(0:11), expand=c(0.05,0.5)) + 
210 |   ylab("90th percentile UMI of expressing cluster") + 
211 |   theme(axis.title.y=element_blank(), axis.text.y=element_blank(), legend.position="right", legend.justification="left", legend.title.align=0, legend.key.width=unit(0.2,"cm")) + 
212 |   coord_flip()
213 | 
214 | ## Combine plot with markerByConc annotation heatmap
215 | UMIinExpressingCells <- p.markerByConc + theme(legend.position="none") + p.UMIinExpressingCells + theme(legend.position="none") + plot_spacer() + plot_layout(ncol=4, widths=c(1,30,0.1), guides='collect')
216 | 
217 | UMIinExpressingCells
218 | ```
219 | 
220 | ## Titration examples
221 | 
222 | Most markers are largely unaffected by reducing the number of cells at staining. However, some antibodies used at low concentrations and targeting abundant epitopes are affected; one such example is CD31:
223 | 
224 | ```{r fig.width=1.4, fig.height=2.3}
225 | ## Helper for plotting a titration plot of one ADT marker via feature_rankplot_hist_custom, grouped by cellsAtStaining. Args: marker = marker name (used to index abpanel and markerStats); gate.PBMC = optional manual gate that replaces the markerStats-derived one; gate.Lung = not used in this function; y.axis = passed on as yaxis.text; show.gate = if not TRUE, no gate is passed (gates=NULL); legend = passed on as legend. Returns whatever feature_rankplot_hist_custom returns. Reads the globals object, abpanel, markerStats and color.cellsAtStaining.
226 | titrationPlot <- function(marker, gate.PBMC=NULL, gate.Lung=NULL, y.axis=FALSE, show.gate=TRUE, legend=FALSE){
227 |   curMarker.name <- marker
228 |   
229 |   ## Get antibody concentration for legends (abpanel$conc_µg_per_mL was divided by 4 when the panel was loaded, so this is the DF4 concentration despite the variable name)
230 |   curMarker.DF1conc <- abpanel[curMarker.name, "conc_µg_per_mL"]
231 |   if(show.gate==TRUE){
232 |     ## Load gating percentages from manually set DSB thresholds (column "pct" of the PBMC row(s) for this marker)
233 |     gate <- data.frame(gate=markerStats[markerStats$marker == curMarker.name & markerStats$tissue== "PBMC",c("pct")])
234 |     gate$gate <- 1-(gate$gate/100) # convert the percentage to its complementary fraction (1 - pct/100)
235 |     rownames(gate) <- gate$wrap # NOTE(review): gate has no "wrap" column here, so gate$wrap is NULL and this only resets the row names - confirm intent
236 |     ## Allow manual gating (the supplied value replaces the whole gate data.frame)
237 |     if(!is.null(gate.PBMC)) gate <- gate.PBMC
238 |   } else {
239 |     gate <- NULL
240 |   }
241 | 
242 |   p <- feature_rankplot_hist_custom(data=object, 
243 |                                     marker=paste0("adt_",curMarker.name),      
244 |                                     group="cellsAtStaining",
245 |                                     barcodeGroup="supercluster",
246 |                                     conc=curMarker.DF1conc, 
247 |                                     legend=legend, 
248 |                                     yaxis.text=y.axis, 
249 |                                     gates=gate,
250 |                                     histogram.colors=color.cellsAtStaining, 
251 |                                     title=curMarker.name)
252 |   
253 |   return(p)
254 | }
255 | 
256 | p.CD31 <- titrationPlot("CD31", legend=TRUE)
257 | 
258 | p.CD31
259 | ```
260 | 
261 | ## tSNE plots
262 | 
263 | Make tSNE plots with raw UMI counts. Use rainbow color scheme to show dynamic range in expression levels.
264 | 
265 | ```{r, fig.height=2, fig.width=7}
266 | show_tsne_markers <- c("CD31","CD44")
267 | f.tsne.format <- function(x){ # Apply the shared tSNE styling (color scale, fixed axis limits, theme) to ggplot object x and return the modified plot
268 |     x + 
269 |     scale_color_gradientn(colours = c("#000033","#3333FF","#3377FF","#33AAFF","#33CC33","orange","red"), 
270 |                           limits=c(0,NA)) + # color scale starts at 0; upper limit (NA) is left to be derived from the data
271 |     scale_y_continuous(expand=c(0,0,0.05,0), limits=c(-45.52796,37.94770)) + # hardcoded y limits so all panels share the same extent
272 |     xlim(c(-40.83170,49.63832)) + # hardcoded x limits
273 |     theme_get() + # start from the currently active ggplot theme; the theme() call below overrides parts of it, so order matters
274 |     theme(plot.title=element_text(size=7, face="bold", hjust=0.5),
275 |           plot.background=element_blank(),
276 |           panel.background=element_blank(),
277 |           axis.title=element_blank(), # axis titles are hidden here; callers re-enable them on selected panels
278 |           axis.text.x=element_blank(),
279 |           axis.text.y=element_blank(),
280 |           legend.key.width=unit(3,"mm"),
281 |           legend.key.height=unit(2,"mm"),
282 |           legend.position=c(1,-0.03), # together with legend.justification: legend's bottom-right corner anchored at x=1, y=-0.03
283 |           legend.justification=c(1,0),
284 |           legend.background=element_blank(),
285 |           legend.direction="horizontal")
286 | }
287 | 
288 | maximum <- apply(FetchData(object, vars=paste0("adt_",show_tsne_markers), slot="counts"),2,quantile,probs=c(0.95))
289 | 
290 | p.tsne.1 <- f.tsne.format(FeaturePlot(subset(object, subset=cellsAtStaining=="1000k"), reduction="tsne", sort=TRUE,  combine=FALSE, features=paste0("adt_",show_tsne_markers[1]), slot="counts", max.cutoff=maximum[1], pt.size=0.1)[[1]])
291 | p.tsne.2 <- f.tsne.format(FeaturePlot(subset(object, subset=cellsAtStaining=="200k"), reduction="tsne", sort=TRUE,  combine=FALSE, features=paste0("adt_",show_tsne_markers[1]), slot="counts", max.cutoff=maximum[1], pt.size=0.1)[[1]])
292 | p.tsne.3 <- f.tsne.format(FeaturePlot(subset(object, subset=cellsAtStaining=="1000k"), reduction="tsne", sort=TRUE,  combine=FALSE, features=paste0("adt_",show_tsne_markers[2]), slot="counts", max.cutoff=maximum[2], pt.size=0.1)[[1]])
293 | p.tsne.4 <- f.tsne.format(FeaturePlot(subset(object, subset=cellsAtStaining=="200k"), reduction="tsne", sort=TRUE,  combine=FALSE, features=paste0("adt_",show_tsne_markers[2]), slot="counts", max.cutoff=maximum[2], pt.size=0.1)[[1]])
294 | 
295 | p.tsne <- list(p.tsne.1 + ggtitle("1000k"),p.tsne.2 + ggtitle("200k"),p.tsne.3 + ggtitle("1000k"),p.tsne.4 + ggtitle("200k"))
296 | ## Get common y-axis label
297 | p.tsne[[1]] <- p.tsne[[1]] + theme(axis.title.y=element_text())
298 | # a bit of a hack to get a common x-axis label
299 | p.tsne[[2]] <- p.tsne[[2]] + theme(axis.title.x=element_text(hjust=1.2))
300 | 
301 | p.UMI.tsne <- cowplot::plot_grid(plotlist=p.tsne, 
302 |                                  align="h", 
303 |                                  axis="tb", 
304 |                                  nrow=1, 
305 |                                  rel_widths=c(1.05,1,1,1),
306 |                                  labels=c("E",show_tsne_markers[1],"",show_tsne_markers[2]),
307 |                                  label_size=panel.label_size, 
308 |                                  vjust=panel.label_vjust, 
309 |                                  hjust=c(panel.label_hjust,0.5,panel.label_hjust,0.5))
310 | 
311 | p.UMI.tsne
312 | ```
313 | 
314 | ## Final plot
315 | 
316 | ```{r figure, fig.width=7, fig.height=6}
317 | A <- p.UMIcountsPerCondition + theme(legend.key.width=unit(0.3,"cm"), 
318 |                                      legend.key.height=unit(0.4,"cm"), 
319 |                                      legend.text=element_text(size=unit(5,"pt")),
320 |                                      plot.margin=unit(c(0.3,0,0.5,0),"cm"))
321 | 
322 | B1 <- p.markerByConc + theme(text = element_text(size=10), 
323 |                              plot.margin=unit(c(0.3,0,0,0),"cm"),
324 |                              legend.position="none")
325 | B2 <- p.UMIcountsPerMarker + theme(legend.position="none")
326 | C <- p.UMIinExpressingCells + theme(legend.position="none")
327 | 
328 | BC.legend <- cowplot::get_legend(p.UMIcountsPerMarker + 
329 |                                    guides(fill=guide_legend(reverse=FALSE)) + 
330 |                                    theme(legend.position="bottom", 
331 |                                          legend.direction="horizontal", 
332 |                                          legend.background=element_blank(), 
333 |                                          legend.box.background=element_blank(), legend.key=element_blank()))
334 | 
335 | D <- p.CD31 + theme(plot.margin=unit(c(0.5,0,0,0),"cm"))
336 | 
337 | AD <- cowplot::plot_grid(A,D,NULL, 
338 |                          ncol=1, 
339 |                          rel_heights=c(13,17,1.5),
340 |                          labels=c("A","D",""), 
341 |                          label_size=panel.label_size, 
342 |                          vjust=panel.label_vjust, 
343 |                          hjust=panel.label_hjust)
344 | 
345 | BC <- cowplot::plot_grid(B1, B2, C, 
346 |                          nrow=1, 
347 |                          rel_widths=c(2,10,10), 
348 |                          align="h", 
349 |                          axis="tb", 
350 |                          labels=c("B", "", "C"), 
351 |                          label_size=panel.label_size, 
352 |                          vjust=panel.label_vjust, 
353 |                          hjust=panel.label_hjust)
354 | 
355 | ## Stack two rows (ncol=1): the AD/BC composite with BC.legend drawn on top of it, then p.UMI.tsne
356 | p.figure <- cowplot::plot_grid(
357 |     cowplot::ggdraw(cowplot::plot_grid(AD, BC, nrow=1, rel_widths=c(1,4), align="v", axis="l")) + 
358 |       ## inner plot_grid is namespace-qualified like the other cowplot calls, so cowplot need not be attached
359 |       cowplot::draw_plot(BC.legend,0.27,0.020,0.2,0.00001),
360 |     p.UMI.tsne, 
361 |     rel_heights=c(3,1.35), align="v", axis="lr", ncol=1)
362 | 
363 | png(file=file.path(outdir,"Figure 4.png"), 
364 |     width=figure.width.full, 
365 |     height=6, 
366 |     units = figure.unit, 
367 |     res=figure.resolution, 
368 |     antialias=figure.antialias)
369 | 
370 |   p.figure
371 |   
372 | dev.off()
373 | 
374 | p.figure
375 | ```
376 | 
377 | ## Individual titration plots
378 | 
379 | For supplementary information.
380 | 
381 | ```{r suppFig, fig.width=7, fig.height=10}
382 | plots.columns <- 6
383 | rows.max <- 5
384 | 
385 | markers <- abpanel[rownames(object[["ADT.kallisto"]]),]
386 | markers <- markers[order(markers$Category, markers$Marker),]
387 | 
388 | plots <- list()
389 | 
390 | ## Make individual plots for each marker; y.axis is TRUE for the first plot of every row of plots.columns plots
391 | for(i in seq_len(nrow(markers))){
392 |   curMarker <- markers[i,]
393 |   curMarker.name <- curMarker$Marker
394 |   y.axis <- ((i-1) %% plots.columns) == 0
395 |   plots[[curMarker.name]] <- titrationPlot(curMarker.name, y.axis=y.axis)
396 | }
397 | 
398 | # a bit of a hack to make celltype legend
399 | p.legend <- cowplot::get_legend(ggplot(data.frame(supercluster=object$supercluster), 
400 |                                            aes(color=supercluster,x=1,y=1)) + 
401 |   geom_point(shape=15, size=1.5) + 
402 |   scale_color_manual(values=color.supercluster) + 
403 |   theme(legend.title=element_blank(), 
404 |         legend.margin=margin(0,0,0,0), 
405 |         legend.key.size = unit(0.15,"cm"),
406 |         legend.position = c(0.98,1.1), 
407 |         legend.justification=c(1,1), 
408 |         legend.direction="horizontal"))
409 | 
410 | plots.num <- length(plots)
411 | plots.perPage <- plots.columns*rows.max
412 | plots.pages <- ceiling(plots.num/plots.perPage)
413 | 
414 | ## Make a supplementary figure split into pages (seq_len: no iteration at all when there are no plots)
415 | for(i in seq_len(plots.pages)){
416 |   start <- (i-1)*plots.perPage+1
417 |   end <- i*plots.perPage
418 |   end <- min(end,plots.num)
419 |   curPlots <- c(start:end)
420 |   plots.rows <- ceiling(length(curPlots)/plots.columns)
421 |   ## first column is 10% wider than the rest (presumably to fit its y-axis); widths follow plots.columns
422 |   curPlots <- cowplot::plot_grid(plotlist=plots[curPlots],ncol=plots.columns, rel_widths=c(1.1,rep(1,plots.columns-1)), align="h", axis="tb")
423 |   curPlots.layout <- cowplot::plot_grid(NULL, p.legend, curPlots, vjust=-0.5, hjust=panel.label_hjust, label_size=panel.label_size, ncol=1, rel_heights= c(0.5, 1.3, 70/5*plots.rows))
424 |   
425 |   png(file=file.path(outdir,paste0("Supplementary Figure 4",LETTERS[i],".png")), 
426 |       units=figure.unit, 
427 |       res=figure.resolution, 
428 |       width=figure.width.full, 
429 |       height=(2*plots.rows),
430 |       antialias=figure.antialias)
431 | 
432 |   print(curPlots.layout)
433 |   
434 |   dev.off()
435 |   
436 |   print(curPlots.layout)
437 | }
438 | ```
439 | 


--------------------------------------------------------------------------------
/Cell-number-titration_files/figure-gfm/UMIcountsPerCondition-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Cell-number-titration_files/figure-gfm/UMIcountsPerCondition-1.png


--------------------------------------------------------------------------------
/Cell-number-titration_files/figure-gfm/UMIinExpressingCells-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Cell-number-titration_files/figure-gfm/UMIinExpressingCells-1.png


--------------------------------------------------------------------------------
/Cell-number-titration_files/figure-gfm/figure-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Cell-number-titration_files/figure-gfm/figure-1.png


--------------------------------------------------------------------------------
/Cell-number-titration_files/figure-gfm/plotUMIcountsPerMarker-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Cell-number-titration_files/figure-gfm/plotUMIcountsPerMarker-1.png


--------------------------------------------------------------------------------
/Cell-number-titration_files/figure-gfm/suppFig-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Cell-number-titration_files/figure-gfm/suppFig-1.png


--------------------------------------------------------------------------------
/Cell-number-titration_files/figure-gfm/suppFig-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Cell-number-titration_files/figure-gfm/suppFig-2.png


--------------------------------------------------------------------------------
/Cell-number-titration_files/figure-gfm/suppFig1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Cell-number-titration_files/figure-gfm/suppFig1-1.png


--------------------------------------------------------------------------------
/Cell-number-titration_files/figure-gfm/suppFig1-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Cell-number-titration_files/figure-gfm/suppFig1-2.png


--------------------------------------------------------------------------------
/Cell-number-titration_files/figure-gfm/tsnePlots-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Cell-number-titration_files/figure-gfm/tsnePlots-1.png


--------------------------------------------------------------------------------
/Cell-number-titration_files/figure-gfm/unnamed-chunk-1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Cell-number-titration_files/figure-gfm/unnamed-chunk-1-1.png


--------------------------------------------------------------------------------
/Cell-number-titration_files/figure-gfm/unnamed-chunk-2-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Cell-number-titration_files/figure-gfm/unnamed-chunk-2-1.png


--------------------------------------------------------------------------------
/Demux_Preprocess_Downsample_files/figure-gfm/demux-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Demux_Preprocess_Downsample_files/figure-gfm/demux-1.png


--------------------------------------------------------------------------------
/Demux_Preprocess_Downsample_files/figure-gfm/demux-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Demux_Preprocess_Downsample_files/figure-gfm/demux-2.png


--------------------------------------------------------------------------------
/Demux_Preprocess_Downsample_files/figure-gfm/downsample-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Demux_Preprocess_Downsample_files/figure-gfm/downsample-1.png


--------------------------------------------------------------------------------
/Demux_Preprocess_Downsample_files/figure-gfm/dsbnorm-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Demux_Preprocess_Downsample_files/figure-gfm/dsbnorm-1.png


--------------------------------------------------------------------------------
/Demux_Preprocess_Downsample_files/figure-gfm/filter-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Demux_Preprocess_Downsample_files/figure-gfm/filter-1.png


--------------------------------------------------------------------------------
/Demux_Preprocess_Downsample_files/figure-gfm/fineClusters-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Demux_Preprocess_Downsample_files/figure-gfm/fineClusters-1.png


--------------------------------------------------------------------------------
/Demux_Preprocess_Downsample_files/figure-gfm/fineClusters-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Demux_Preprocess_Downsample_files/figure-gfm/fineClusters-2.png


--------------------------------------------------------------------------------
/Demux_Preprocess_Downsample_files/figure-gfm/preprocessRNA-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Demux_Preprocess_Downsample_files/figure-gfm/preprocessRNA-1.png


--------------------------------------------------------------------------------
/Demux_Preprocess_Downsample_files/figure-gfm/preprocessRNA-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Demux_Preprocess_Downsample_files/figure-gfm/preprocessRNA-2.png


--------------------------------------------------------------------------------
/Demux_Preprocess_Downsample_files/figure-gfm/preprocessRNA-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Demux_Preprocess_Downsample_files/figure-gfm/preprocessRNA-3.png


--------------------------------------------------------------------------------
/Demux_Preprocess_Downsample_files/figure-gfm/superclustering-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Demux_Preprocess_Downsample_files/figure-gfm/superclustering-1.png


--------------------------------------------------------------------------------
/Demux_Preprocess_Downsample_files/figure-gfm/superclustering-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Demux_Preprocess_Downsample_files/figure-gfm/superclustering-2.png


--------------------------------------------------------------------------------
/Demux_Preprocess_Downsample_files/figure-gfm/unnamed-chunk-1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Demux_Preprocess_Downsample_files/figure-gfm/unnamed-chunk-1-1.png


--------------------------------------------------------------------------------
/Demux_Preprocess_Downsample_files/figure-gfm/unnamed-chunk-2-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Demux_Preprocess_Downsample_files/figure-gfm/unnamed-chunk-2-1.png


--------------------------------------------------------------------------------
/Demux_Preprocess_Downsample_files/figure-gfm/unnamed-chunk-3-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Demux_Preprocess_Downsample_files/figure-gfm/unnamed-chunk-3-1.png


--------------------------------------------------------------------------------
/Demux_Preprocess_Downsample_files/figure-gfm/unnamed-chunk-3-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Demux_Preprocess_Downsample_files/figure-gfm/unnamed-chunk-3-2.png


--------------------------------------------------------------------------------
/Demux_Preprocess_Downsample_files/figure-gfm/unnamed-chunk-5-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Demux_Preprocess_Downsample_files/figure-gfm/unnamed-chunk-5-1.png


--------------------------------------------------------------------------------
/Load unfiltered data.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "CITE-seq optimization - Load unfiltered data"
  3 | author: "Terkild Brink Buus"
  4 | date: "30/3/2020"
  5 | output: github_document
  6 | ---
  7 | 
  8 | ```{r setup, include=FALSE}
  9 | knitr::opts_chunk$set(warning=FALSE, message=FALSE)
 10 | options(stringsAsFactors=FALSE)
 11 | ```
 12 | 
 13 | ## Load utilities
 14 | 
 15 | Including libraries, plotting and color settings and custom utility functions
 16 | 
 17 | ```{r loadLibraries, results='hide', message=FALSE, warning=FALSE}
 18 | set.seed(114)
 19 | require("Seurat", quietly=T)
 20 | require("tidyverse", quietly=T)
 21 | library("Matrix", quietly=T)
 22 | library("DropletUtils", quietly=T)
 23 | 
 24 | ## Load ggplot theme and defaults
 25 | source("R/ggplot_settings.R")
 26 | 
 27 | ## Load helper functions
 28 | source("R/Utilities.R")
 29 | 
 30 | read_kallisto_data <- function(file.path){
 31 |   ## Read the kallisto-bustools count files in directory `file.path` into a sparse genes x barcodes matrix
 32 |   counts_dir <- file.path  # local alias: the argument name shadows base::file.path()
 33 |   first_column <- function(txt) read.csv(base::file.path(counts_dir, txt), sep = '\t', header = FALSE)[,1]
 34 |   ## The mtx file is stored cells x genes, so it is transposed before conversion to a CsparseMatrix
 35 |   res_mat <- as(t(readMM(base::file.path(counts_dir, "cells_x_genes.mtx"))), 'CsparseMatrix')
 36 |   rownames(res_mat) <- first_column("cells_x_genes.genes.txt")
 37 |   colnames(res_mat) <- first_column("cells_x_genes.barcodes.txt")
 38 |   return(res_mat)
 39 | }
 40 | ```
 41 | 
 42 | ## Set file paths
 43 | 
 44 | The aligned and counted read outputs from the various algorithms were generated using Snakemake; the workflow can be seen in the included [Snakefile](Snakefile).
 45 | 
 46 | ```{r}
 47 | data.drive <- "F:/"
 48 | data.project.dir <- "Projects/ECCITE-seq/TotalSeqC_TitrationA"
 49 | outdir <- "figures"
 50 | t2g.file <- file.path(data.drive,data.project.dir,"/kallisto/t2g_cellranger.txt")
 51 | kallistobusDir <- file.path(data.drive,data.project.dir,"kallisto/gex/c1/counts_unfiltered")
 52 | 
 53 | ## ADT data: kallisto, Cell Ranger and CITE-seq-Count outputs ("_nocorrect" presumably = run without correction; confirm in Snakefile)
 54 | kallistobusDirADT <- file.path(data.drive,data.project.dir,"kallisto/features/A1_S5.ADT_15/counts_unfiltered")
 55 | data10XADTDir <- file.path(data.drive,data.project.dir,"cellranger_A1/outs/raw_feature_bc_matrix")
 56 | dataCSCADTDir <- file.path(data.drive,data.project.dir,"cite-seq-count/A1_S5_d1_ADT/umi_count")
 57 | dataCSCADTDir.dense <- file.path(data.drive,data.project.dir,"cite-seq-count/A1_S5_d1_ADT/uncorrected_cells/dense_umis.tsv")
 58 | dataCSCADTnocorrectDir <- file.path(data.drive,data.project.dir,"cite-seq-count/A1_S5_d1_ADT_nocorrect/umi_count")
 59 | dataCSCADTnocorrectDir.dense <- file.path(data.drive,data.project.dir,"cite-seq-count/A1_S5_d1_ADT_nocorrect/uncorrected_cells/dense_umis.tsv")
 60 | 
 61 | ## HTO data: same set of outputs as for ADT, from the H1_S6 library
 62 | kallistobusDirHTO <- file.path(data.drive,data.project.dir,"kallisto/features/H1_S6.HTO_A_13/counts_unfiltered")
 63 | data10XHTODir <- file.path(data.drive,data.project.dir,"cellranger_H1/outs/raw_feature_bc_matrix")
 64 | dataCSCHTODir <- file.path(data.drive,data.project.dir,"cite-seq-count/H1_S6_d1_HTO/umi_count")
 65 | dataCSCHTODir.dense <- file.path(data.drive,data.project.dir,"cite-seq-count/H1_S6_d1_HTO/uncorrected_cells/dense_umis.tsv")
 66 | dataCSCHTOnocorrectDir <- file.path(data.drive,data.project.dir,"cite-seq-count/H1_S6_d1_HTO_nocorrect/umi_count")
 67 | dataCSCHTOnocorrectDir.dense <- file.path(data.drive,data.project.dir,"cite-seq-count/H1_S6_d1_HTO_nocorrect/uncorrected_cells/dense_umis.tsv")
 68 | 
 69 | ## 10X datasets: parent directory holding one sub-directory per public 10X dataset
 70 | data.10X.dir <- file.path(data.drive,"data","10XDatasets")
 71 | ```
 72 | 
 73 | # TITRATION DATASET
 74 | ## Load GEX data
 75 | 
 76 | From kallisto-bustools output. Modified from <https://github.com/Sarah145/scRNA_pre_process>
 77 | 
 78 | ```{r loadGEX}
 79 | raw_mtx <- read_kallisto_data(kallistobusDir)
 80 | 
 81 | t2g <- unique(read.csv(t2g.file, sep = '\t', header=FALSE)[,2:3]) # load t2g file (columns 2 and 3: gene id, gene symbol)
 82 | t2g <- data.frame(t2g[,2], row.names = t2g[,1])
 83 | gene_sym <- t2g[as.character(rownames(raw_mtx)),1] # get symbols for gene ids
 84 | 
 85 | ## Which rows have same gene symbol (but different Ensembl gene id)
 86 | gene_sym.duplicated <- which(gene_sym %in% gene_sym[which(duplicated(gene_sym))])
 87 | 
 88 | ## Which gene symbols have duplicated entries
 89 | gene_sym.duplicated.unique <- unique(gene_sym[gene_sym.duplicated])
 90 | 
 91 | ## Make placeholder matrix for duplicate gene symbols
 92 | raw_mtx_dedup <- Matrix(data=0,nrow=length(gene_sym.duplicated.unique),ncol=ncol(raw_mtx))
 93 | rownames(raw_mtx_dedup) <- gene_sym.duplicated.unique
 94 | colnames(raw_mtx_dedup) <- colnames(raw_mtx)
 95 | 
 96 | ## Combine counts from genes with same gene symbol (but different Ensembl gene id)
 97 | for(i in seq_along(gene_sym.duplicated)){
 98 |   curGene <- gene_sym[gene_sym.duplicated[i]]
 99 |   curRow <- gene_sym.duplicated.unique == curGene
100 |   raw_mtx_dedup[curRow,] <- raw_mtx_dedup[curRow,] + raw_mtx[gene_sym.duplicated[i],]
101 | }
102 | ## Keep rows by positive index: x[-integer(0),] would drop every row if no gene symbol were duplicated
103 | ## Merge the combined duplicate-symbol counts with the rows that have a unique gene symbol
104 | raw_mtx <- raw_mtx[setdiff(seq_along(gene_sym), gene_sym.duplicated),]
105 | rownames(raw_mtx) <- gene_sym[setdiff(seq_along(gene_sym), gene_sym.duplicated)]
106 | raw_mtx <- rbind(raw_mtx,raw_mtx_dedup)
107 | ## Total counts per barcode; barcodes with more counts than the barcodeRanks inflection point are kept in gex.aboveInf
108 | tot_counts <- Matrix::colSums(raw_mtx)
109 | bc_rank <- DropletUtils::barcodeRanks(raw_mtx, lower = 10)
110 | gex.inflection <- S4Vectors::metadata(bc_rank)[["inflection"]]
111 | gex.aboveInf <- names(which(tot_counts > gex.inflection))
112 | 
113 | GEX.knee_plot <- knee_plot_auc(bc_rank)
114 | GEX.knee_plot
115 | 
116 | kallisto.GEX <- raw_mtx
117 | ```
118 | 
119 | ## Load ADT data from titration dataset
120 | 
121 | ```{r loadADT}
122 | ADT.res_mat <- read_kallisto_data(kallistobusDirADT)
123 | ## Total ADT counts per barcode and barcode ranks for the kallisto ADT matrix
124 | ADT.tot_counts <- Matrix::colSums(ADT.res_mat)
125 | ADT.bc_rank <- DropletUtils::barcodeRanks(ADT.res_mat, lower = 10)
126 | ## Knee plot; the barcodes above the GEX inflection point (gex.aboveInf) are passed as `highlight`
127 | ADT.knee_plot <- knee_plot_highlight(ADT.bc_rank, highlight=gex.aboveInf)
128 | ADT.knee_plot
129 | 
130 | kallisto.ADT <- ADT.res_mat
131 | ## Cell Ranger counts read from its raw_feature_bc_matrix directory
132 | cellranger.ADT <- Read10X(data.dir=data10XADTDir)
133 | ## CITE-seq-Count: umi_count matrix column-bound with the dense_umis.tsv table, then the "unmapped" row is removed
134 | CSC.ADT <- Read10X(data.dir=dataCSCADTDir, gene.column=1)
135 | CSC.ADT.dense <- read.table(file=dataCSCADTDir.dense)
136 | CSC.ADT <- cbind(CSC.ADT,CSC.ADT.dense)
137 | CSC.ADT <- CSC.ADT[rownames(CSC.ADT) != "unmapped",]
138 | ## Same for the "_nocorrect" run. NOTE(review): read.table returns a data.frame; confirm cbind() yields the intended matrix type
139 | CSC.ADT.uncorrected <- Read10X(data.dir=dataCSCADTnocorrectDir, gene.column=1)
140 | CSC.ADT.uncorrected.dense <- read.table(file=dataCSCADTnocorrectDir.dense)
141 | CSC.ADT.uncorrected <- cbind(CSC.ADT.uncorrected,CSC.ADT.uncorrected.dense)
142 | CSC.ADT.uncorrected <- CSC.ADT.uncorrected[rownames(CSC.ADT.uncorrected) != "unmapped",]
143 | 
144 | ```
145 | 
146 | ## Load HTO data from titration dataset
147 | 
148 | ```{r loadHTO}
149 | HTO.res_mat <- read_kallisto_data(kallistobusDirHTO)
150 | ## Total HTO counts per barcode and barcode ranks for the kallisto HTO matrix
151 | HTO.tot_counts <- Matrix::colSums(HTO.res_mat)
152 | HTO.bc_rank <- DropletUtils::barcodeRanks(HTO.res_mat, lower = 10)
153 | ## Knee plot; the barcodes above the GEX inflection point (gex.aboveInf) are passed as `highlight`
154 | HTO.knee_plot <- knee_plot_highlight(HTO.bc_rank, highlight=gex.aboveInf)
155 | HTO.knee_plot
156 | 
157 | kallisto.HTO <- HTO.res_mat
158 | ## Combine the mRNA, ADT and HTO knee plots side by side and write them to a png in outdir
159 | knee_plots_combined <- cowplot::plot_grid(GEX.knee_plot, ADT.knee_plot, HTO.knee_plot, labels=c("mRNA","ADT","HTO"), nrow=1, label_size=panel.label_size-1, vjust=panel.label_vjust, hjust=panel.label_hjust)
160 | 
161 | ## Explicit print() so the figure is drawn on the png device even when this code is not auto-printed (e.g. when sourced)
162 | png(file=file.path(outdir,"Supplementary Figure S5.png"), width=figure.width.full, height=2.5, units=figure.unit, res=figure.resolution, antialias=figure.antialias)
163 | print(knee_plots_combined)
164 | dev.off()
165 | ## Cell Ranger counts read from its raw_feature_bc_matrix directory
166 | cellranger.HTO <- Read10X(data.dir=data10XHTODir)
167 | ## CITE-seq-Count: umi_count matrix column-bound with the dense_umis.tsv table, then the "unmapped" row is removed
168 | CSC.HTO <- Read10X(data.dir=dataCSCHTODir, gene.column=1)
169 | CSC.HTO.dense <- read.table(file=dataCSCHTODir.dense)
170 | CSC.HTO <- cbind(CSC.HTO,CSC.HTO.dense)
171 | CSC.HTO <- CSC.HTO[rownames(CSC.HTO) != "unmapped",]
172 | ## Same for the "_nocorrect" run
173 | CSC.HTO.uncorrected <- Read10X(data.dir=dataCSCHTOnocorrectDir, gene.column=1)
174 | CSC.HTO.uncorrected.dense <- read.table(file=dataCSCHTOnocorrectDir.dense)
175 | CSC.HTO.uncorrected <- cbind(CSC.HTO.uncorrected,CSC.HTO.uncorrected.dense)
176 | CSC.HTO.uncorrected <- CSC.HTO.uncorrected[rownames(CSC.HTO.uncorrected) != "unmapped",]
177 | 
178 | ```
179 | 
180 | # 10X DATASETS
181 | ## Load GEX data from 10X datasets
182 | 
183 | ```{r}
184 | data.10X.datasets <- c("PBMC_1k_GEXFeature_v3","PBMC_10k_GEXFeature_v3","PBMC_GEXFeatureVDJ_v1")
185 | data.10X.datasets.dir <- file.path(data.10X.dir,data.10X.datasets)
186 | names(data.10X.datasets.dir) <- data.10X.datasets
187 | 
188 | ## Load gene expression data to distinguish cell-containing droplets from empty-droplets
189 | data.10X.datasets.gex.dir <- file.path(data.10X.datasets.dir,"raw_feature_bc_matrix")
190 | names(data.10X.datasets.gex.dir) <- data.10X.datasets
191 | data.10X.datasets.gex <- lapply(data.10X.datasets.gex.dir, function(dir)Read10X(data.dir=dir)$`Gene Expression`)
192 | data.10X.datasets.gex.bc_rank <- lapply(data.10X.datasets.gex,function(raw_mtx)DropletUtils::barcodeRanks(raw_mtx, lower = 10))
193 | data.10X.datasets.gex.tot_counts <- lapply(data.10X.datasets.gex,function(raw_mtx)Matrix::colSums(raw_mtx))
194 | 
195 | ## Get inflection points (named numeric vector, one value per dataset)
196 | data.10X.datasets.gex.inflection <- sapply(data.10X.datasets.gex.bc_rank,function(bc_rank)S4Vectors::metadata(bc_rank)[["inflection"]])
197 | 
198 | ## Extract barcodes above inflection point (simplify=FALSE: always a named list, never collapsed to a matrix)
199 | data.10X.datasets.gex.aboveInf <- sapply(data.10X.datasets, function(dataset)names(which(data.10X.datasets.gex.tot_counts[[dataset]] > data.10X.datasets.gex.inflection[dataset])), simplify=FALSE)
200 | 
201 | data.10X.datasets.gex.aboveInf.index <- sapply(data.10X.datasets, function(dataset)which(data.10X.datasets.gex.tot_counts[[dataset]] > data.10X.datasets.gex.inflection[dataset]), simplify=FALSE)
202 | ## Sanity check (printed): the index-based and name-based selections agree for the first dataset
203 | identical(colnames(data.10X.datasets.gex[[1]])[data.10X.datasets.gex.aboveInf.index[[1]]],data.10X.datasets.gex.aboveInf[[1]])
204 | 
205 | sapply(data.10X.datasets.gex.aboveInf,length)
206 | 
207 | ## Draw knee plots
208 | data.10X.datasets.knee_plots <- lapply(data.10X.datasets.gex.bc_rank,function(x)knee_plot_auc(x)+theme(legend.position="none"))
209 | ## Remove the large intermediate objects; only the barcode lists and knee plots are used further down
210 | rm(data.10X.datasets.gex)
211 | rm(data.10X.datasets.gex.bc_rank)
212 | rm(data.10X.datasets.gex.tot_counts)
213 | 
214 | ```
215 | 
216 | 
217 | ## Load Kallisto ADT data
218 | 
219 | With 10X v3 chemistry, feature barcodes need to be translated to GEX barcodes to be compatible. The translation table can be downloaded here: https://github.com/10XGenomics/cellranger/blob/master/lib/python/cellranger/barcodes/translation/3M-february-2018.txt.gz
220 | 
221 | ```{r}
222 | ## Lookup vector for translating V3 feature barcodes (names, file column 1) into GEX cell barcodes (values, column 2)
223 | translateV3 <- read.table(file.path(data.10X.dir,"10xv3_feature_to_gex_barcode_translation.txt"), header=FALSE)
224 | translateV3.names <- translateV3[,1]
225 | translateV3 <- translateV3[,2]
226 | names(translateV3) <- translateV3.names
227 | ## Locate the kallisto "counts_unfiltered" directory under <dataset>/kallisto/features for each dataset
228 | data.10X.datasets.adt.kallisto.dir <- sapply(data.10X.datasets.dir,function(datasetDir)dir(path=file.path(datasetDir,"kallisto","features"), pattern="counts_unfiltered", recursive=TRUE, full.names=TRUE, include.dirs=TRUE))
229 | 
230 | data.10X.datasets.adt.kallisto <- lapply(data.10X.datasets.adt.kallisto.dir,function(dir)read_kallisto_data(dir))
231 | 
232 | lapply(data.10X.datasets.adt.kallisto,dim)
233 | ## Only datasets whose name ends in "_v3" get their barcodes (column names) translated
234 | data.10X.datasets.adt.kallisto[grep("_v3$",data.10X.datasets)] <- lapply(data.10X.datasets.adt.kallisto[grep("_v3$",data.10X.datasets)],function(data){colnames(data) <- translateV3[colnames(data)]; return(data)})
235 | 
236 | data.10X.datasets.adt.kallisto.bc_rank <- lapply(data.10X.datasets.adt.kallisto,function(raw_mtx)DropletUtils::barcodeRanks(raw_mtx, lower = 10))
237 | ## Barcodes of the first dataset with more ADT counts than the ADT inflection point
238 | adt.kallist.aboveInf <- names(which(Matrix::colSums(data.10X.datasets.adt.kallisto[[1]]) > S4Vectors::metadata(data.10X.datasets.adt.kallisto.bc_rank[[1]])[["inflection"]]))
239 | 
240 | knee_plots.adt.kallisto <- lapply(data.10X.datasets.adt.kallisto.bc_rank,knee_plot)
241 | cowplot::plot_grid(plotlist=knee_plots.adt.kallisto, nrow=1)
242 | ```
243 | 
244 | ## Load CSC ADT data
245 | 
246 | ```{r}
247 | data.10X.datasets.adt.csc.dir <- sapply(data.10X.datasets.dir,function(datasetDir)dir(path=file.path(datasetDir,"cite-seq-count"), pattern="umi_count", recursive=TRUE, full.names=TRUE, include.dirs=TRUE))
248 | ## The searches above/below find both the default and the "_nocorrect" CITE-seq-Count outputs of every dataset
249 | data.10X.datasets.adt.csc.dense.dir <- sapply(data.10X.datasets.dir,function(datasetDir)list.files(path=file.path(datasetDir,"cite-seq-count"), pattern="dense_umis.tsv", recursive=TRUE, full.names=TRUE, include.dirs=FALSE))
250 | ## Split off the "_nocorrect" paths. NOTE(review): names are assigned by position - assumes one match per dataset, in data.10X.datasets order
251 | data.10X.datasets.adt.csc_nc.dir <- grep("_nocorrect",data.10X.datasets.adt.csc.dir, value=TRUE)
252 | data.10X.datasets.adt.csc_nc.dense.dir <- grep("_nocorrect",data.10X.datasets.adt.csc.dense.dir, value=TRUE)
253 | names(data.10X.datasets.adt.csc_nc.dir) <- data.10X.datasets
254 | names(data.10X.datasets.adt.csc_nc.dense.dir) <- data.10X.datasets
255 | ## The remaining paths (everything that is not "_nocorrect") are the default runs
256 | data.10X.datasets.adt.csc.dir <- setdiff(data.10X.datasets.adt.csc.dir, data.10X.datasets.adt.csc_nc.dir)
257 | data.10X.datasets.adt.csc.dense.dir <- setdiff(data.10X.datasets.adt.csc.dense.dir, data.10X.datasets.adt.csc_nc.dense.dir)
258 | names(data.10X.datasets.adt.csc.dir) <- data.10X.datasets
259 | names(data.10X.datasets.adt.csc.dense.dir) <- data.10X.datasets
260 | ## Default runs: column-bind the umi_count matrix with the dense_umis.tsv table, then drop the "unmapped" row
261 | data.10X.datasets.adt.csc <- lapply(data.10X.datasets.adt.csc.dir,function(dir)Read10X(data.dir=dir, gene.column=1))
262 | data.10X.datasets.adt.csc.dense <- lapply(data.10X.datasets.adt.csc.dense.dir,function(dir)read.table(file=dir))
263 | data.10X.datasets.adt.csc <- lapply(data.10X.datasets,function(dataset)Matrix::cbind2(data.10X.datasets.adt.csc[[dataset]],Matrix::Matrix(as.matrix(data.10X.datasets.adt.csc.dense[[dataset]]))))
264 | names(data.10X.datasets.adt.csc) <- data.10X.datasets
265 | data.10X.datasets.adt.csc <- lapply(data.10X.datasets.adt.csc,function(data)data[rownames(data) != "unmapped",])
266 | ## Datasets whose name ends in "_v3" get their barcodes (column names) translated via translateV3
267 | data.10X.datasets.adt.csc[grep("_v3$",data.10X.datasets)] <- lapply(data.10X.datasets.adt.csc[grep("_v3$",data.10X.datasets)],function(data){colnames(data) <- translateV3[colnames(data)]; return(data)})
268 | ## "_nocorrect" runs: same steps as for the default runs
269 | data.10X.datasets.adt.csc_nc <- lapply(data.10X.datasets.adt.csc_nc.dir,function(dir)Read10X(data.dir=dir, gene.column=1))
270 | data.10X.datasets.adt.csc_nc.dense <- lapply(data.10X.datasets.adt.csc_nc.dense.dir,function(dir)read.table(file=dir))
271 | data.10X.datasets.adt.csc_nc <- lapply(data.10X.datasets,function(dataset)Matrix::cbind2(data.10X.datasets.adt.csc_nc[[dataset]],Matrix::Matrix(as.matrix(data.10X.datasets.adt.csc_nc.dense[[dataset]]))))
272 | names(data.10X.datasets.adt.csc_nc) <- data.10X.datasets
273 | data.10X.datasets.adt.csc_nc <- lapply(data.10X.datasets.adt.csc_nc,function(data)data[rownames(data) != "unmapped",])
274 | 
275 | data.10X.datasets.adt.csc_nc[grep("_v3$",data.10X.datasets)] <- lapply(data.10X.datasets.adt.csc_nc[grep("_v3$",data.10X.datasets)],function(data){colnames(data) <- translateV3[colnames(data)]; return(data)})
276 | ## Barcode ranks and knee plots are made for the default runs only
277 | data.10X.datasets.adt.csc.bc_rank <- lapply(data.10X.datasets.adt.csc,function(raw_mtx)DropletUtils::barcodeRanks(raw_mtx, lower = 10))
278 | 
279 | knee_plots.adt.csc <- lapply(data.10X.datasets.adt.csc.bc_rank,knee_plot)
280 | cowplot::plot_grid(plotlist=knee_plots.adt.csc, nrow=1)
281 | ```
282 | 
283 | 
284 | ## Load CellRanger featureOnly ADT data
285 | 
286 | ```{r}
287 | data.10X.datasets.adt.cellranger.dir <- sapply(data.10X.datasets.dir,function(datasetDir)dir(path=file.path(datasetDir), pattern="raw_feature_bc_matrix", recursive=TRUE, full.names=TRUE, include.dirs=TRUE)[1])
288 | ## Only the first "raw_feature_bc_matrix" match per dataset is used. NOTE(review): presumably the feature-only run; confirm Read10X returns a single matrix here
289 | data.10X.datasets.adt.cellranger <- lapply(data.10X.datasets.adt.cellranger.dir,function(dir)Read10X(dir))
290 | lapply(data.10X.datasets.adt.cellranger,dim)
291 | 
292 | data.10X.datasets.adt.cellranger.bc_rank <- lapply(data.10X.datasets.adt.cellranger,function(raw_mtx)DropletUtils::barcodeRanks(raw_mtx, lower = 10))
293 | 
294 | knee_plots.adt.cellranger <- lapply(data.10X.datasets.adt.cellranger.bc_rank,knee_plot)
295 | cowplot::plot_grid(plotlist=knee_plots.adt.cellranger, nrow=1)
296 | ## Overview grid: one row per counting method (Cell Ranger, CITE-seq-Count, kallisto); only three labels (dataset names) are supplied
297 | cowplot::plot_grid(plotlist=c(knee_plots.adt.cellranger,knee_plots.adt.csc,knee_plots.adt.kallisto), labels=data.10X.datasets, nrow=3)
298 | ```
299 | 
300 | ## Save data
301 | 
302 | ```{r}
303 | ## 10X datasets: ADT matrices from each counting method plus the GEX-derived barcode lists and knee plots
304 | save(file="data/data.10X.datasets.Rdata",
305 |      data.10X.datasets, 
306 |      data.10X.datasets.adt.kallisto, 
307 |      data.10X.datasets.adt.csc, 
308 |      data.10X.datasets.adt.csc_nc, 
309 |      data.10X.datasets.adt.cellranger, 
310 |      data.10X.datasets.gex.aboveInf,
311 |      data.10X.datasets.knee_plots)
312 | ## Titration dataset, HTO counts from each counting method (gex.aboveInf is stored in both files)
313 | save(file="data/data.HTO.Rdata",
314 |      kallisto.HTO,
315 |      cellranger.HTO,
316 |      CSC.HTO, 
317 |      CSC.HTO.uncorrected, 
318 |      gex.aboveInf)
319 | ## Titration dataset, ADT counts from each counting method
320 | save(file="data/data.ADT.Rdata",
321 |      kallisto.ADT,
322 |      cellranger.ADT,
323 |      CSC.ADT, 
324 |      CSC.ADT.uncorrected, 
325 |      gex.aboveInf)
326 | 
327 | ```
328 | 


--------------------------------------------------------------------------------
/Load-unfiltered-data.md:
--------------------------------------------------------------------------------
  1 | CITE-seq optimization - Load unfiltered data
  2 | ================
  3 | Terkild Brink Buus
  4 | 30/3/2020
  5 | 
  6 | ## Load utilities
  7 | 
  8 | Including libraries, plotting and color settings and custom utility
  9 | functions
 10 | 
 11 | ``` r
 12 | set.seed(114)
 13 | require("Seurat", quietly=T)
 14 | require("tidyverse", quietly=T)
 15 | library("Matrix", quietly=T)
 16 | library("DropletUtils", quietly=T)
 17 | 
 18 | ## Load ggplot theme and defaults
 19 | source("R/ggplot_settings.R")
 20 | 
 21 | ## Load helper functions
 22 | source("R/Utilities.R")
 23 | 
 24 | read_kallisto_data <- function(file.path){
 25 |   ## Load mtx and transpose it
 26 |   res_mat <- as(t(readMM(file.path(file.path,"cells_x_genes.mtx"))), 'CsparseMatrix') 
 27 |   ## Attach genes
 28 |   rownames(res_mat) <- read.csv(file.path(file.path,"cells_x_genes.genes.txt"), sep = '\t', header = F)[,1]
 29 |   ## Attach barcodes
 30 |   colnames(res_mat) <- read.csv(file.path(file.path,"cells_x_genes.barcodes.txt"), header = F, sep = '\t')[,1]
 31 |   
 32 |   return(res_mat)
 33 | }
 34 | ```
 35 | 
 36 | ## Set file paths
 37 | 
The aligned and counted read outputs from the various algorithms were
generated using Snakemake; the workflow can be seen in the
included [Snakefile](Snakefile)
 41 | 
 42 | ``` r
 43 | data.drive <- "F:/"
 44 | data.project.dir <- "Projects/ECCITE-seq/TotalSeqC_TitrationA"
 45 | outdir <- "figures"
 46 | t2g.file <- file.path(data.drive,data.project.dir,"/kallisto/t2g_cellranger.txt")
 47 | kallistobusDir <- file.path(data.drive,data.project.dir,"kallisto/gex/c1/counts_unfiltered")
 48 | 
 49 | ## ADT data
 50 | kallistobusDirADT <- file.path(data.drive,data.project.dir,"kallisto/features/A1_S5.ADT_15/counts_unfiltered")
 51 | data10XADTDir <- file.path(data.drive,data.project.dir,"cellranger_A1/outs/raw_feature_bc_matrix")
 52 | dataCSCADTDir <- file.path(data.drive,data.project.dir,"cite-seq-count/A1_S5_d1_ADT/umi_count")
 53 | dataCSCADTDir.dense <- file.path(data.drive,data.project.dir,"cite-seq-count/A1_S5_d1_ADT/uncorrected_cells/dense_umis.tsv")
 54 | dataCSCADTnocorrectDir <- file.path(data.drive,data.project.dir,"cite-seq-count/A1_S5_d1_ADT_nocorrect/umi_count")
 55 | dataCSCADTnocorrectDir.dense <- file.path(data.drive,data.project.dir,"cite-seq-count/A1_S5_d1_ADT_nocorrect/uncorrected_cells/dense_umis.tsv")
 56 | 
 57 | ## HTO data
 58 | kallistobusDirHTO <- file.path(data.drive,data.project.dir,"kallisto/features/H1_S6.HTO_A_13/counts_unfiltered")
 59 | data10XHTODir <- file.path(data.drive,data.project.dir,"cellranger_H1/outs/raw_feature_bc_matrix")
 60 | dataCSCHTODir <- file.path(data.drive,data.project.dir,"cite-seq-count/H1_S6_d1_HTO/umi_count")
 61 | dataCSCHTODir.dense <- file.path(data.drive,data.project.dir,"cite-seq-count/H1_S6_d1_HTO/uncorrected_cells/dense_umis.tsv")
 62 | dataCSCHTOnocorrectDir <- file.path(data.drive,data.project.dir,"cite-seq-count/H1_S6_d1_HTO_nocorrect/umi_count")
 63 | dataCSCHTOnocorrectDir.dense <- file.path(data.drive,data.project.dir,"cite-seq-count/H1_S6_d1_HTO_nocorrect/uncorrected_cells/dense_umis.tsv")
 64 | 
 65 | ## 10X datasets
 66 | data.10X.dir <- file.path(data.drive,"data","10XDatasets")
 67 | ```
 68 | 
 69 | # TITRATION DATASET
 70 | 
 71 | ## Load GEX data
 72 | 
 73 | From kallisto-bustools output. Modified from
 74 | <https://github.com/Sarah145/scRNA_pre_process>
 75 | 
 76 | ``` r
 77 | raw_mtx <- read_kallisto_data(kallistobusDir)
 78 | 
 79 | t2g <- unique(read.csv(t2g.file, sep = '\t', header=F)[,2:3]) # load t2g file
 80 | t2g <- data.frame(t2g[,2], row.names = t2g[,1])
 81 | gene_sym <- t2g[as.character(rownames(raw_mtx)),1] # get symbols for gene ids
 82 | 
 83 | ## Which rows have same gene symbol (but different Ensembl gene id)
 84 | gene_sym.duplicated <- which(gene_sym %in% gene_sym[which(duplicated(gene_sym))])
 85 | 
 86 | ## Which genes are have duplicated entries
 87 | gene_sym.duplicated.unique <- unique(gene_sym[gene_sym.duplicated])
 88 | 
 89 | ## Make placeholder matrix for duplicate gene symbols
 90 | raw_mtx_dedup <- Matrix(data=0,nrow=length(gene_sym.duplicated.unique),ncol=ncol(raw_mtx))
 91 | rownames(raw_mtx_dedup) <- gene_sym.duplicated.unique
 92 | colnames(raw_mtx_dedup) <- colnames(raw_mtx)
 93 | 
 94 | ## Combine counts from genes with same gene symbol (but different Ensembl gene id)
 95 | for(i in seq_along(gene_sym.duplicated)){
 96 |   curGene <- gene_sym[gene_sym.duplicated[i]]
 97 |   curRow <- gene_sym.duplicated.unique == curGene
 98 |   raw_mtx_dedup[curRow,] <- raw_mtx_dedup[curRow,] + raw_mtx[gene_sym.duplicated[i],]
 99 | }
100 | 
101 | ## Merged combined counts duplicate gene symbol with matrix of unique gene symbol counts
102 | raw_mtx <- raw_mtx[-gene_sym.duplicated,]
103 | rownames(raw_mtx) <- gene_sym[-gene_sym.duplicated]
104 | raw_mtx <- rbind(raw_mtx,raw_mtx_dedup)
105 | 
106 | tot_counts <- Matrix::colSums(raw_mtx)
107 | bc_rank <- DropletUtils::barcodeRanks(raw_mtx, lower = 10)
108 | gex.inflection <- S4Vectors::metadata(bc_rank)[["inflection"]]
109 | gex.aboveInf <- names(which(tot_counts > gex.inflection))
110 | 
111 | GEX.knee_plot <- knee_plot_auc(bc_rank)
112 | GEX.knee_plot
113 | ```
114 | 
115 | ![](Load-unfiltered-data_files/figure-gfm/loadGEX-1.png)<!-- -->
116 | 
117 | ``` r
118 | kallisto.GEX <- raw_mtx
119 | ```
120 | 
121 | ## Load ADT data from titration dataset
122 | 
123 | ``` r
124 | ADT.res_mat <- read_kallisto_data(kallistobusDirADT)
125 | 
126 | ADT.tot_counts <- Matrix::colSums(ADT.res_mat)
127 | ADT.bc_rank <- DropletUtils::barcodeRanks(ADT.res_mat, lower = 10)
128 | 
129 | ADT.knee_plot <- knee_plot_highlight(ADT.bc_rank, highlight=gex.aboveInf)
130 | ADT.knee_plot
131 | ```
132 | 
133 | ![](Load-unfiltered-data_files/figure-gfm/loadADT-1.png)<!-- -->
134 | 
135 | ``` r
136 | kallisto.ADT <- ADT.res_mat
137 | 
138 | cellranger.ADT <- Read10X(data.dir=data10XADTDir)
139 | 
140 | CSC.ADT <- Read10X(data.dir=dataCSCADTDir, gene.column=1)
141 | CSC.ADT.dense <- read.table(file=dataCSCADTDir.dense)
142 | CSC.ADT <- cbind(CSC.ADT,CSC.ADT.dense)
143 | CSC.ADT <- CSC.ADT[rownames(CSC.ADT) != "unmapped",]
144 | 
145 | CSC.ADT.uncorrected <- Read10X(data.dir=dataCSCADTnocorrectDir, gene.column=1)
146 | CSC.ADT.uncorrected.dense <- read.table(file=dataCSCADTnocorrectDir.dense)
147 | CSC.ADT.uncorrected <- cbind(CSC.ADT.uncorrected,CSC.ADT.uncorrected.dense)
148 | CSC.ADT.uncorrected <- CSC.ADT.uncorrected[rownames(CSC.ADT.uncorrected) != "unmapped",]
149 | ```
150 | 
151 | ## Load HTO data from titration dataset
152 | 
153 | ``` r
154 | HTO.res_mat <- read_kallisto_data(kallistobusDirHTO)
155 | 
156 | HTO.tot_counts <- Matrix::colSums(HTO.res_mat)
157 | HTO.bc_rank <- DropletUtils::barcodeRanks(HTO.res_mat, lower = 10)
158 | 
159 | HTO.knee_plot <- knee_plot_highlight(HTO.bc_rank, highlight=gex.aboveInf)
160 | HTO.knee_plot
161 | ```
162 | 
163 | ![](Load-unfiltered-data_files/figure-gfm/loadHTO-1.png)<!-- -->
164 | 
165 | ``` r
166 | kallisto.HTO <- HTO.res_mat
167 | 
168 | knee_plots_combined <- cowplot::plot_grid(GEX.knee_plot, ADT.knee_plot, HTO.knee_plot, labels=c("mRNA","ADT","HTO"), nrow=1, label_size=panel.label_size-1, vjust=panel.label_vjust, hjust=panel.label_hjust)
169 | 
170 | 
171 | png(file=file.path(outdir,"Supplementary Figure S4.png"), width=figure.width.full, height=2.5, units=figure.unit, res=figure.resolution, antialias=figure.antialias)
172 | knee_plots_combined
173 | dev.off()
174 | ```
175 | 
176 |     ## png 
177 |     ##   2
178 | 
179 | ``` r
180 | cellranger.HTO <- Read10X(data.dir=data10XHTODir)
181 | 
182 | CSC.HTO <- Read10X(data.dir=dataCSCHTODir, gene.column=1)
183 | CSC.HTO.dense <- read.table(file=dataCSCHTODir.dense)
184 | CSC.HTO <- cbind(CSC.HTO,CSC.HTO.dense)
185 | CSC.HTO <- CSC.HTO[rownames(CSC.HTO) != "unmapped",]
186 | 
187 | CSC.HTO.uncorrected <- Read10X(data.dir=dataCSCHTOnocorrectDir, gene.column=1)
188 | CSC.HTO.uncorrected.dense <- read.table(file=dataCSCHTOnocorrectDir.dense)
189 | CSC.HTO.uncorrected <- cbind(CSC.HTO.uncorrected,CSC.HTO.uncorrected.dense)
190 | CSC.HTO.uncorrected <- CSC.HTO.uncorrected[rownames(CSC.HTO.uncorrected) != "unmapped",]
191 | ```
192 | 
193 | # 10X DATASETS
194 | 
195 | ## Load GEX data from 10X datasets
196 | 
197 | ``` r
data.10X.datasets <- c("PBMC_1k_GEXFeature_v3","PBMC_10k_GEXFeature_v3","PBMC_GEXFeatureVDJ_v1")
data.10X.datasets.dir <- file.path(data.10X.dir,data.10X.datasets)
names(data.10X.datasets.dir) <- data.10X.datasets

## Load gene expression data to distinguish cell-containing droplets from empty-droplets
data.10X.datasets.gex.dir <- file.path(data.10X.datasets.dir,"raw_feature_bc_matrix")
names(data.10X.datasets.gex.dir) <- data.10X.datasets
data.10X.datasets.gex <- lapply(data.10X.datasets.gex.dir, function(dir)Read10X(data.dir=dir)$`Gene Expression`)
data.10X.datasets.gex.bc_rank <- lapply(data.10X.datasets.gex,function(raw_mtx)DropletUtils::barcodeRanks(raw_mtx, lower = 10))
data.10X.datasets.gex.tot_counts <- lapply(data.10X.datasets.gex,function(raw_mtx)Matrix::colSums(raw_mtx))

## Get inflection points
data.10X.datasets.gex.inflection <- sapply(data.10X.datasets.gex.bc_rank,function(bc_rank)S4Vectors::metadata(bc_rank)[["inflection"]])

## Extract column indices (named by barcode) above the inflection point.
## simplify=FALSE guarantees a list named by dataset; plain sapply() would
## collapse the result into a matrix if all datasets had the same number of cells.
data.10X.datasets.gex.aboveInf.index <- sapply(data.10X.datasets, function(dataset)which(data.10X.datasets.gex.tot_counts[[dataset]] > data.10X.datasets.gex.inflection[[dataset]]), simplify=FALSE)

## Extract barcodes above inflection point
data.10X.datasets.gex.aboveInf <- lapply(data.10X.datasets.gex.aboveInf.index, names)

## Sanity check: indices and barcode names select the same columns
identical(colnames(data.10X.datasets.gex[[1]])[data.10X.datasets.gex.aboveInf.index[[1]]],data.10X.datasets.gex.aboveInf[[1]])
218 | ```
219 | 
220 |     ## [1] TRUE
221 | 
222 | ``` r
223 | sapply(data.10X.datasets.gex.aboveInf,length)
224 | ```
225 | 
226 |     ##  PBMC_1k_GEXFeature_v3 PBMC_10k_GEXFeature_v3  PBMC_GEXFeatureVDJ_v1 
227 |     ##                    740                   7960                   7627
228 | 
229 | ``` r
230 | ## Draw knee plots
231 | data.10X.datasets.knee_plots <- lapply(data.10X.datasets.gex.bc_rank,function(x)knee_plot_auc(x)+theme(legend.position="none"))
232 | 
233 | rm(data.10X.datasets.gex)
234 | rm(data.10X.datasets.gex.bc_rank)
235 | rm(data.10X.datasets.gex.tot_counts)
236 | ```
237 | 
238 | ## Load Kallisto ADT data
239 | 
With 10X v3 chemistry, feature barcodes need to be translated to GEX
barcodes to be compatible. The translation table can be downloaded here:
242 | <https://github.com/10XGenomics/cellranger/blob/master/lib/python/cellranger/barcodes/translation/3M-february-2018.txt.gz>
243 | 
244 | ``` r
## Translate V3 feature barcodes into the matching GEX cell barcodes.
## The lookup is a named vector: names come from column 1 of the translation
## file and values from column 2 (it is later indexed by the feature-barcode
## column names). The file is located via data.10X.dir like the other 10X inputs,
## and read as character so the lookup never yields factor codes.
translateV3 <- read.table(file.path(data.10X.dir,"10xv3_feature_to_gex_barcode_translation.txt"), header=FALSE, stringsAsFactors=FALSE)
translateV3.names <- translateV3[,1]
translateV3 <- translateV3[,2]
names(translateV3) <- translateV3.names

## Locate the unfiltered kallisto count directory of each dataset
data.10X.datasets.adt.kallisto.dir <- sapply(data.10X.datasets.dir,function(datasetDir)dir(path=file.path(datasetDir,"kallisto","features"), pattern="counts_unfiltered", recursive=TRUE, full.names=TRUE, include.dirs=TRUE))

data.10X.datasets.adt.kallisto <- lapply(data.10X.datasets.adt.kallisto.dir,function(dir)read_kallisto_data(dir))

lapply(data.10X.datasets.adt.kallisto,dim)
256 | ```
257 | 
258 |     ## $PBMC_1k_GEXFeature_v3
259 |     ## [1]     17 124716
260 |     ## 
261 |     ## $PBMC_10k_GEXFeature_v3
262 |     ## [1]     17 674603
263 |     ## 
264 |     ## $PBMC_GEXFeatureVDJ_v1
265 |     ## [1]     17 734856
266 | 
267 | ``` r
268 | data.10X.datasets.adt.kallisto[grep("_v3$",data.10X.datasets)] <- lapply(data.10X.datasets.adt.kallisto[grep("_v3$",data.10X.datasets)],function(data){colnames(data) <- translateV3[colnames(data)]; return(data)})
269 | 
270 | data.10X.datasets.adt.kallisto.bc_rank <- lapply(data.10X.datasets.adt.kallisto,function(raw_mtx)DropletUtils::barcodeRanks(raw_mtx, lower = 10))
271 | 
272 | adt.kallist.aboveInf <- names(which(Matrix::colSums(data.10X.datasets.adt.kallisto[[1]]) > S4Vectors::metadata(data.10X.datasets.adt.kallisto.bc_rank[[1]])[["inflection"]]))
273 | 
274 | knee_plots.adt.kallisto <- lapply(data.10X.datasets.adt.kallisto.bc_rank,knee_plot)
275 | cowplot::plot_grid(plotlist=knee_plots.adt.kallisto, nrow=1)
276 | ```
277 | 
278 | ![](Load-unfiltered-data_files/figure-gfm/unnamed-chunk-3-1.png)<!-- -->
279 | 
280 | ## Load CSC ADT data
281 | 
282 | ``` r
283 | data.10X.datasets.adt.csc.dir <- sapply(data.10X.datasets.dir,function(datasetDir)dir(path=file.path(datasetDir,"cite-seq-count"), pattern="umi_count", recursive=TRUE, full.names=TRUE, include.dirs=TRUE))
284 | 
285 | data.10X.datasets.adt.csc.dense.dir <- sapply(data.10X.datasets.dir,function(datasetDir)list.files(path=file.path(datasetDir,"cite-seq-count"), pattern="dense_umis.tsv", recursive=TRUE, full.names=TRUE, include.dirs=FALSE))
286 | 
287 | data.10X.datasets.adt.csc_nc.dir <- grep("_nocorrect",data.10X.datasets.adt.csc.dir, value=TRUE)
288 | data.10X.datasets.adt.csc_nc.dense.dir <- grep("_nocorrect",data.10X.datasets.adt.csc.dense.dir, value=TRUE)
289 | names(data.10X.datasets.adt.csc_nc.dir) <- data.10X.datasets
290 | names(data.10X.datasets.adt.csc_nc.dense.dir) <- data.10X.datasets
291 | 
292 | data.10X.datasets.adt.csc.dir <- setdiff(data.10X.datasets.adt.csc.dir, data.10X.datasets.adt.csc_nc.dir)
293 | data.10X.datasets.adt.csc.dense.dir <- setdiff(data.10X.datasets.adt.csc.dense.dir, data.10X.datasets.adt.csc_nc.dense.dir)
294 | names(data.10X.datasets.adt.csc.dir) <- data.10X.datasets
295 | names(data.10X.datasets.adt.csc.dense.dir) <- data.10X.datasets
296 | 
297 | data.10X.datasets.adt.csc <- lapply(data.10X.datasets.adt.csc.dir,function(dir)Read10X(data.dir=dir, gene.column=1))
298 | data.10X.datasets.adt.csc.dense <- lapply(data.10X.datasets.adt.csc.dense.dir,function(dir)read.table(file=dir))
299 | data.10X.datasets.adt.csc <- lapply(data.10X.datasets,function(dataset)Matrix::cbind2(data.10X.datasets.adt.csc[[dataset]],Matrix::Matrix(as.matrix(data.10X.datasets.adt.csc.dense[[dataset]]))))
300 | names(data.10X.datasets.adt.csc) <- data.10X.datasets
301 | data.10X.datasets.adt.csc <- lapply(data.10X.datasets.adt.csc,function(data)data[rownames(data) != "unmapped",])
302 | 
303 | data.10X.datasets.adt.csc[grep("_v3$",data.10X.datasets)] <- lapply(data.10X.datasets.adt.csc[grep("_v3$",data.10X.datasets)],function(data){colnames(data) <- translateV3[colnames(data)]; return(data)})
304 | 
305 | data.10X.datasets.adt.csc_nc <- lapply(data.10X.datasets.adt.csc_nc.dir,function(dir)Read10X(data.dir=dir, gene.column=1))
306 | data.10X.datasets.adt.csc_nc.dense <- lapply(data.10X.datasets.adt.csc_nc.dense.dir,function(dir)read.table(file=dir))
307 | data.10X.datasets.adt.csc_nc <- lapply(data.10X.datasets,function(dataset)Matrix::cbind2(data.10X.datasets.adt.csc_nc[[dataset]],Matrix::Matrix(as.matrix(data.10X.datasets.adt.csc_nc.dense[[dataset]]))))
308 | names(data.10X.datasets.adt.csc_nc) <- data.10X.datasets
309 | data.10X.datasets.adt.csc_nc <- lapply(data.10X.datasets.adt.csc_nc,function(data)data[rownames(data) != "unmapped",])
310 | 
311 | data.10X.datasets.adt.csc_nc[grep("_v3$",data.10X.datasets)] <- lapply(data.10X.datasets.adt.csc_nc[grep("_v3$",data.10X.datasets)],function(data){colnames(data) <- translateV3[colnames(data)]; return(data)})
312 | 
313 | data.10X.datasets.adt.csc.bc_rank <- lapply(data.10X.datasets.adt.csc,function(raw_mtx)DropletUtils::barcodeRanks(raw_mtx, lower = 10))
314 | 
315 | knee_plots.adt.csc <- lapply(data.10X.datasets.adt.csc.bc_rank,knee_plot)
316 | cowplot::plot_grid(plotlist=knee_plots.adt.csc, nrow=1)
317 | ```
318 | 
319 | ![](Load-unfiltered-data_files/figure-gfm/unnamed-chunk-4-1.png)<!-- -->
320 | 
321 | ## Load CellRanger featureOnly ADT data
322 | 
323 | ``` r
324 | data.10X.datasets.adt.cellranger.dir <- sapply(data.10X.datasets.dir,function(datasetDir)dir(path=file.path(datasetDir), pattern="raw_feature_bc_matrix", recursive=TRUE, full.names=TRUE, include.dirs=TRUE)[1])
325 | 
326 | data.10X.datasets.adt.cellranger <- lapply(data.10X.datasets.adt.cellranger.dir,function(dir)Read10X(dir))
327 | lapply(data.10X.datasets.adt.cellranger,dim)
328 | ```
329 | 
330 |     ## $PBMC_1k_GEXFeature_v3
331 |     ## [1]      17 6794880
332 |     ## 
333 |     ## $PBMC_10k_GEXFeature_v3
334 |     ## [1]      17 6794880
335 |     ## 
336 |     ## $PBMC_GEXFeatureVDJ_v1
337 |     ## [1]     17 737280
338 | 
339 | ``` r
340 | data.10X.datasets.adt.cellranger.bc_rank <- lapply(data.10X.datasets.adt.cellranger,function(raw_mtx)DropletUtils::barcodeRanks(raw_mtx, lower = 10))
341 | 
342 | knee_plots.adt.cellranger <- lapply(data.10X.datasets.adt.cellranger.bc_rank,knee_plot)
343 | cowplot::plot_grid(plotlist=knee_plots.adt.cellranger, nrow=1)
344 | ```
345 | 
346 | ![](Load-unfiltered-data_files/figure-gfm/unnamed-chunk-5-1.png)<!-- -->
347 | 
348 | ``` r
349 | cowplot::plot_grid(plotlist=c(knee_plots.adt.cellranger,knee_plots.adt.csc,knee_plots.adt.kallisto), labels=data.10X.datasets, nrow=3)
350 | ```
351 | 
352 | ![](Load-unfiltered-data_files/figure-gfm/unnamed-chunk-5-2.png)<!-- -->
353 | 
354 | ## Save data
355 | 
356 | ``` r
357 | save(file="data/data.10X.datasets.Rdata",
358 |      data.10X.datasets, 
359 |      data.10X.datasets.adt.kallisto, 
360 |      data.10X.datasets.adt.csc, 
361 |      data.10X.datasets.adt.csc_nc, 
362 |      data.10X.datasets.adt.cellranger, 
363 |      data.10X.datasets.gex.aboveInf,
364 |      data.10X.datasets.knee_plots)
365 | 
366 | save(file="data/data.HTO.Rdata",
367 |      kallisto.HTO,
368 |      cellranger.HTO,
369 |      CSC.HTO, 
370 |      CSC.HTO.uncorrected, 
371 |      gex.aboveInf)
372 | 
373 | save(file="data/data.ADT.Rdata",
374 |      kallisto.ADT,
375 |      cellranger.ADT,
376 |      CSC.ADT, 
377 |      CSC.ADT.uncorrected, 
378 |      gex.aboveInf)
379 | ```
380 | 


--------------------------------------------------------------------------------
/Load-unfiltered-data_files/figure-gfm/loadADT-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Load-unfiltered-data_files/figure-gfm/loadADT-1.png


--------------------------------------------------------------------------------
/Load-unfiltered-data_files/figure-gfm/loadGEX-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Load-unfiltered-data_files/figure-gfm/loadGEX-1.png


--------------------------------------------------------------------------------
/Load-unfiltered-data_files/figure-gfm/loadHTO-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Load-unfiltered-data_files/figure-gfm/loadHTO-1.png


--------------------------------------------------------------------------------
/Load-unfiltered-data_files/figure-gfm/unnamed-chunk-3-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Load-unfiltered-data_files/figure-gfm/unnamed-chunk-3-1.png


--------------------------------------------------------------------------------
/Load-unfiltered-data_files/figure-gfm/unnamed-chunk-4-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Load-unfiltered-data_files/figure-gfm/unnamed-chunk-4-1.png


--------------------------------------------------------------------------------
/Load-unfiltered-data_files/figure-gfm/unnamed-chunk-5-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Load-unfiltered-data_files/figure-gfm/unnamed-chunk-5-1.png


--------------------------------------------------------------------------------
/Load-unfiltered-data_files/figure-gfm/unnamed-chunk-5-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Load-unfiltered-data_files/figure-gfm/unnamed-chunk-5-2.png


--------------------------------------------------------------------------------
/R/Utilities.R:
--------------------------------------------------------------------------------
  1 | #' Knee plot for filtering empty droplets
  2 | #' 
  3 | #' Visualizes the inflection point to filter empty droplets. This function plots 
  4 | #' different datasets with a different color. 
  5 | #' 
  6 | #' @param bc_rank A `DataFrame` output from `DropletUtil::barcodeRanks`.
  7 | #' @return A ggplot2 object.
  8 | #' 
  9 | knee_plot <- function(bc_rank) {
 10 |     library("ggplot2")
 11 |     knee_plt <- tibble(rank = bc_rank[["rank"]],
 12 |                        total = bc_rank[["total"]]) %>% 
 13 |         distinct() %>% 
 14 |         dplyr::filter(total > 0)
 15 |     annot <- tibble(inflection = S4Vectors::metadata(bc_rank)[["inflection"]],
 16 |                     rank_cutoff = max(bc_rank$rank[bc_rank$total > S4Vectors::metadata(bc_rank)[["inflection"]]]),
 17 |                     knee = S4Vectors::metadata(bc_rank)[["knee"]],
 18 |                     knee_cutoff = max(bc_rank$rank[bc_rank$total > S4Vectors::metadata(bc_rank)[["knee"]]]))
 19 |     p <- ggplot(knee_plt, aes(total, rank)) +
 20 |         geom_line() +
 21 |         geom_hline(aes(yintercept = rank_cutoff), data = annot, linetype = 2) +
 22 |         geom_vline(aes(xintercept = inflection), data = annot, linetype = 2) +
 23 |         geom_hline(aes(yintercept = knee_cutoff), data = annot, linetype = "dotted", col="red") +
 24 |         geom_vline(aes(xintercept = knee), data = annot, linetype = "dotted", col="red") +
 25 |         geom_label(aes(y=0,x=knee,label=knee), data = annot, hjust=0, vjust=0, col="red") + 
 26 |         geom_label(aes(y=0,x=inflection,label=inflection), data = annot, hjust=1, vjust=0) + 
 27 |         geom_label(aes(y=knee_cutoff,x=Inf,label=knee_cutoff), data = annot, hjust=1, vjust=1, col="red") + 
 28 |         geom_label(aes(y=rank_cutoff,x=Inf,label=rank_cutoff), data = annot, hjust=1, vjust=0) + 
 29 |         scale_x_log10() +
 30 |         scale_y_log10() +
 31 |         annotation_logticks() +
 32 |         labs(y = "Rank", x = "Total UMIs")
 33 |     return(p)
 34 | }
 35 | 
 36 | knee_plot_auc <- function(bc_rank) {
 37 |   library("ggplot2")
 38 |   knee_plt <- tibble(rank = bc_rank[["rank"]],
 39 |                      total = bc_rank[["total"]]) %>% 
 40 |     distinct() %>% 
 41 |     dplyr::filter(total > 0)
 42 |   annot <- tibble(inflection = S4Vectors::metadata(bc_rank)[["inflection"]],
 43 |                   rank_cutoff = length(which(bc_rank$total > S4Vectors::metadata(bc_rank)[["inflection"]])),
 44 |                   knee = S4Vectors::metadata(bc_rank)[["knee"]],
 45 |                   knee_cutoff = length(which(bc_rank$total > S4Vectors::metadata(bc_rank)[["knee"]])))
 46 |   p <- ggplot(knee_plt, aes(total, rank)) +
 47 |     geom_line() +
 48 |     geom_ribbon(aes(xmin = 0, xmax = total, fill = rank > annot$rank_cutoff), alpha=0.5) + 
 49 |     geom_hline(data=annot,aes(yintercept = rank_cutoff), linetype = 2) +
 50 |     geom_label(data=annot,aes(y=rank_cutoff,x=Inf,label=rank_cutoff), hjust=1, vjust=1) + 
 51 |     scale_fill_manual(values=c("black","grey"), labels=c("Cell","EmptyDrop")) + 
 52 |     scale_x_log10(expand=c(0,0,0.05,0)) +
 53 |     scale_y_log10(expand=c(0,0,0.05,0)) +
 54 |     annotation_logticks() +
 55 |     labs(y = "Rank", x = "Total UMIs") + 
 56 |     guides(fill=guide_legend(override.aes=list(alpha=1, color="black"))) + 
 57 |     theme(legend.position=c(1,1), 
 58 |           legend.justification=c(1,1), 
 59 |           legend.title=element_blank(),
 60 |           legend.direction="vertical",
 61 |           legend.key.size=unit(0.3,"cm"),
 62 |           legend.background=element_blank())
 63 |   return(p)
 64 | }
 65 | 
 66 | knee_plot_highlight <- function(bc_rank, highlight=c()) {
 67 |   library("ggplot2")
 68 |   knee_plt <- tibble(rank = bc_rank[["rank"]],
 69 |                      total = bc_rank[["total"]], 
 70 |                      barcode=rownames(bc_rank)) %>% 
 71 |     distinct() %>% 
 72 |     dplyr::filter(total > 0)
 73 |   
 74 |   annot <- tibble(inflection = S4Vectors::metadata(bc_rank)[["inflection"]],
 75 |                   rank_cutoff = max(bc_rank$rank[bc_rank$total > S4Vectors::metadata(bc_rank)[["inflection"]]]),
 76 |                   knee = S4Vectors::metadata(bc_rank)[["knee"]],
 77 |                   knee_cutoff = max(bc_rank$rank[bc_rank$total > S4Vectors::metadata(bc_rank)[["knee"]]]))
 78 |   
 79 |   cutoff <- 18000
 80 |   data.highlight <- knee_plt[knee_plt$barcode %in% highlight,]
 81 |   data.highlight <- rbind(data.highlight[data.highlight$rank <= cutoff,],data.highlight[sample(nrow(data.highlight[data.highlight$rank > cutoff,]),1000),])
 82 |   
 83 |   p <- ggplot(knee_plt, aes(total, rank)) +
 84 |     geom_line(color="black") +    
 85 |     geom_hline(yintercept=length(highlight), linetype="dashed", color="red", size=0.25, alpha=0.5) + 
 86 |     geom_label(data=annot,aes(y=length(highlight),x=Inf,label=length(highlight)), hjust=1, vjust=1) + 
 87 |     scale_x_log10(expand=c(0,0,0.05,0)) +
 88 |     scale_y_log10(expand=c(0,0,0.05,0)) + 
 89 |     annotation_logticks() +
 90 |     labs(y = "Rank", x = "Total UMIs") + 
 91 |     theme(legend.position=c(1,.99), 
 92 |           legend.justification=c(1,1), 
 93 |           legend.title=element_blank(),
 94 |           legend.direction="vertical")
 95 |   return(p)
 96 | }
 97 | 
 98 | ## nth function extracts the value at a set fractile or median if fractile "rank" is less than a set "nth" threshhold
 99 | nth <- function(value, nth=10, fractile=0.9){
100 |   if(length(value)*(1-fractile) <= nth){
101 |     newvalue <- median(value)
102 |   } else {
103 |     newvalue <- quantile(value, probs=c(fractile))
104 |   }
105 |   return(newvalue)
106 | }
107 | 
## Biexponential-style transformation (inspired by flowJo):
## values at or below `lim` are left unchanged, values above `lim` are
## log10-compressed so that each decade above `lim` spans `decade.size` units
biexp_trans <- function(lim = 5, decade.size = lim){
  ## Forward transform (log warnings for non-positive input are suppressed;
  ## those positions take the linear branch anyway)
  forward <- function(x){
    compressed <- lim + decade.size * (suppressWarnings(log(x, 10)) - log(lim, 10))
    ifelse(x <= lim, x, compressed)
  }

  ## Inverse of the forward transform
  backward <- function(x) {
    expanded <- 10^(((x-lim)/decade.size) + log(lim,10))
    ifelse(x <= lim, x, expanded)
  }

  ## Pretty breaks on the linear part, log10 breaks on the compressed part
  pick_breaks <- function(x) {
    linear_breaks <- scales::pretty_breaks()
    log_breaks <- scales::breaks_log(10)

    if (all(x <= lim)) {
      return(linear_breaks(x))
    }
    if (all(x > lim)) {
      return(log_breaks(x))
    }
    ## Range straddles `lim`: combine breaks from both parts
    unique(c(linear_breaks(c(x[1],lim)),
             log_breaks(c(lim, x[2]))))
  }

  scales::trans_new(paste0("biexp-",format(lim)), forward, backward, pick_breaks)
}


--------------------------------------------------------------------------------
/R/color.R:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/R/color.R


--------------------------------------------------------------------------------
/R/feature_rankplot.R:
--------------------------------------------------------------------------------
  1 | fractile.line <- function(x, Q=0.9, trans="log2",add.n=0.5){
  2 |   q <- quantile(x,probs=c(Q))
  3 |   transrev <- ggforce::trans_reverser(trans)$inverse
  4 |   data.frame(y=q, yend=q,label=signif(transrev(-q),2)-add.n)
  5 | }
  6 | 
  7 | #' Feature-Rankplot
  8 | #' 
  9 | #' Draws a normalized rank plot for a given feature. 
 10 | #' Can include barcode plot to annotate cells along the ranking (such as cell type).
 11 | #' 
 12 | #' @param data `Double` used for ranking the cells
 13 | #' @param group `Factor` used for grouping comparitors (each having independt ranking)
 14 | #' @param color `Factor` used for coloring the graphs
 15 | #' @param linetype `Factor` used for annotating line and smooth line plots
 16 | #' @param wrap `Factor` used for wrapping the plot
 17 | #' @param draw.points `Boolean` of whether to draw ranked points
 18 | #' @param draw.line `Boolean` of whether to connect ranked points with a line
 19 | #' @param draw.smooth `Boolean` of whether to draw a smoothed rank plot line
 20 | #' @param draw.fractile `Boolean` of whether to draw a fractile lines and values
 21 | #' @param draw.barcode `Boolean` of whether to draw a barcode plot by clusters
 22 | #' @param trans Transformation method for visualizing data
 23 | #' @param add.n `Double` constant to add for showing "0" in log transformation
 24 | #' @param colors Named vector of colors for manual coloring
 25 | #' @param fractile.upper `Double` fractile for upper line
 26 | #' @param fractile.lower `Double` fractile for lower line
 27 | #' @param barcodeGroup `Factor` used grouping the barcode plot
 28 | #' @param barcode.stepSize `Double` of the relative size of each barcode "column" (until a better way is determined)
 29 | #' @param barcode.downsample=500 `Integer` number of cells to be included in random downsampling within each barcodeGroup
 30 | #' @param barcode.alpha `Double` opacity of each "bar" in the barcode plot
 31 | #' @param barcode.refGroups `Character` from which groups (from "group" input) should barcodes be shown. (If NULL all groups will be included)
 32 | #'
 33 | #' @return A ggplot object
 34 | 
 35 | feature_rankplot <- function(data, 
 36 |                              group=NULL, 
 37 |                              color=NULL, 
 38 |                              linetype=NULL, 
 39 |                              wrap=NULL, 
 40 |                              draw.points=TRUE, 
 41 |                              draw.line=FALSE, 
 42 |                              draw.smooth=FALSE, 
 43 |                              draw.fractile=FALSE,
 44 |                              draw.barcode=FALSE, 
 45 |                              trans="biexp", 
 46 |                              add.n=0, 
 47 |                              colors=NULL,
 48 |                              fractile.upper=0.8, 
 49 |                              fractile.lower=0.2, 
 50 |                              barcodeGroup=NULL, 
 51 |                              barcode.stepSize=0.3, 
 52 |                              barcode.downsample=500,
 53 |                              barcode.alpha=0.3,
 54 |                              barcode.refGroups=NULL,
 55 |                              barcode.colors=NULL
 56 |                             ){
 57 |   require("ggrepel")
 58 |   require("ggplot2")
 59 |   
 60 |   ## Make a combined dataframe before ordering by rank
 61 |   data.combined <- data.frame(value=data,include=1)
 62 |   if(!is.null(group)) data.combined$group <- group
 63 |   if(!is.null(color)) data.combined$color <- color
 64 |   if(!is.null(linetype)) data.combined$linetype <- linetype
 65 |   if(!is.null(wrap)) data.combined$wrap <- wrap
 66 |   if(!is.null(barcodeGroup)) data.combined$barcodeGroup <- barcodeGroup
 67 |   
 68 |   ## Split data into groups for which ranks should be independent
 69 |   if(!is.null(group) | !is.null(wrap)){
 70 |     if(is.null(group)){
 71 |       data.grouped <- split(data.combined,list(wrap))
 72 |     } else if(is.null(wrap)){
 73 |       data.grouped <- split(data.combined,list(group))
 74 |     } else {
 75 |       data.grouped <- split(data.combined,list(group,wrap))
 76 |     }
 77 |   } else {
 78 |     data.grouped <- list(data.combined)
 79 |   }
 80 |   
 81 |   ## Calculate normalized ranks for each group
 82 |   rankedList <- lapply(data.grouped,FUN=function(x){
 83 |     x <- x[order(x$value),]
 84 |     x$rank <- seq_along(x$value)/length(x$value)
 85 |     return(x)
 86 |   })
 87 |   
 88 |   ## Define transformation functions
 89 |   transFun <- ggforce::trans_reverser(trans)$transform
 90 |   transFun.inverse <- ggforce::trans_reverser(trans)$inverse
 91 |   
 92 |   ## Merge the rankedList into a single dataframe for plotting
 93 |   plotData <- do.call("rbind",rankedList)
 94 |   plotData$value <- plotData$value+add.n
 95 | 
 96 |   p <- ggplot(plotData,aes(x=rank,y=value))
 97 |   
 98 |   if(draw.smooth == TRUE) p <- p + geom_line(stat="smooth",method="auto",se=FALSE, aes(color=color, group=group, linetype=linetype), alpha=0.5)
 99 |   if(draw.points == TRUE) p <- p + geom_point(aes(color=color),alpha=1,pch=19)
100 |   if(draw.line == TRUE) p <- p + geom_line(aes(color=color, group=group, linetype=linetype),alpha=0.5)
101 |   
102 |   ## Include "barcode plot"
103 |   if(draw.barcode == TRUE & !is.null(barcodeGroup)){
104 |     value.max <- max(plotData$value)
105 |     
106 |     # Would be nice to have the barcode plot as a seperate plot
107 |     # But at the same time, we would like to keep the ability to
108 |     # make facets and keep the barcode information for each facet. 
109 |     # To achieve this, we need to define a set "step" size for each
110 |     # barcode line. As this varies depending on the range of values
111 |     # we are using a bit of a hack (seems to work for our data):
112 |     ## Step denotes the "column" width of each barcode column.
113 |     step <- barcode.stepSize*(log(max(plotData$value))/log(300))
114 |     
115 |     ## Where should the first barcode column start (x-axis value)
116 |     step.max <- transFun(value.max)+(step/2)
117 |     barcodeGroups <- unique(plotData$barcodeGroup)
118 |     
119 |     ## Transform steps according to transformation function
120 |     steps <- transFun.inverse((step.max-step+seq_along(barcodeGroups)*step))
121 |     names(steps) <- barcodeGroups
122 |     
123 |     subset <- lapply(barcodeGroups,FUN=function(x){
124 |       ## We allow basing barcodes on specific groups as this allows us to only show
125 |       ## cells from DF1 (as these are most likely to show expression - if detectable)
126 |       if(!is.null(barcode.refGroups)){
127 |         subset <- which(plotData$barcodeGroup == x & plotData$group %in% barcode.refGroups)
128 |       } else {
129 |         subset <- which(plotData$barcodeGroup == x)
130 |       }
131 |       
132 |       ## Barcodes easily saturate with, to avoid this, we do random downsampling within each barcodeGroup
133 |       downsample <- ifelse(length(subset) > barcode.downsample, barcode.downsample, length(subset))
134 |       subset <- subset[sample(x=length(subset), size=downsample, replace=FALSE)]
135 |     })
136 |     subset <- do.call("c",subset)
137 |     plotData.barcode <- plotData[subset,]
138 |     
139 |     ## Allow another color scale
140 |     p <- p + ggnewscale::new_scale_color()
141 |     p <- p + geom_point(data=plotData.barcode, aes(y=steps[barcodeGroup], col=barcodeGroup, alpha=barcode.alpha),shape="-", size=2)
142 |     
143 |     ## Set manual color scheme for barcode groups
144 |     if(!is.null(barcode.colors)){
145 |       p <- p + scale_color_manual(values=barcode.colors)
146 |     }
147 |   }
148 |   
149 |   ## ADD fractile STATS
150 |   if(draw.fractile == TRUE){
151 |     if(fractile.upper > 0){
152 |       ## Add line segments for upper fractile
153 |       # a bit of a hack to get positions to align? is there a better solution?
154 |       p <- p + stat_summary_bin(geom = "segment", binwidth=2, fun.data = fractile.line,  fun.args=list(Q=fractile.upper,trans=trans,add.n=add.n), aes(x=1, xend=fractile.upper, group=group), linetype="dashed")
155 |       
156 |       ## Add text labels for upper fractile
157 |       p <- p + stat_summary_bin(geom = "text_repel", binwidth=2, fun.data = fractile.line,  fun.args=list(Q=fractile.upper,trans=trans,add.n=add.n), aes(x=fractile.upper, group=group), position=position_nudge(x=(1-fractile.upper)), col="black", direction="y",hjust=1,nudge_x=fractile.upper, fontface="bold",segment.alpha=0.25)
158 |       
159 |       ## a bit of a hack to get the lines in all facets - not sure why its needed?
160 |       #,purpose=unique(plotData[,wrap.by])
161 |       p <- p + geom_vline(data=data.frame(expand.grid(list(q=c(fractile.lower,fractile.upper)))),aes(xintercept=q),alpha=0.35,linetype="dotted")
162 |     }
163 |     
164 |     if(fractile.lower > 0){
165 |       ## Add line segments for lower fractile
166 |       p <- p + stat_summary_bin(geom = "segment", binwidth=2, fun.data = fractile.line,  fun.args=list(Q=fractile.lower,trans=trans,add.n=add.n), aes(x=1, xend=fractile.lower, group=group),linetype="dashed",alpha=0.5)
167 |       
168 |       
169 |       ## Add text labels for lower fractile
170 |       p <- p + stat_summary_bin(geom = "text", binwidth=2, fun.data = fractile.line,  fun.args=list(Q=fractile.lower,trans=trans,add.n=add.n), aes(x=0.95, group=group), col="black", hjust=0, vjust=-0.5, fontface="italic")
171 |     }
172 |   }
173 |   
174 |   ## Scale
175 |   if(trans == "biexp"){
176 |     p <- p + scale_y_continuous(trans=trans, limits=c(-1,max(plotData$value)), expand=c(0.01,0.01))
177 |   } else {
178 |     p <- p + scale_y_continuous(trans=trans, expand=c(0.01,0.01))
179 |   }
180 |   
181 |   p <- p + scale_x_continuous(expand=c(0.01,0.01))
182 |   
183 |   ## Facet
184 |   if(!is.null(wrap)){
185 |     p <- p + facet_grid(~wrap)
186 |   }
187 |   
188 |   ## Layout
189 |   p <- p + labs(col="Sample") + theme_bw() + ylab("Count") + xlab("Rank fraction") + guides(alpha=FALSE)
190 |   p <- p + theme_get() + coord_flip()
191 |   
192 |   ## Manual colors
193 |   if(!is.null(colors))p <- p + scale_color_manual(values=colors)
194 |   
195 |   return(p)
196 | }


--------------------------------------------------------------------------------
/R/feature_rankplot_hist.R:
--------------------------------------------------------------------------------
  1 | ## Density ("histogram") panel stacked on top of a feature_rankplot() panel.
  2 | ##
  3 | ## Arguments:
  4 | ##   data             values to plot, one per observation (stored as column "value")
  5 | ##   group            per-observation grouping; sets fill/linetype of the densities,
  6 | ##                    the per-group sums in the labels and the rank plot lines
  7 | ##   color, linetype  stored in the combined data frame but not used further here
  8 | ##   wrap             per-observation facet variable; a constant 1 is used when NULL
  9 | ##   barcodeGroup     forwarded to feature_rankplot()
 10 | ##   draw.histogram   if FALSE the density panel is skipped and the returned grid
 11 | ##                    holds the rank plot only
 12 | ##   trans            transformation name for the value axis of both panels
 13 | ##   add.n            NOTE(review): accepted but not forwarded to feature_rankplot()
 14 | ##                    - confirm whether that is intended
 15 | ##   histogram.colors fill colors passed to scale_fill_manual()
 16 | ##   title            label placed on the top panel by plot_grid()
 17 | ##   gates            optional data frame with a column "gate", drawn as dashed red lines
 18 | ##   legend           if FALSE the legend of the rank plot is removed
 19 | ##   yaxis.text       if TRUE axis.title.y of the rank plot is shown (size 6)
 20 | ##   ...              forwarded to feature_rankplot()
 21 | ##
 22 | ## Returns the grid built by cowplot::plot_grid().
 23 | ##
 24 | ## NOTE(review): %>%, group_by(), summarise(), arrange() and the ggplot2 functions
 25 | ## are not loaded here - presumably dplyr and ggplot2 are attached by the caller.
 26 | feature_rankplot_hist <- function(data, 
 27 |                                   group=NULL, 
 28 |                                   color=NULL, 
 29 |                                   linetype=NULL, 
 30 |                                   wrap=NULL, 
 31 |                                   barcodeGroup=NULL, 
 32 |                                   draw.histogram=TRUE, 
 33 |                                   trans="biexp", 
 34 |                                   add.n=0, 
 35 |                                   histogram.colors=NULL,
 36 |                                   title="",
 37 |                                   gates=NULL, 
 38 |                                   legend=TRUE, 
 39 |                                   yaxis.text=FALSE, ...){
 40 |   library("cowplot")
 41 |   
 42 |   ## Make a combined data.frame with one row per observation
 43 |   data.combined <- data.frame(value=data)
 44 |   if(!is.null(group)) data.combined$group <- group
 45 |   if(!is.null(color)) data.combined$color <- color
 46 |   if(!is.null(linetype)) data.combined$linetype <- linetype
 47 |   if(!is.null(wrap)){
 48 |     data.combined$wrap <- wrap
 49 |   } else {
 50 |     data.combined$wrap <- 1
 51 |   }
 52 |   if(!is.null(barcodeGroup)) data.combined$barcodeGroup <- barcodeGroup
 53 |   
 54 |   ## Make histograms (the sums and labels below are only needed for this panel)
 55 |   if(draw.histogram == TRUE){
 56 |     ## Calculate (UMI) sum values
 57 |     data.combined.sum <- data.combined %>% 
 58 |       group_by(wrap=wrap, group=group) %>% 
 59 |       summarise(sum=sum(value)) %>% 
 60 |       arrange(wrap, sum)
 61 |     
 62 |     ## Make "nice" labels with group name and UMI sum for each wrap
 63 |     data.combined.sum.label <- data.combined.sum %>% 
 64 |       group_by(wrap) %>% 
 65 |       summarise(label=paste(paste0(group,": ",sprintf("%05s",as.character(sum))),collapse="\n"))
 66 |     
 67 |     p.hist <- ggplot(data.combined, aes(x=value)) + 
 68 |       scale_x_continuous(trans=trans,limits=c(-1,max(data.combined$value)), expand=c(0.01,0.01)) + 
 69 |       geom_density(aes(y=..density.. ,linetype=group, fill=group), alpha=0.5, bw=0.35) + 
 70 |       guides(fill=guide_legend(reverse = TRUE), linetype=guide_legend(reverse = TRUE)) + 
 71 |       scale_y_continuous(expand=c(0,0)) + scale_fill_manual(values=histogram.colors) + 
 72 |       theme(axis.title=element_blank(),
 73 |             axis.text.y=element_blank(), 
 74 |             axis.text.x=element_blank(), 
 75 |             axis.ticks=element_blank(), 
 76 |             panel.border=element_blank(), 
 77 |             panel.grid=element_blank(), 
 78 |             plot.margin=unit(c(0,0,0,0),"cm"), 
 79 |             legend.direction="vertical", 
 80 |             legend.title=element_blank(),
 81 |             legend.background=element_blank(),
 82 |             legend.box.margin=unit(c(0,0,0,0),"mm"), 
 83 |             legend.key.width=unit(0.15,"cm"),
 84 |             legend.key.height=unit(0.10,"cm"),
 85 |             legend.position=c(0.4,2), 
 86 |             legend.justification=c(0,1))
 87 |     
 88 |     if(!is.null(wrap)){
 89 |       p.hist <- p.hist + geom_text(data=data.combined.sum.label, x = Inf, y = Inf, hjust=1, vjust=1.5, aes(label=label), size=1.5)
 90 |       p.hist <- p.hist + facet_wrap( ~wrap)
 91 |     } else {
 92 |       scale_label <- with(data.combined.sum[order(factor(data.combined.sum$group, levels=levels(data.combined$group))),],paste0(group,": ",sprintf("%05s",as.character(sum))))
 93 |       p.hist <- p.hist + 
 94 |         scale_linetype_discrete(labels=scale_label) + 
 95 |         scale_fill_manual(values=histogram.colors, labels=scale_label) + 
 96 |         theme(legend.position=c(1,1.5), legend.justification=c(1,1), legend.text.align=1, plot.margin=unit(c(0.3,0,0,0),"cm"))
 97 |     }
 98 |   }
 99 |   
100 |   ## Draw feature_rankplot
101 |   p.feature_rankplot <- feature_rankplot(data=data.combined$value, 
102 |                                          group=data.combined$group, 
103 |                                          linetype=data.combined$group, 
104 |                                          wrap=data.combined$wrap, 
105 |                                          barcodeGroup=data.combined$barcodeGroup, 
106 |                                          barcode.stepSize=0.4, 
107 |                                          draw.points = F, 
108 |                                          draw.barcode = T, 
109 |                                          draw.line = T, 
110 |                                          trans=trans, ...) + 
111 |   theme(strip.text = element_blank(), 
112 |         plot.margin = unit(c(0,0.3,0,0),"cm"), 
113 |         legend.direction = "vertical",
114 |         legend.position = c(0.90,0.02), 
115 |         legend.key.size=unit(0.2,"cm"),
116 |         legend.title=element_blank(),
117 |         legend.justification=c(1,0),
118 |         axis.title=element_blank(),
119 |         axis.text.y=element_blank()) + 
120 |   guides(linetype=F, col=guide_legend(override.aes = list(shape = 15)), group=F) + ylab("UMI count") + xlab("Cell ranking")
121 | 
122 |   if(legend == FALSE){
123 |     p.feature_rankplot <- p.feature_rankplot + theme(legend.position="none")
124 |   }
125 |   
126 |   if(yaxis.text == TRUE){
127 |     p.feature_rankplot <- p.feature_rankplot + theme(axis.title.y=element_text(size=6))
128 |   }
129 |   
130 |   if(!is.null(gates)){
131 |     p.feature_rankplot <- p.feature_rankplot + geom_vline(data=gates,aes(xintercept=gate), col="red", alpha=0.5, linetype="dashed")
132 |   }
133 |   
134 |   ## Assemble the panels. p.hist only exists when the histogram was drawn, so it must not be referenced otherwise
135 |   if(draw.histogram == TRUE){
136 |     ## NOTE(review): rel_heights lists three values for two panels - confirm how plot_grid() treats the third
137 |     p <- plot_grid(p.hist, p.feature_rankplot, ncol=1, align="v", axis="lr", label_size=7, labels=c(title,""), hjust = 0, vjust=1.1, rel_heights=c(5,15,2))
138 |   } else {
139 |     p <- plot_grid(p.feature_rankplot, ncol=1, label_size=7, labels=title, hjust = 0, vjust=1.1)
140 |   }
141 |   
142 |   return(p)
143 | }


--------------------------------------------------------------------------------
/R/feature_rankplot_hist_custom.R:
--------------------------------------------------------------------------------
 1 | ## Wrapper around feature_rankplot_hist() that pulls its data with FetchData().
 2 | ##
 3 | ## Arguments:
 4 | ##   data             object passed to FetchData() (presumably a Seurat object - confirm)
 5 | ##   marker           feature to plot (fetched with slot = "counts")
 6 | ##   group            name of the grouping variable to fetch; "dilution" triggers the
 7 | ##                    concentration labelling below
 8 | ##   barcodeGroup     name of the variable used for the barcode columns
 9 | ##   wrap             optional name of a facet variable
10 | ##   conc             optional concentration; cells in group "DF4" get conc/4
11 | ##   title            plot title; defaults to marker
12 | ##   histogram.colors colors of the density panel
13 | ##   barcode.colors   colors of the barcode groups (default: color.supercluster, which
14 | ##                    is not defined in this file)
15 | ##   ...              forwarded to feature_rankplot_hist()
16 | ##
17 | ## Returns the plot invisibly.
18 | feature_rankplot_hist_custom <- function(data,marker,group=NULL,barcodeGroup=NULL,wrap=NULL,conc=NULL,title=NULL,histogram.colors=c("red","blue"),barcode.colors=color.supercluster,...){
19 | 
20 |   data <- FetchData(data, vars=c(marker,barcodeGroup,group,wrap), slot = "counts")
21 |   colnames(data)[1:3] <- c("value","barcodeGroup","group")
22 | 
23 |   color.manual <- histogram.colors
24 | 
25 |   ## Resolve the facet vector here so that curWrap is defined for every value
26 |   ## of `group`, not only for "dilution"
27 |   if(is.null(wrap)){
28 |     curWrap <- NULL
29 |   } else {
30 |     colnames(data)[4] <- "wrap"
31 |     curWrap <- data$wrap
32 |   }
33 | 
34 |   ## Guard against group=NULL: comparing NULL with a string gives a zero-length condition
35 |   if(!is.null(group) && group == "dilution"){
36 |     if(!is.null(conc)){
37 |       data$conc <- conc
38 |       data$conc[data$group == "DF4"] <- conc/4
39 |       data$conc <- factor(data$conc, levels=rev(sort(unique(data$conc))))
40 |       levels(data$conc) <- sprintf("%2.2fug/mL",as.double(levels(data$conc)))
41 |     } else {
42 |       data$conc <- data$group
43 |     }
44 |     
45 |     data$group <- data$conc
46 |     names(color.manual) <- levels(data$group)
47 |   }
48 | 
49 |   p <- feature_rankplot_hist(data=data$value, 
50 |                              group=data$group, 
51 |                              wrap=curWrap, 
52 |                              barcodeGroup=data$barcodeGroup, 
53 |                              title=ifelse(!is.null(title),title,marker),
54 |                              barcode.refGroups=levels(data$group)[1],
55 |                              histogram.colors=color.manual,
56 |                              barcode.colors=barcode.colors,
57 |                              ...)
58 | 
59 |   ## Returned invisibly, so a bare top-level call does not print the plot
60 |   invisible(p)
61 | }


--------------------------------------------------------------------------------
/R/ggplot_settings.R:
--------------------------------------------------------------------------------
 1 | ## Shared plotting defaults: text sizes, figure export constants, the default
 2 | ## ggplot2 theme and the default sizes of commonly used geoms.
 3 | require("ggplot2")
 4 | 
 5 | ## Text and panel-label constants
 6 | text.size <- 7
 7 | text.axis.size <- 6
 8 | panel.label_size <- 10
 9 | panel.label_vjust <- 0.98
10 | panel.label_hjust <- 0
11 | 
12 | ## Figure export constants
13 | figure.resolution <- 600
14 | figure.antialias <- "cleartype"
15 | figure.width.full <- 7
16 | figure.unit <- "in"
17 | 
18 | ## Default theme: theme_bw() at the base text size plus the overrides below
19 | theme_set(
20 |   theme_bw(base_size=text.size) + 
21 |     theme(
22 |       text=element_text(size=text.size),
23 |       axis.text.y=element_text(size=text.axis.size),
24 |       axis.text.x=element_text(angle=45, hjust=1, size=text.axis.size), 
25 |       panel.grid.minor=element_blank(), 
26 |       strip.background=element_blank(), 
27 |       strip.text=element_text(face="bold", size=text.size), 
28 |       legend.position = "bottom",
29 |       plot.margin = unit(c(1,1,1,1),"mm")
30 |     )
31 | )
32 | 
33 | ## ggplot2 geoms that share the default size 0.25
34 | invisible(lapply(
35 |   c("bar","tile","rect","density","vline","hline"),
36 |   function(geom.name) update_geom_defaults(geom.name, list(size=0.25))
37 | ))
38 | update_geom_defaults("line", list(size=0.35))
39 | update_geom_defaults("point", list(size=1))
40 | 
41 | ## ggalluvial geoms
42 | library("ggalluvial")
43 | update_geom_defaults("stratum", list(size=0.25))
44 | update_geom_defaults("flow", list(size=0.25))
45 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # CITE-seq optimization
 2 | Code and results from TotalSeqC antibody titration and pipeline benchmarking for CITE-seq experiments.
 3 | 
 4 | Data and Seurat V3 objects from the study are deposited at FigShare under this DOI: [10.6084/m9.figshare.c.5018987](https://doi.org/10.6084/m9.figshare.c.5018987)
 5 | 
 6 | This repository contains all the code used in the processing of the aligned data and data analysis (used for generating all figures) included in the manuscript at BioRxiv.org: 
 7 | 
 8 | **Manuscript**
 9 | 
10 | [Improving oligo-conjugated antibody signal in multimodal single-cell analysis](https://www.biorxiv.org/content/10.1101/2020.06.15.153080v1).
11 | Terkild Brink Buus, Alberto Herrera, Ellie Ivanova, Eleni Mimitou, Anthony Cheng, Thales Papagiannakopoulos, Peter Smibert, Niels Odum, Sergei B Koralov. bioRxiv 2020.06.15.153080; doi: https://doi.org/10.1101/2020.06.15.153080
12 | 
13 | ---
14 | 
15 | **Pre-processing:**
16 | * [Loading data, Demultiplexing, Preprocessing and down-sampling](Demux_Preprocess_Downsample.md) - Supplementary Figure S1
17 | * [Load unfiltered data and determine cell-containing vs. empty droplets](Load-unfiltered-data.md) - Supplementary Figure S6
18 | 
19 | **Data analysis:**
20 | * [Antibody concentration titration](Antibody-titration.md) - Figure 1, 2 and Supplementary Figure S2
21 | * [Reducing staining volume](Volume-titration.md) - Figure 3 and Supplementary Figure S3
22 | * [Reducing cell number at staining](Cell-number-titration.md) - Figure 4 and Supplementary Figure S4
23 | * [Reducing cell number mitigates reduced staining volume](Volume-and-cell-number-titration.md) - Supplementary Figure S5
24 | * [ADT signal in cell-containing vs. empty droplets](ADT-reads-in-cells-vs-empty-drops.md) - Figure 5 and Supplementary Figure S8
25 | * [10X Datasets: UMI per marker plots](10X-Datasets-UMI-per-marker.md) - Supplementary Figure S7
26 | * [Comparison of ADT counting methods](ADT-counting-methods.md) - Figure 6 and Supplementary Figure S9
27 | 
28 | We also included the [Snakefiles](Snakemake/) used with Snakemake to generate the alignment and counting data from our dataset and for the 10X datasets.
29 | 


--------------------------------------------------------------------------------
/Sequencing satuation.R:
--------------------------------------------------------------------------------
 1 | ## Per-sample sequencing saturation of the ADT library, computed as 1 - (UMIs / reads)
 2 | ## from the umi_count and read_count matrices of the "A1_S5_d1_ADT_nocorrect" run.
 3 | set.seed(114)
 4 | require("Seurat", quietly=T)
 5 | require("tidyverse", quietly=T)
 6 | 
 7 | data.Seurat <- "data/5P-CITE-seq_Titration.rds"
 8 | 
 9 | ## NOTE(review): data.drive and data.project.dir are not defined in this script;
10 | ## presumably they are set in the environment before it is run - confirm.
11 | dataCSCADTDirReads <- file.path(data.drive,data.project.dir,"cite-seq-count/A1_S5_d1_ADT_nocorrect/read_count")
12 | dataCSCADTDir <- file.path(data.drive,data.project.dir,"cite-seq-count/A1_S5_d1_ADT_nocorrect/umi_count")
13 | 
14 | ## Load the UMI count matrix and drop the "unmapped" row
15 | CSC.ADT.uncorrected <- Read10X(data.dir=dataCSCADTDir, gene.column=1)
16 | CSC.ADT.uncorrected <- CSC.ADT.uncorrected[rownames(CSC.ADT.uncorrected) != "unmapped",]
17 | 
18 | ## Load the read count matrix and drop the "unmapped" row
19 | CSC.ADT.uncorrected.reads <- Read10X(data.dir=dataCSCADTDirReads, gene.column=1)
20 | CSC.ADT.uncorrected.reads <- CSC.ADT.uncorrected.reads[rownames(CSC.ADT.uncorrected.reads) != "unmapped",]
21 | 
22 | object <- readRDS(file=data.Seurat)
23 | 
24 | ## Show number of cells from each sample
25 | table(object$group)
26 | 
27 | ## Keep only the cells whose volume is "50µl"
28 | object <- subset(object, subset=volume == "50µl")
29 | object
30 | 
31 | ## Column sums (total UMIs / reads per barcode), looked up by the barcodes kept in the object
32 | UMI <- Matrix::colSums(CSC.ADT.uncorrected)[colnames(object)]
33 | reads <- Matrix::colSums(CSC.ADT.uncorrected.reads)[colnames(object)]
34 | 
35 | df <- data.frame(barcode=colnames(object),sample=object$group,UMI=UMI,reads=reads)
36 | 
37 | ## Sum UMIs and reads per sample and derive saturation = 1 - UMI/reads
38 | df %>% group_by(sample) %>% summarize(UMI=sum(UMI), reads=sum(reads)) %>% mutate(saturation=(1-(UMI/reads)))
39 | 


--------------------------------------------------------------------------------
/Snakemake/10X_VDJ/Snakefile:
--------------------------------------------------------------------------------
  1 | # Snakemake workflow for the 10X_VDJ dataset: merges the two lanes of each FASTQ read
  2 | # and counts features with CITE-seq-Count, kallisto (kb count) and Cell Ranger.
  3 | 
  4 | # Workflow-wide settings; interpolated into the shell commands below.
  5 | cells=8000
  6 | cores=16
  7 | memory="64G"
  8 | whitelist="include/10xv2_whitelist.txt"
  9 | featureRef="include/feature-ref.csv"
 10 | chemistry="10xv2"
 11 | # Number of rows taken as HTOs from the top of featureRef (after the header line).
 12 | HTO_num=0
 13 | # Line number at which `tail -n +N` starts the ADT list: header line + HTO rows + 1.
 14 | HTO_skip=HTO_num+2
 15 | 
 16 | # Params function for the CITE-seq-Count rules: --start-trim is "0" when the
 17 | # {type} wildcard is 'HTO' and "10" otherwise (returned as a one-element list).
 18 | def get_trim_length(wcs):
 19 | 	Trim = "10"
 20 | 	if wcs.type == 'HTO':
 21 | 		Trim = "0"
 22 | 	return [Trim]
 23 | 
 24 | # Concatenate lanes L001 and L002 of each read into one file per read.
 25 | # The two cat commands are chained with '&&' rather than a single '&': '&' would
 26 | # send the first cat to the background, so the job could end before R1 is
 27 | # completely written and a failure of the first cat would go unnoticed.
 28 | rule concat:
 29 | 	input:
 30 | 		R11="data/fastq/{sample}_L001_R1_001.fastq.gz",
 31 | 		R12="data/fastq/{sample}_L002_R1_001.fastq.gz",
 32 | 		R21="data/fastq/{sample}_L001_R2_001.fastq.gz",
 33 | 		R22="data/fastq/{sample}_L002_R2_001.fastq.gz"
 34 | 	output:
 35 | 		R1="fastq_merged/{sample}_L001_R1_001.fastq.gz",
 36 | 		R2="fastq_merged/{sample}_L001_R2_001.fastq.gz"
 37 | 	shell:
 38 | 		"cat {input.R11} {input.R12} > {output.R1} && "
 39 | 		"cat {input.R21} {input.R22} > {output.R2}"
 40 | 
 41 | # Count features with CITE-seq-Count; the {hamming} wildcard is passed as --max-error.
 42 | rule citeseqcount:
 43 | 	input:
 44 | 		R1="fastq_merged/{sample}_L001_R1_001.fastq.gz",
 45 | 		R2="fastq_merged/{sample}_L001_R2_001.fastq.gz",
 46 | 		features="include/cite-seq-count/CSC_{type}-ref.csv",
 47 | 		whitelist={whitelist}
 48 | 	params:
 49 | 		Trim=get_trim_length
 50 | 	output:
 51 | 		directory("cite-seq-count/{sample}_d{hamming}_{type}")
 52 | 	shell:
 53 | 		"CITE-seq-Count -T {cores} --max-error {wildcards.hamming} -R1 {input.R1} -R2 {input.R2} -t {input.features} -cbf 1 -cbl 16 -umif 17 -umil 26 -o {output} -wl {input.whitelist} -cells {cells} -u unmapped_{wildcards.sample}.csv --start-trim {params.Trim}"
 54 | 
 55 | # Same as citeseqcount but with --no_umi_correction; the output directory gets a _nocorrect suffix.
 56 | rule citeseqcount_nocorrect:
 57 | 	input:
 58 | 		R1="fastq_merged/{sample}_L001_R1_001.fastq.gz",
 59 | 		R2="fastq_merged/{sample}_L001_R2_001.fastq.gz",
 60 | 		features="include/cite-seq-count/CSC_{type}-ref.csv",
 61 | 		whitelist={whitelist}
 62 | 	params:
 63 | 		Trim=get_trim_length
 64 | 	output:
 65 | 		directory("cite-seq-count/{sample}_d{hamming}_{type}_nocorrect")
 66 | 	shell:
 67 | 		"CITE-seq-Count -T {cores} --no_umi_correction --max-error {wildcards.hamming} -R1 {input.R1} -R2 {input.R2} -t {input.features} -cbf 1 -cbl 16 -umif 17 -umil 26 -o {output} -wl {input.whitelist} -cells {cells} -u unmapped_{wildcards.sample}.csv --start-trim {params.Trim}"
 68 | 
 69 | # ADT reference: featureRef starting at line HTO_skip (header and HTO rows removed).
 70 | rule featureList_ADT:
 71 | 	input:
 72 | 		{featureRef}
 73 | 	output:
 74 | 		"include/ADT-ref.csv"
 75 | 	shell:
 76 | 		"tail -n +{HTO_skip} {input} > {output}"
 77 | 
 78 | # HTO reference: the first HTO_num rows after the header line of featureRef.
 79 | rule featureList_HTO:
 80 | 	input:
 81 | 		{featureRef}
 82 | 	output:
 83 | 		"include/HTO-ref.csv"
 84 | 	shell:
 85 | 		"tail -n +2 {input} | head -n {HTO_num} > {output}"
 86 | 
 87 | # CITE-seq-Count tag list: column 5 then column 1 of the reference CSV.
 88 | rule featureList_citeseqcount:
 89 | 	input:
 90 | 		"include/{type}-ref.csv"
 91 | 	output:
 92 | 		"include/cite-seq-count/CSC_{type}-ref.csv"
 93 | 	shell:
 94 | 		"awk -F, '{{print $5\",\"$1}}' {input} > {output}"
 95 | 
 96 | # kallisto feature list: column 1 then column 5 of the reference CSV.
 97 | rule featureList_kallisto:
 98 | 	input:
 99 | 		"include/{type}-ref.csv"
100 | 	output:
101 | 		"include/kallisto/kallisto_{type}-ref.csv"
102 | 	shell:
103 | 		"awk -F, '{{print $1\",\"$5}}' {input} > {output}"
104 | 
105 | # As featureList_kallisto, with the {addBase} wildcard appended to column 5.
106 | rule featureList_kallisto_addBase:
107 | 	input:
108 | 		"include/{type}-ref.csv"
109 | 	output:
110 | 		"include/kallisto/kallisto_{type}_{addBase}-ref.csv"
111 | 	shell:
112 | 		"awk -F, '{{print $1\",\"$5\"{wildcards.addBase}\"}}' {input} > {output}"
113 | 
114 | # Run kb count on the merged FASTQ files with the idx_human index and t2g_human map.
115 | rule kallisto_GEX_count:
116 | 	input:
117 | 		index="include/kallisto/idx_human.idx",
118 | 		t2g="include/kallisto/t2g_human.txt",
119 | 		R1="fastq_merged/{sample}_L001_R1_001.fastq.gz",
120 | 		R2="fastq_merged/{sample}_L001_R2_001.fastq.gz"
121 | 	output:
122 | 		directory("kallisto/{sample}")
123 | 	shell:
124 | 		"kb count -i {input.index} -g {input.t2g} -x {chemistry} -t {cores} -m {memory} -w {whitelist} -o {output} {input.R1} {input.R2}"
125 | 
126 | # Create the t2g and FASTA files from the kallisto feature list with kite's featuremap.py.
127 | # NOTE(review): {k} only appears in the output names; it is not passed to featuremap.py.
128 | rule kallisto_kite:
129 | 	input:
130 | 		"include/kallisto/kallisto_{type}-ref.csv"
131 | 	output:
132 | 		t2g="include/kallisto/mismatch_t2g_{type}_{k}.txt",
133 | 		fa="include/kallisto/mismatch_{type}_{k}.fa"
134 | 	shell:
135 | 		"python3 ~/kite/featuremap/featuremap.py {input} --t2g {output.t2g} --fa {output.fa}"
136 | 
137 | # Build the kallisto index of the FASTA file, passing the {k} wildcard as -k.
138 | rule kallisto_kite_index:
139 | 	input:
140 | 		"include/kallisto/mismatch_{type}_{k}.fa"
141 | 	output:
142 | 		"include/kallisto/mismatch_{type}_{k}.idx"
143 | 	shell:
144 | 		"module load kallisto; "
145 | 		"kallisto index -i {output} -k {wildcards.k} {input}"
146 | 
147 | # Run kb count on the merged FASTQ files with the mismatch index and t2g map of {type}/{k}.
148 | rule kallisto_feature_count:
149 | 	input:
150 | 		index="include/kallisto/mismatch_{type}_{k}.idx",
151 | 		t2g="include/kallisto/mismatch_t2g_{type}_{k}.txt",
152 | 		R1="fastq_merged/{sample}_L001_R1_001.fastq.gz",
153 | 		R2="fastq_merged/{sample}_L001_R2_001.fastq.gz"
154 | 	output:
155 | 		directory("kallisto/features/{sample}.{type}_{k}")
156 | 	shell:
157 | 		"kb count -i {input.index} -g {input.t2g} -x {chemistry} -t {cores} -w {whitelist} -o {output} {input.R1} {input.R2}"
158 | 
159 | # Run cellranger count (module cellranger/3.1.0) with the libraries and feature reference of {id}.
160 | rule cellranger_feature:
161 | 	input:
162 | 		transcriptome="/gpfs/data/sequence/cellranger-refdata/refdata-cellranger-GRCh38-3.0.0",
163 | 		libraries="include/cellranger/libraries_{id}.csv",
164 | 		featureRef="include/cellranger/feature-ref_{id}.csv"
165 | 	params:
166 | 		chemistry="SC5P-R2"
167 | 	output:
168 | 		directory("cellranger_{id}")
169 | 	shell:
170 | 		"module unload miniconda3; module load cellranger/3.1.0; "
171 | 		"cellranger count --id={output} --transcriptome={input.transcriptome} --expect-cells={cells} --libraries={input.libraries} --feature-ref={input.featureRef} --chemistry={params.chemistry} --nosecondary --nopreflight --disable-ui --localcores={cores}"
172 | 


--------------------------------------------------------------------------------
/Snakemake/10X_VDJ/include/feature-ref.csv:
--------------------------------------------------------------------------------
 1 | id,name,read,pattern,sequence,feature_type
 2 | CD3,CD3_UCHT1_TotalC,R2,^NNNNNNNNNN(BC)NNNNNNNNN,CTCATTGTAACTCCT,Antibody Capture
 3 | CD19,CD19_HIB19_TotalC,R2,^NNNNNNNNNN(BC)NNNNNNNNN,CTGGGCAATTACTCG,Antibody Capture
 4 | CD45RA,CD45RA_HI100_TotalC,R2,^NNNNNNNNNN(BC)NNNNNNNNN,TCAATCCTTCCGCTT,Antibody Capture
 5 | CD4,CD4_RPA-T4_TotalC,R2,^NNNNNNNNNN(BC)NNNNNNNNN,TGTTCCCGCTCAACT,Antibody Capture
 6 | CD8a,CD8a_RPA-T8_TotalC,R2,^NNNNNNNNNN(BC)NNNNNNNNN,GCTGCGCTTTCCATT,Antibody Capture
 7 | CD14,CD14_M5E2_TotalC,R2,^NNNNNNNNNN(BC)NNNNNNNNN,TCTCAGACCTCCGTA,Antibody Capture
 8 | CD16,CD16_3G8_TotalC,R2,^NNNNNNNNNN(BC)NNNNNNNNN,AAGTTCACTCTTTGC,Antibody Capture
 9 | CD56,CD56_QA17A16_TotalC,R2,^NNNNNNNNNN(BC)NNNNNNNNN,TTCGCCGCATTGAGT,Antibody Capture
10 | CD25,CD25_BC96_TotalC,R2,^NNNNNNNNNN(BC)NNNNNNNNN,TTTGTCCTGTACGCC,Antibody Capture
11 | CD45RO,CD45RO_UCHL1_TotalC,R2,^NNNNNNNNNN(BC)NNNNNNNNN,CTCCGAATCATGTTG,Antibody Capture
12 | PD-1,PD-1_EH12.2H7_TotalC,R2,^NNNNNNNNNN(BC)NNNNNNNNN,ACAGCGCCGTATTTA,Antibody Capture
13 | TIGIT,TIGIT_A15153G_TotalC,R2,^NNNNNNNNNN(BC)NNNNNNNNN,TTGCTTACCGCCAGA,Antibody Capture
14 | isotype_control_IgG1,isotype_control_IgG1_MOPC-21_TotalC,R2,^NNNNNNNNNN(BC)NNNNNNNNN,GCCGGACGACATTAA,Antibody Capture
15 | isotype_control_IgG2a,isotype_control_IgG2a_MOPC-173_TotalC,R2,^NNNNNNNNNN(BC)NNNNNNNNN,CTCCTACCTAAACTG,Antibody Capture
16 | isotype_control_IgG2b,isotype_control_IgG2b_MPC-11_TotalC,R2,^NNNNNNNNNN(BC)NNNNNNNNN,ATATGTATCACGCGA,Antibody Capture
17 | CD127,CD127_A019D5_TotalC,R2,^NNNNNNNNNN(BC)NNNNNNNNN,GTGTGTTGTCCTATG,Antibody Capture
18 | CD15,CD15_W6D3_TotalC,R2,^NNNNNNNNNN(BC)NNNNNNNNN,TCACCAGTACCTAGT,Antibody Capture


--------------------------------------------------------------------------------
/Snakemake/10X_VDJ/runSnakemake.sh:
--------------------------------------------------------------------------------
1 | time snakemake kallisto/features/vdj_v1_hs_pbmc3_5gex_protein_antibody_S1.ADT_15
2 | time snakemake cite-seq-count/vdj_v1_hs_pbmc3_5gex_protein_antibody_S1_d1_ADT
3 | time snakemake cellranger_vdj_v1_hs_pbmc3_5gex_protein_antibody
4 | time snakemake cite-seq-count/vdj_v1_hs_pbmc3_5gex_protein_antibody_S1_d1_ADT_nocorrect
5 | 


--------------------------------------------------------------------------------
/Snakemake/10X_v3/Snakefile:
--------------------------------------------------------------------------------
  1 | cells=10000
  2 | cores=16
  3 | memory="64G"
  4 | whitelist="include/10xv3_whitelist.txt"
  5 | featureRef="include/feature-ref.csv"
  6 | chemistry="10xv3"
  7 | HTO_num=0
  8 | HTO_skip=HTO_num+2
  9 | 
 10 | def get_trim_length(wcs):
 11 | 	Trim = "10"
 12 | 	if wcs.type == 'HTO':
 13 | 		Trim = "0"
 14 | 	return [Trim]
 15 | 
 16 | rule concat:
 17 | 	input:
 18 | 		R11="fastq/{sample}_L001_R1_001.fastq.gz",
 19 | 		R12="fastq/{sample}_L002_R1_001.fastq.gz",
 20 | 		R21="fastq/{sample}_L001_R2_001.fastq.gz",
 21 | 		R22="fastq/{sample}_L002_R2_001.fastq.gz"	
 22 | 	output:
 23 | 		R1="fastq_merged/{sample}_L001_R1_001.fastq.gz",
 24 | 		R2="fastq_merged/{sample}_L001_R2_001.fastq.gz"
 25 | 	shell:
 26 | 		"cat {input.R11} {input.R12} > {output.R1} & "
 27 | 		"cat {input.R21} {input.R22} > {output.R2}"
 28 | 
 29 | rule citeseqcount:
 30 | 	input:
 31 | 		R1="fastq_merged/{sample}_L001_R1_001.fastq.gz",
 32 | 		R2="fastq_merged/{sample}_L001_R2_001.fastq.gz",
 33 | 		features="include/cite-seq-count/CSC_{type}-ref.csv",
 34 | 		whitelist={whitelist}
 35 | 	params:
 36 | 		Trim=get_trim_length
 37 | 	output:
 38 | 		directory("cite-seq-count/{sample}_d{hamming}_{type}")
 39 | 	shell:
 40 | 		"CITE-seq-Count -T {cores} --max-error {wildcards.hamming} -R1 {input.R1} -R2 {input.R2} -t {input.features} -cbf 1 -cbl 16 -umif 17 -umil 28 -o {output} -wl {input.whitelist} -cells {cells} -u unmapped_{wildcards.sample}.csv --start-trim {params.Trim}"
 41 | 
 42 | rule citeseqcount_nocorrect:
 43 |         input:
 44 |                 R1="fastq_merged/{sample}_L001_R1_001.fastq.gz",
 45 |                 R2="fastq_merged/{sample}_L001_R2_001.fastq.gz",
 46 |                 features="include/cite-seq-count/CSC_{type}-ref.csv",
 47 |                 whitelist={whitelist}
 48 |         params:
 49 |                 Trim=get_trim_length
 50 |         output:
 51 |                 directory("cite-seq-count/{sample}_d{hamming}_{type}_nocorrect")
 52 |         shell:
 53 |                 "CITE-seq-Count -T {cores} --no_umi_correction --max-error {wildcards.hamming} -R1 {input.R1} -R2 {input.R2} -t {input.features} -cbf 1 -cbl 16 -umif 17 -umil 28 -o {output} -wl {input.whitelist} -cells {cells} -u unmapped_{wildcards.sample}.csv --start-trim {params.Trim}"
 54 | 
 55 | rule featureList_ADT:
 56 | 	input:
 57 | 		{featureRef}
 58 | 	output:
 59 | 		"include/ADT-ref.csv"
 60 | 	shell:
 61 | 		"tail -n +{HTO_skip} {input} > {output}"
 62 | 
 63 | rule featureList_HTO:
 64 | 	input:
 65 | 		{featureRef}
 66 | 	output:
 67 | 		"include/HTO-ref.csv"
 68 | 	shell:
 69 | 		"tail -n +2 {input} | head -n {HTO_num} > {output}"
 70 | 
 71 | rule featureList_citeseqcount:
 72 | 	input:	
 73 | 		"include/{type}-ref.csv"
 74 | 	output:
 75 | 		"include/cite-seq-count/CSC_{type}-ref.csv"
 76 | 	shell:
 77 | 		"awk -F, '{{print $5\",\"$1}}' {input} > {output}"
 78 | 
 79 | rule featureList_kallisto:
 80 |         input:
 81 |                 "include/{type}-ref.csv"
 82 |         output:
 83 |                 "include/kallisto/kallisto_{type}-ref.csv"
 84 |         shell:
 85 |                 "awk -F, '{{print $1\",\"$5}}' {input} > {output}"
 86 | 
 87 | rule featureList_kallisto_addBase:
 88 |         input:
 89 |                 "include/{type}-ref.csv"
 90 |         output:
 91 |                 "include/kallisto/kallisto_{type}_{addBase}-ref.csv"
 92 |         shell:
 93 |                 "awk -F, '{{print $1\",\"$5\"{wildcards.addBase}\"}}' {input} > {output}"
 94 | 
 95 | rule kallisto_GEX_count:
 96 | 	input:
 97 | 		index="include/kallisto/idx_human.idx",
 98 | 		t2g="include/kallisto/t2g_human.txt",
 99 | 		R1="fastq_merged/{sample}_L001_R1_001.fastq.gz",
100 |                 R2="fastq_merged/{sample}_L001_R2_001.fastq.gz"
101 | 	output:
102 | 		directory("kallisto/{sample}")
103 | 	shell:
104 | 		"kb count -i {input.index} -g {input.t2g} -x {chemistry} -t {cores} -m {memory} -w {whitelist} -o {output} {input.R1} {input.R2}"
105 | 
106 | rule kallisto_kite:
107 | 	input:
108 | 		"include/kallisto/kallisto_{type}-ref.csv"
109 | 	output:
110 | 		t2g="include/kallisto/mismatch_t2g_{type}_{k}.txt",
111 | 		fa="include/kallisto/mismatch_{type}_{k}.fa"
112 | 	shell:
113 | 		"python3 ~/kite/featuremap/featuremap.py {input} --t2g {output.t2g} --fa {output.fa}"
114 | 		
115 | rule kallisto_kite_index:  # Build a kallisto index from the FASTA written by kallisto_kite
116 | 	input:
117 | 		"include/kallisto/mismatch_{type}_{k}.fa"
118 | 	output:
119 | 		"include/kallisto/mismatch_{type}_{k}.idx"
120 | 	shell:
121 | 		"module load kallisto; "  # relies on an environment-modules setup providing `module`
122 | 		"kallisto index -i {output} -k {wildcards.k} {input}"  # the k wildcard from the file name is passed as -k
123 | 
124 | rule kallisto_feature_count:  # Run `kb count` against the feature index for {type}/{k}; writes kallisto/features/{sample}.{type}_{k}
125 | 	input:
126 | 		index="include/kallisto/mismatch_{type}_{k}.idx",
127 | 		t2g="include/kallisto/mismatch_t2g_{type}_{k}.txt",
128 |                 R1="fastq_merged/{sample}_L001_R1_001.fastq.gz",
129 |                 R2="fastq_merged/{sample}_L001_R2_001.fastq.gz"
130 | 	output:
131 | 		directory("kallisto/features/{sample}.{type}_{k}")
132 | 	shell:
133 |                 "kb count -i {input.index} -g {input.t2g} -x {chemistry} -t {cores} -w {whitelist} -o {output} {input.R1} {input.R2}"  # unlike kallisto_GEX_count, no -m {memory} is passed here
134 | 
135 | rule cellranger_feature:  # Run `cellranger count` with a libraries CSV and a feature reference; output directory is cellranger_{id}
136 | 	input:	
137 | 		transcriptome="/gpfs/data/sequence/cellranger-refdata/refdata-cellranger-GRCh38-3.0.0",
138 | 		libraries="include/cellranger/libraries_{id}.csv",
139 | 		featureRef="include/cellranger/feature-ref_{id}.csv"
140 | 	params:
141 | 		chemistry="SC3Pv3"  # value handed to --chemistry
142 | 	output:
143 | 		directory("cellranger_{id}")
144 | 	shell:
145 | 		"module unload miniconda3; module load cellranger/3.1.0; "  # relies on an environment-modules setup providing `module`
146 | 		"cellranger count --id={output} --transcriptome={input.transcriptome} --expect-cells={cells} --libraries={input.libraries} --feature-ref={input.featureRef} --chemistry={params.chemistry} --nosecondary --nopreflight --disable-ui --localcores={cores}"  # --id is the output directory name itself
147 | 


--------------------------------------------------------------------------------
/Snakemake/10X_v3/include/feature-ref.csv:
--------------------------------------------------------------------------------
 1 | id,name,read,pattern,sequence,feature_type
 2 | CD3,CD3_TotalSeqB,R2,5PNNNNNNNNNN(BC)NNNNNNNNN,AACAAGACCCTTGAG,Antibody Capture
 3 | CD4,CD4_TotalSeqB,R2,5PNNNNNNNNNN(BC)NNNNNNNNN,TACCCGTAATAGCGT,Antibody Capture
 4 | CD8a,CD8a_TotalSeqB,R2,5PNNNNNNNNNN(BC)NNNNNNNNN,ATTGGCACTCAGATG,Antibody Capture
 5 | CD14,CD14_TotalSeqB,R2,5PNNNNNNNNNN(BC)NNNNNNNNN,GAAAGTCAAAGCACT,Antibody Capture
 6 | CD15,CD15_TotalSeqB,R2,5PNNNNNNNNNN(BC)NNNNNNNNN,ACGAATCAATCTGTG,Antibody Capture
 7 | CD16,CD16_TotalSeqB,R2,5PNNNNNNNNNN(BC)NNNNNNNNN,GTCTTTGTCAGTGCA,Antibody Capture
 8 | CD56,CD56_TotalSeqB,R2,5PNNNNNNNNNN(BC)NNNNNNNNN,GTTGTCCGACAATAC,Antibody Capture
 9 | CD19,CD19_TotalSeqB,R2,5PNNNNNNNNNN(BC)NNNNNNNNN,TCAACGCTTGGCTAG,Antibody Capture
10 | CD25,CD25_TotalSeqB,R2,5PNNNNNNNNNN(BC)NNNNNNNNN,GTGCATTCAACAGTA,Antibody Capture
11 | CD45RA,CD45RA_TotalSeqB,R2,5PNNNNNNNNNN(BC)NNNNNNNNN,GATGAGAACAGGTTT,Antibody Capture
12 | CD45RO,CD45RO_TotalSeqB,R2,5PNNNNNNNNNN(BC)NNNNNNNNN,TGCATGTCATCGGTG,Antibody Capture
13 | PD-1,PD-1_TotalSeqB,R2,5PNNNNNNNNNN(BC)NNNNNNNNN,AAGTCGTGAGGCATG,Antibody Capture
14 | TIGIT,TIGIT_TotalSeqB,R2,5PNNNNNNNNNN(BC)NNNNNNNNN,TGAAGGCTCATTTGT,Antibody Capture
15 | CD127,CD127_TotalSeqB,R2,5PNNNNNNNNNN(BC)NNNNNNNNN,ACATTGACGCAACTA,Antibody Capture
16 | IgG2a,IgG2a_control_TotalSeqB,R2,5PNNNNNNNNNN(BC)NNNNNNNNN,CTCTATTCAGACCAG,Antibody Capture
17 | IgG1,IgG1_control_TotalSeqB,R2,5PNNNNNNNNNN(BC)NNNNNNNNN,ACTCACTGGAGTCTC,Antibody Capture
18 | IgG2b,IgG2b_control_TotalSeqB,R2,5PNNNNNNNNNN(BC)NNNNNNNNN,ATCACATCGTTGCCA,Antibody Capture
19 | 


--------------------------------------------------------------------------------
/Snakemake/10X_v3/runSnakemake.sh:
--------------------------------------------------------------------------------
1 | time snakemake kallisto/features/pbmc_10k_protein_v3_antibody_S2.ADT_15  # target shape kallisto/features/{sample}.{type}_{k}; each line is timed separately
2 | time snakemake cite-seq-count/pbmc_10k_protein_v3_antibody_S2_d1_ADT  # CITE-seq-Count output directory
3 | time snakemake cellranger_pbmc_10k_protein_v3_antibody  # target shape cellranger_{id}
4 | time snakemake cite-seq-count/pbmc_10k_protein_v3_antibody_S2_d1_ADT_nocorrect  # same CITE-seq-Count target with the _nocorrect suffix
5 | 


--------------------------------------------------------------------------------
/Snakemake/Snakefile:
--------------------------------------------------------------------------------
  1 | cells=15000  # passed to CITE-seq-Count (-cells) and cellranger (--expect-cells)
  2 | cores=16  # passed to CITE-seq-Count (-T), kb count (-t) and cellranger (--localcores)
  3 | memory="64G"  # NOTE(review): not referenced by any rule in this Snakefile
  4 | whitelist="include/10xv2_whitelist.txt"  # barcode whitelist passed as -wl (CITE-seq-Count) and -w (kb count)
  5 | featureRef="include/feature-ref.csv"  # input of featureList_ADT and featureList_HTO
  6 | chemistry="10xv2"  # passed to kb count -x
  7 | HTO_num=6  # number of lines featureList_HTO keeps after the header line
  8 | HTO_skip=HTO_num+2  # first line (1-based) copied by featureList_ADT: skips the header plus HTO_num lines
  9 | 
 10 | def get_trim_length(wcs):
 11 | 	Trim = "10"
 12 | 	if wcs.type == 'HTO':
 13 | 		Trim = "0"
 14 | 	return [Trim]
 15 | 
 16 | rule concat:  # Concatenate the L001 and L002 lane files of a sample into a single R1 and a single R2 file under fastq_merged/
 17 | 	input:
 18 | 		R11="data/fastq/{sample}_L001_R1_001.fastq.gz",
 19 | 		R12="data/fastq/{sample}_L002_R1_001.fastq.gz",
 20 | 		R21="data/fastq/{sample}_L001_R2_001.fastq.gz",
 21 | 		R22="data/fastq/{sample}_L002_R2_001.fastq.gz"	
 22 | 	output:
 23 | 		R1="fastq_merged/{sample}_L001_R1_001.fastq.gz",
 24 | 		R2="fastq_merged/{sample}_L001_R2_001.fastq.gz"
 25 | 	shell:
 26 | 		"cat {input.R11} {input.R12} > {output.R1} && "  # sequential on purpose: a backgrounded `&` job may still be writing R1 (or fail unnoticed) when the rule returns
 27 | 		"cat {input.R21} {input.R22} > {output.R2}"
 28 | 
 29 | rule citeseqcount:  # Run CITE-seq-Count on the merged FASTQs; output directory is cite-seq-count/{sample}_d{hamming}_{type}
 30 | 	input:
 31 | 		R1="fastq_merged/{sample}_L001_R1_001.fastq.gz",
 32 | 		R2="fastq_merged/{sample}_L001_R2_001.fastq.gz",
 33 | 		features="include/cite-seq-count/CSC_{type}-ref.csv",
 34 | 		whitelist={whitelist}  # Python set literal around the module-level whitelist path
 35 | 	params:
 36 | 		Trim=get_trim_length  # called with the wildcards; yields "0" for type HTO, "10" otherwise
 37 | 	output:
 38 | 		directory("cite-seq-count/{sample}_d{hamming}_{type}")
 39 | 	shell:
 40 | 		"CITE-seq-Count -T {cores} --max-error {wildcards.hamming} -R1 {input.R1} -R2 {input.R2} -t {input.features} -cbf 1 -cbl 16 -umif 17 -umil 26 -o {output} -wl {input.whitelist} -cells {cells} -u unmapped_{wildcards.sample}.csv --start-trim {params.Trim}"  # NOTE(review): unmapped_{sample}.csv is not a declared output and citeseqcount_nocorrect uses the same name
 41 | 
 42 | rule citeseqcount_nocorrect:  # Same command as citeseqcount plus --no_umi_correction; output directory carries the _nocorrect suffix
 43 |         input:
 44 |                 R1="fastq_merged/{sample}_L001_R1_001.fastq.gz",
 45 |                 R2="fastq_merged/{sample}_L001_R2_001.fastq.gz",
 46 |                 features="include/cite-seq-count/CSC_{type}-ref.csv",
 47 |                 whitelist={whitelist}  # Python set literal around the module-level whitelist path
 48 |         params:
 49 |                 Trim=get_trim_length  # called with the wildcards; yields "0" for type HTO, "10" otherwise
 50 |         output:
 51 |                 directory("cite-seq-count/{sample}_d{hamming}_{type}_nocorrect")
 52 |         shell:
 53 |                 "CITE-seq-Count -T {cores} --no_umi_correction --max-error {wildcards.hamming} -R1 {input.R1} -R2 {input.R2} -t {input.features} -cbf 1 -cbl 16 -umif 17 -umil 26 -o {output} -wl {input.whitelist} -cells {cells} -u unmapped_{wildcards.sample}.csv --start-trim {params.Trim}"  # NOTE(review): unmapped_{sample}.csv is not a declared output and citeseqcount uses the same name
 54 | 
 55 | rule featureList_ADT:  # Copy the feature reference from line HTO_skip onward to include/ADT-ref.csv
 56 | 	input:
 57 | 		{featureRef}  # Python set literal around the module-level featureRef path
 58 | 	output:
 59 | 		"include/ADT-ref.csv"
 60 | 	shell:
 61 | 		"tail -n +{HTO_skip} {input} > {output}"  # HTO_skip = HTO_num+2, so the header line and the HTO_num lines after it are left out
 62 | 
 63 | rule featureList_HTO:  # Write the HTO_num lines that follow the header of the feature reference to include/HTO-ref.csv
 64 | 	input:
 65 | 		{featureRef}  # Python set literal around the module-level featureRef path
 66 | 	output:
 67 | 		"include/HTO-ref.csv"
 68 | 	shell:
 69 | 		"tail -n +2 {input} | head -n {HTO_num} > {output}"  # tail drops line 1; head keeps the next HTO_num lines
 70 | 
 71 | rule featureList_citeseqcount:  # Rewrite a per-type reference CSV as "column 5,column 1" lines (sequence,id in the feature-ref.csv layout)
 72 | 	input:	
 73 | 		"include/{type}-ref.csv"
 74 | 	output:
 75 | 		"include/cite-seq-count/CSC_{type}-ref.csv"
 76 | 	shell:
 77 | 		"awk -F, '{{print $5\",\"$1}}' {input} > {output}"  # doubled braces are literal awk braces after Snakemake formatting
 78 | 
 79 | rule featureList_kallisto:  # Rewrite a per-type reference CSV as "column 1,column 5" lines (id,sequence in the feature-ref.csv layout)
 80 |         input:
 81 |                 "include/{type}-ref.csv"
 82 |         output:
 83 |                 "include/kallisto/kallisto_{type}-ref.csv"
 84 |         shell:
 85 |                 "awk -F, '{{print $1\",\"$5}}' {input} > {output}"  # doubled braces are literal awk braces after Snakemake formatting
 86 | 
 87 | rule featureList_kallisto_addBase:  # Same as featureList_kallisto, but appends the addBase wildcard text to column 5 of every line
 88 |         input:
 89 |                 "include/{type}-ref.csv"
 90 |         output:
 91 |                 "include/kallisto/kallisto_{type}_{addBase}-ref.csv"
 92 |         shell:
 93 |                 "awk -F, '{{print $1\",\"$5\"{wildcards.addBase}\"}}' {input} > {output}"  # {wildcards.addBase} is substituted before awk runs
 94 | 
 95 | rule kallisto_GEX_ref:  # Run `kb ref` on the cellranger GRCh38-3.0.0 genome FASTA and GTF to write an index, a t2g table and a cDNA FASTA
 96 | 	input:
 97 | 		gtf="/gpfs/data/koralovlab/ref/refdata-cellranger-GRCh38-3.0.0/genes/genes.gtf",
 98 | 		fa="/gpfs/data/koralovlab/ref/refdata-cellranger-GRCh38-3.0.0/fasta/genome.fa"
 99 | 	output:
100 | 		index="/gpfs/data/koralovlab/ref/refdata-cellranger-GRCh38-3.0.0/kallisto/idx_cellranger.idx",
101 | 		t2g="/gpfs/data/koralovlab/ref/refdata-cellranger-GRCh38-3.0.0/kallisto/t2g_cellranger.txt",
102 | 		fa="/gpfs/data/koralovlab/ref/refdata-cellranger-GRCh38-3.0.0/kallisto/cDNA_cellranger.fa"
103 | 	shell:
104 | 		"kb ref -i {output.index} -g {output.t2g} -f1 {output.fa} {input.fa} {input.gtf}"  # outputs are written next to the shared reference, not inside the working directory
105 | 
106 | rule kallisto_GEX_count:  # Run `kb count` against the index and t2g written by kallisto_GEX_ref; output directory is kallisto/gex/{sample}
107 | 	input:
108 | 		t2g="/gpfs/data/koralovlab/ref/refdata-cellranger-GRCh38-3.0.0/kallisto/t2g_cellranger.txt",
109 |                 index="/gpfs/data/koralovlab/ref/refdata-cellranger-GRCh38-3.0.0/kallisto/idx_cellranger.idx",
110 | 		R1="fastq_merged/{sample}_L001_R1_001.fastq.gz",
111 |                 R2="fastq_merged/{sample}_L001_R2_001.fastq.gz"
112 | 	output:
113 | 		directory("kallisto/gex/{sample}")
114 | 	shell:
115 | 		"kb count -i {input.index} -g {input.t2g} -x {chemistry} -t {cores} -w {whitelist} -o {output} {input.R1} {input.R2}"  # chemistry, cores and whitelist are the module-level variables
116 | 
117 | rule kallisto_GEX_count_CD45:  # Same `kb count` command as kallisto_GEX_count, but with t2g_cellranger_CD45.txt; output directory is kallisto/gex_CD45/{sample}
118 | 	input:
119 | 		t2g="/gpfs/data/koralovlab/ref/refdata-cellranger-GRCh38-3.0.0/kallisto/t2g_cellranger_CD45.txt",  # NOTE(review): no rule in this Snakefile produces this file; it must already exist
120 | 		index="/gpfs/data/koralovlab/ref/refdata-cellranger-GRCh38-3.0.0/kallisto/idx_cellranger.idx",
121 | 		R1="fastq_merged/{sample}_L001_R1_001.fastq.gz",
122 | 		R2="fastq_merged/{sample}_L001_R2_001.fastq.gz"
123 | 	output:
124 | 		directory("kallisto/gex_CD45/{sample}")
125 | 	shell:
126 | 		"kb count -i {input.index} -g {input.t2g} -x {chemistry} -t {cores} -w {whitelist} -o {output} {input.R1} {input.R2}"  # chemistry, cores and whitelist are the module-level variables
127 | 
128 | rule kallisto_kite:  # Run kite's featuremap.py on the kallisto feature CSV to write a t2g table and a FASTA file
129 | 	input:
130 | 		"include/kallisto/kallisto_{type}-ref.csv"
131 | 	output:
132 | 		t2g="include/kallisto/mismatch_t2g_{type}_{k}.txt",  # {k} only appears in the output names; it is not passed to the command
133 | 		fa="include/kallisto/mismatch_{type}_{k}.fa"
134 | 	shell:
135 | 		"python3 ~/kite/featuremap/featuremap.py {input} --t2g {output.t2g} --fa {output.fa}"  # expects the kite repository checked out at ~/kite
136 | 		
137 | rule kallisto_kite_index:  # Build a kallisto index from the FASTA written by kallisto_kite
138 | 	input:
139 | 		"include/kallisto/mismatch_{type}_{k}.fa"
140 | 	output:
141 | 		"include/kallisto/mismatch_{type}_{k}.idx"
142 | 	shell:
143 | 		"module load kallisto; "  # relies on an environment-modules setup providing `module`
144 | 		"kallisto index -i {output} -k {wildcards.k} {input}"  # the k wildcard from the file name is passed as -k
145 | 
146 | rule kallisto_feature_count:  # Run `kb count` against the feature index for {type}/{k}; writes kallisto/features/{sample}.{type}_{k}
147 | 	input:
148 | 		index="include/kallisto/mismatch_{type}_{k}.idx",
149 | 		t2g="include/kallisto/mismatch_t2g_{type}_{k}.txt",
150 |                 R1="fastq_merged/{sample}_L001_R1_001.fastq.gz",
151 |                 R2="fastq_merged/{sample}_L001_R2_001.fastq.gz"
152 | 	output:
153 | 		directory("kallisto/features/{sample}.{type}_{k}")
154 | 	shell:
155 |                 "kb count -i {input.index} -g {input.t2g} -x {chemistry} -t {cores} -w {whitelist} -o {output} {input.R1} {input.R2}"  # chemistry, cores and whitelist are the module-level variables
156 | 
157 | rule cellranger_feature:  # Run `cellranger count` with a libraries CSV and a feature reference; output directory is cellranger_{id}
158 | 	input:	
159 | 		transcriptome="/gpfs/data/sequence/cellranger-refdata/refdata-cellranger-GRCh38-3.0.0",
160 | 		libraries="include/cellranger/libraries_{id}.csv",
161 | 		featureRef="include/cellranger/feature-ref_{id}.csv"
162 | 	params:
163 | 		chemistry="SC5P-R2"  # value handed to --chemistry; separate from the module-level `chemistry` used by kb count
164 | 	output:
165 | 		directory("cellranger_{id}")
166 | 	shell:
167 | 		"module unload miniconda3; module load cellranger/3.1.0; "  # relies on an environment-modules setup providing `module`
168 | 		"cellranger count --id={output} --transcriptome={input.transcriptome} --expect-cells={cells} --libraries={input.libraries} --feature-ref={input.featureRef} --chemistry={params.chemistry} --nosecondary --nopreflight --disable-ui --localcores={cores}"  # --id is the output directory name itself
169 | 


--------------------------------------------------------------------------------
/Snakemake/include/feature-ref.csv:
--------------------------------------------------------------------------------
 1 | id,name,read,pattern,sequence,feature_type
 2 | 1,1,R2,^(BC),AGGACCATCCAA,Antibody Capture
 3 | 2,2,R2,^(BC),ACATGTTACCGT,Antibody Capture
 4 | 3,3,R2,^(BC),AGCTTACTATCC,Antibody Capture
 5 | 4,4,R2,^(BC),TCGATAATGCGA,Antibody Capture
 6 | 5,5,R2,^(BC),GAGGCTGAGCTA,Antibody Capture
 7 | 6,6,R2,^(BC),GTGTGACGTATT,Antibody Capture
 8 | CD103,CD103,R2,5PNNNNNNNNNN(BC),GACCTCATTGTGAAT,Antibody Capture
 9 | CD223,CD223,R2,5PNNNNNNNNNN(BC),CATTTGTCTGCCGGT,Antibody Capture
10 | CD274,CD274,R2,5PNNNNNNNNNN(BC),GTTGTCCGACAATAC,Antibody Capture
11 | CD45,CD45,R2,5PNNNNNNNNNN(BC),TGCAATTACCCGGAT,Antibody Capture
12 | CD134,CD134,R2,5PNNNNNNNNNN(BC),AACCCACCGTTGTTA,Antibody Capture
13 | CD56,CD56,R2,5PNNNNNNNNNN(BC),TCCTTTCCTGATAGG,Antibody Capture
14 | CD366,CD366,R2,5PNNNNNNNNNN(BC),TGTCCTACCCAACTT,Antibody Capture
15 | HLA-DR,HLA-DR,R2,5PNNNNNNNNNN(BC),AATAGCGAGCAAGTA,Antibody Capture
16 | TCRab,TCRab,R2,5PNNNNNNNNNN(BC),CGTAACGTAGAGCGA,Antibody Capture
17 | CD279,CD279,R2,5PNNNNNNNNNN(BC),ACAGCGCCGTATTTA,Antibody Capture
18 | CD45RO,CD45RO,R2,5PNNNNNNNNNN(BC),CTCCGAATCATGTTG,Antibody Capture
19 | CD152,CD152,R2,5PNNNNNNNNNN(BC),ATGGTTCACGTAATC,Antibody Capture
20 | CD107a,CD107a,R2,5PNNNNNNNNNN(BC),CAGCCCACTGCAATA,Antibody Capture
21 | CD194,CD194,R2,5PNNNNNNNNNN(BC),AGCTTACCTGCACGA,Antibody Capture
22 | CD11b,CD11b,R2,5PNNNNNNNNNN(BC),GACAAGTGATCTGCA,Antibody Capture
23 | CD26,CD26,R2,5PNNNNNNNNNN(BC),GGTGGCTAGATAATG,Antibody Capture
24 | CD14,CD14,R2,5PNNNNNNNNNN(BC),TCTCAGACCTCCGTA,Antibody Capture
25 | CD127,CD127,R2,5PNNNNNNNNNN(BC),GTGTGTTGTCCTATG,Antibody Capture
26 | CD28,CD28,R2,5PNNNNNNNNNN(BC),TGAGAACGACCCTAA,Antibody Capture
27 | CD183,CD183,R2,5PNNNNNNNNNN(BC),GCGATGGTAGATTAT,Antibody Capture
28 | CD62L,CD62L,R2,5PNNNNNNNNNN(BC),GTCCCTGCAACTTGA,Antibody Capture
29 | CD117,CD117,R2,5PNNNNNNNNNN(BC),AGACTAATAGCTGAC,Antibody Capture
30 | CD123,CD123,R2,5PNNNNNNNNNN(BC),CTTCACTCTGTCAGG,Antibody Capture
31 | TCRgd,TCRgd,R2,5PNNNNNNNNNN(BC),CTTCCGATTCATTCA,Antibody Capture
32 | CD138,CD138,R2,5PNNNNNNNNNN(BC),GTATAGACCAAAGCC,Antibody Capture
33 | CD1a,CD1a,R2,5PNNNNNNNNNN(BC),GATCGTGTTGTGTTA,Antibody Capture
34 | CD25,CD25,R2,5PNNNNNNNNNN(BC),TTTGTCCTGTACGCC,Antibody Capture
35 | CD197,CD197,R2,5PNNNNNNNNNN(BC),AGTTCAGTCAACCGA,Antibody Capture
36 | CD4,CD4,R2,5PNNNNNNNNNN(BC),TGTTCCCGCTCAACT,Antibody Capture
37 | IgG1,IgG1,R2,5PNNNNNNNNNN(BC),GCCGGACGACATTAA,Antibody Capture
38 | CD80,CD80,R2,5PNNNNNNNNNN(BC),ACGAATCAATCTGTG,Antibody Capture
39 | IgG2A,IgG2A,R2,5PNNNNNNNNNN(BC),CTCCTACCTAAACTG,Antibody Capture
40 | CD31,CD31,R2,5PNNNNNNNNNN(BC),ACCTTTATGCCACGG,Antibody Capture
41 | CD141,CD141,R2,5PNNNNNNNNNN(BC),GGATAACCGCGCTTT,Antibody Capture
42 | CD2,CD2,R2,5PNNNNNNNNNN(BC),TACGATTTGTCAGGG,Antibody Capture
43 | CD66b,CD66b,R2,5PNNNNNNNNNN(BC),AGCTGTAAGTTTCGG,Antibody Capture
44 | CD24,CD24,R2,5PNNNNNNNNNN(BC),AGATTCCTTCGTGTT,Antibody Capture
45 | CD3,CD3,R2,5PNNNNNNNNNN(BC),CTCATTGTAACTCCT,Antibody Capture
46 | CD1c,CD1c,R2,5PNNNNNNNNNN(BC),GAGCTACTTCACTCG,Antibody Capture
47 | CD86,CD86,R2,5PNNNNNNNNNN(BC),GTCTTTGTCAGTGCA,Antibody Capture
48 | CD5,CD5,R2,5PNNNNNNNNNN(BC),CATTAACGGGATGCC,Antibody Capture
49 | CD44,CD44,R2,5PNNNNNNNNNN(BC),AATCCTTCCGAATGT,Antibody Capture
50 | CD69,CD69,R2,5PNNNNNNNNNN(BC),GTCTCTTGGCTTAAA,Antibody Capture
51 | HLA-ABC,HLA-ABC,R2,5PNNNNNNNNNN(BC),TATGCGAGGCTTATC,Antibody Capture
52 | CD19,CD19,R2,5PNNNNNNNNNN(BC),CTGGGCAATTACTCG,Antibody Capture
53 | CD45RA,CD45RA,R2,5PNNNNNNNNNN(BC),TCAATCCTTCCGCTT,Antibody Capture
54 | CD8,CD8,R2,5PNNNNNNNNNN(BC),GCTGCGCTTTCCATT,Antibody Capture
55 | CD70,CD70,R2,5PNNNNNNNNNN(BC),CGCGAACATAAGAAG,Antibody Capture
56 | CD196,CD196,R2,5PNNNNNNNNNN(BC),GATCCCTTTGTCACT,Antibody Capture
57 | CD39,CD39,R2,5PNNNNNNNNNN(BC),TTACCTGGTATCCGT,Antibody Capture
58 | EpCAM,EpCAM,R2,5PNNNNNNNNNN(BC),TTCCGAGCAAGTATC,Antibody Capture
59 | CD30,CD30,R2,5PNNNNNNNNNN(BC),TCAGGGTGTGCTGTA,Antibody Capture
60 | 


--------------------------------------------------------------------------------
/Snakemake/runSnakemake.sh:
--------------------------------------------------------------------------------
 1 | time snakemake kallisto/gex/c1  # target of rule kallisto_GEX_count (sample c1); each line is timed separately
 2 | time snakemake kallisto/features/H1_S6.HTO_A_13  # target shape kallisto/features/{sample}.{type}_{k}
 3 | time snakemake kallisto/features/A1_S5.ADT_15  # target shape kallisto/features/{sample}.{type}_{k}
 4 | time snakemake cite-seq-count/A1_S5_d1_ADT  # target shape cite-seq-count/{sample}_d{hamming}_{type}
 5 | time snakemake cite-seq-count/H1_S6_d1_HTO  # target shape cite-seq-count/{sample}_d{hamming}_{type}
 6 | time snakemake cellranger_A1  # target shape cellranger_{id}
 7 | time snakemake cellranger_H1  # target shape cellranger_{id}
 8 | time snakemake cite-seq-count/H1_S6_d1_HTO_nocorrect  # target of rule citeseqcount_nocorrect
 9 | time snakemake cite-seq-count/A1_S5_d1_ADT_nocorrect  # target of rule citeseqcount_nocorrect
10 | 


--------------------------------------------------------------------------------
/Volume and cell number titration.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "CITE-seq optimization - Staining volume and cell number titration"
  3 | author: "Terkild Brink Buus"
  4 | date: "30/3/2020"
  5 | output: github_document
  6 | ---
  7 | 
  8 | ```{r setup, include=FALSE}
  9 | knitr::opts_chunk$set(warning=FALSE, message=FALSE)  # default for all later chunks: do not show warnings or messages
 10 | options(stringsAsFactors=FALSE)  # set the global stringsAsFactors option to FALSE
 11 | ```
 12 | 
 13 | ## Load utilities
 14 | 
 15 | Including libraries, plotting and color settings and custom utility functions
 16 | 
 17 | ```{r loadLibraries, results='hide', message=FALSE, warning=FALSE}
 18 | set.seed(114)  # fixed RNG seed for the whole vignette
 19 | require("Seurat", quietly=T)
 20 | require("tidyverse", quietly=T)
 21 | library("Matrix", quietly=T)
 22 | library("patchwork", quietly=T)
 23 | 
 24 | ## Load ggplot theme and defaults
 25 | source("R/ggplot_settings.R")
 26 | 
 27 | ## Load helper functions
 28 | source("R/Utilities.R")
 29 | 
 30 | ## Load predefined color schemes
 31 | source("R/color.R")
 32 | 
 33 | ## Load feature_rankplot functions
 34 | source("R/feature_rankplot.R")
 35 | source("R/feature_rankplot_hist.R")
 36 | source("R/feature_rankplot_hist_custom.R")
 37 | 
 38 | outdir <- "figures"  # directory the final figure PNG is written to
 39 | data.Seurat <- "data/5P-CITE-seq_Titration.rds"  # read with readRDS() in the loadSeurat chunk
 40 | data.abpanel <- "data/Supplementary_Table_1.xlsx"  # read with readxl::read_excel() in the loadABPanel chunk
 41 | data.markerStats <- "data/markerByClusterStats.tsv"  # read with read.table() in the loadABPanel chunk
 42 | 
 43 | ## Make a custom function for formatting the concentration scale
 44 | scaleFUNformat <- function(x) sprintf("%.2f", x)  # formats numbers with two decimals; used as fill-scale labels
 45 | ```
 46 | 
 47 | ## Load Seurat object
 48 | 
 49 | Subset to only focus on conditions with 200k or 1 mio cells and dilution factor 4 (thus comparing 50µl to 25µl staining volume with 1 mio or 200k PBMCs at staining).
 50 | 
 51 | ```{r loadSeurat}
 52 | object <- readRDS(file=data.Seurat)
 53 | 
 54 | ## Show number of cells from each sample
 55 | table(object$group)
 56 | ## Keep only cells with dilution "DF4" and cellsAtStaining "200k" or "1000k"
 57 | object <- subset(object, subset=dilution == "DF4" & cellsAtStaining %in% c("200k","1000k"))
 58 | object
 59 | color.volnum <- c("50µl_1000k"="#0082c8","25µl_1000k"="#f58231","25µl_200k"="#911eb4")  # fill colour per "<volume>_<cellsAtStaining>" condition
 60 | shape.volnum <- c("50µl_1000k"=21,"25µl_1000k"=22,"25µl_200k"=23)  # point shape per condition
 61 | object$volnum <- factor(paste(object$volume,object$cellsAtStaining,sep="_"),levels=names(color.volnum))  # combinations not named in color.volnum become NA
 62 | ```
 63 | 
 64 | ## Load Ab panel annotation and concentrations
 65 | 
 66 | Marker stats are reused in other comparisons and were calculated at the end of the preprocessing vignette.
 67 | 
 68 | ```{r loadABPanel}
 69 | abpanel <- data.frame(readxl::read_excel(data.abpanel))
 70 | rownames(abpanel) <- abpanel$Marker  # allows lookup of panel rows by marker name
 71 | 
 72 | ## As we are only working with dilution factor 4 samples here, we want to show labels accordingly
 73 | # a bit of a hack...
 74 | abpanel$conc_µg_per_mL <- abpanel$conc_µg_per_mL/4  # from here on abpanel holds the concentration divided by 4
 75 | 
 76 | markerStats <- read.table(data.markerStats)
 77 | markerStats.PBMC <- markerStats[markerStats$tissue == "PBMC",]
 78 | rownames(markerStats) <- paste(markerStats$marker,markerStats$tissue,sep="_")  # set after the PBMC subset was taken, so markerStats.PBMC keeps its original row names
 79 | 
 80 | ## Make an ordering vector: markers sorted by decreasing concentration, then by decreasing total UMI count
 81 | marker.order <- markerStats.PBMC$marker[order(markerStats.PBMC$conc_µg_per_mL, markerStats.PBMC$UMItotal, decreasing=TRUE)]
 82 | 
 83 | head(abpanel)
 84 | head(markerStats)
 85 | ```
 86 | 
 87 | ## Cell type and tissue overview
 88 | 
 89 | Make tSNE plots colored by cell type, cluster and staining condition (staining volume and cell number).
 90 | 
 91 | ```{r tsnePlots, fig.height=3, fig.width=7}
 92 | p.tsne.volume <- DimPlot(object, group.by="volnum", reduction="tsne", pt.size=0.1, combine=FALSE)[[1]] + theme_get() + facet_wrap(~"Volume") + scale_color_manual(values=color.volnum)
 93 | ## facet_wrap(~"<label>") facets on a constant string, which puts a titled strip above each single panel
 94 | p.tsne.cluster <- DimPlot(object, group.by="supercluster", reduction="tsne", pt.size=0.1, combine=FALSE)[[1]] + theme_get() + scale_color_manual(values=color.supercluster) + facet_wrap(~"Cell types")
 95 | ## guides(col="none") hides the colour legend; the logical form guides(col=F) is deprecated in current ggplot2
 96 | p.tsne.finecluster <- DimPlot(object, label=TRUE, label.size=3, reduction="tsne", group.by="fineCluster", pt.size=0.1, combine=FALSE)[[1]] + theme_get() + facet_wrap(  ~"Clusters") + guides(col="none")
 97 | ## Combine the three panels side by side with patchwork
 98 | p.tsne.cluster + p.tsne.finecluster + p.tsne.volume
 99 | ```
100 | 
101 | ## Overall ADT counts
102 | 
103 | Extract UMI data and calculate UMI sum per marker within each condition.
104 | 
105 | ```{r calculateUMIcountsPerMarker}
106 | ## Get the data: raw counts of the "ADT.kallisto" assay as a marker x cell data frame
107 | ADT.matrix <- data.frame(GetAssayData(object, assay="ADT.kallisto", slot="counts"))
108 | ADT.matrix$marker <- rownames(ADT.matrix)
109 | ADT.matrix$conc <- abpanel[ADT.matrix$marker,"conc_µg_per_mL"]
110 | ADT.matrix <- ADT.matrix %>% pivot_longer(c(-marker,-conc))  # long format: one row per marker and cell column (columns `name` and `value`)
111 | 
112 | ## Get cell annotations
113 | cell.annotation <- FetchData(object, vars=c("volnum"))
114 | 
115 | ## Calculate marker sum for each staining condition (volnum). NOTE(review): the lookup by `name` assumes data.frame() left the cell barcodes unchanged — confirm
116 | ADT.matrix.agg <- ADT.matrix %>% group_by(volume=cell.annotation[name,"volnum"], marker, conc) %>% summarise(sum=sum(value))
117 | 
118 | ## Order markers by concentration
119 | ADT.matrix.agg$marker.byConc <- factor(ADT.matrix.agg$marker, levels=marker.order)
120 | 
121 | ## Extract marker annotation
122 | ann.markerConc <- abpanel[marker.order,]
123 | ann.markerConc$Marker <- factor(marker.order, levels=marker.order)
124 | ## Keep a copy under a separate name because ADT.matrix.agg is reassigned in the UMIinExpressingCells chunk
125 | ADT.matrix.agg.total <- ADT.matrix.agg
126 | ```
127 | 
128 | ## Plot overall ADT counts by conditions
129 | 
130 | Samples stained with diluted Ab panel have reduced ADT counts.
131 | 
132 | ```{r UMIcountsPerCondition, fig.width=2.5, fig.height=2}
133 | ## Stacked bars of summed ADT UMIs per staining condition; rows are ordered using the same table they index (ADT.matrix.agg.total)
134 | p.UMIcountsPerCondition <- ggplot(ADT.matrix.agg.total[order(-ADT.matrix.agg.total$conc, -ADT.matrix.agg.total$sum),], aes(x=volume, y=sum/10^6, fill=conc)) + 
135 |   geom_bar(stat="identity", col=alpha(col="black",alpha=0.05)) + 
136 |   scale_fill_viridis_c(trans="log2", labels=scaleFUNformat, breaks=c(0.0375,0.15,0.625,2.5,10)) + 
137 |   scale_y_continuous(expand=c(0,0,0,0.05)) + 
138 |   labs(fill="DF4\nµg/mL", y=bquote("ADT UMI counts ("~10^6~")")) + 
139 |   guides(fill=guide_colourbar(reverse=T)) + 
140 |   theme(panel.grid.major=element_blank(), axis.title.x=element_blank(), panel.border=element_blank(), axis.line = element_line(), legend.position="right")
141 | 
142 | p.UMIcountsPerCondition
143 | ```
144 | 
145 | ## Compare total UMI counts per marker
146 | 
147 | Plot total UMI counts for each marker at the investigated staining conditions (50µl vs. 25µl staining volume, and 1000k vs. 200k cells at staining). To ease readability, we place dashed lines between each concentration.
148 | 
149 | ```{r plotUMIcountsPerMarker, fig.width=4.5, fig.height=5}
150 | ## Calculate "breaks" where concentration change (positions between markers of different concentration, counted along the ordered marker axis).
151 | lines <- length(marker.order)-cumsum(sapply(split(ann.markerConc$Marker,ann.markerConc$conc_µg_per_mL),length))+0.5
152 | lines <- data.frame(breaks=lines[-length(lines)])  # drop the last break
153 | 
154 | ## Make a marker by concentration "heatmap"
155 | p.markerByConc <- ggplot(ann.markerConc, aes(x=1, y=Marker, fill=conc_µg_per_mL)) + 
156 |   geom_tile(col=alpha(col="black",alpha=0.2)) + 
157 |   geom_hline(data=lines,aes(yintercept=breaks), linetype="dashed", alpha=0.5) + 
158 |   scale_fill_viridis_c(trans="log2") + 
159 |   labs(fill="µg/mL") + 
160 |   theme_get() + 
161 |   theme(axis.ticks.x=element_blank(), axis.title = element_blank(), axis.text.x=element_blank(), panel.grid=element_blank(), legend.position="right", plot.margin=unit(c(0.1,0.1,0.1,0.1),"mm")) + scale_x_continuous(expand=c(0,0))
162 |   
163 | ## Make UMI counts per Marker plot
164 | p.UMIcountsPerMarker <- ggplot(ADT.matrix.agg, aes(x=marker.byConc,y=log2(sum))) + 
165 |   geom_line(aes(group=marker), size=1, color="#666666", alpha=0.5) + 
166 |   ggbeeswarm::geom_quasirandom(aes(group=volume, fill=volume, pch=volume), size=1, dodge.width=-0.75) + 
167 |   geom_vline(data=lines,aes(xintercept=breaks), linetype="dashed", alpha=0.5) + 
168 |   scale_fill_manual(values=color.volnum) + 
169 |   scale_y_continuous(breaks=c(9:17)) + 
170 |   scale_shape_manual(values=shape.volnum) + 
171 |   ylab("log2(UMI sum)") + 
172 |   guides(fill=guide_legend(override.aes=list(size=1.5), ncol=1, reverse=FALSE)) + 
173 |   theme(axis.title.y=element_blank(), axis.text.y=element_blank(), legend.position="bottom", legend.justification="left", legend.title.align=0, legend.key.width=unit(0.2,"cm"), legend.title=element_blank()) + 
174 |   coord_flip()
175 | 
176 | ## Combine plot with markerByConc annotation heatmap; legends are switched off with "none" (the logical form guides(fill=F) is deprecated in current ggplot2)
177 | plotUMIcountsPerMarker <- p.markerByConc + guides(fill="none") + p.UMIcountsPerMarker + guides(fill="none", shape="none") + plot_spacer() + guide_area() + plot_layout(ncol=4, widths=c(1,30,0.1), guides='collect')
178 | 
179 | plotUMIcountsPerMarker
180 | ```
181 | 
182 | ## Compare change in UMI/cell within expressing cluster
183 | 
184 | Using a specific percentile may be prone to outliers in small clusters (e.g. the 90th percentile of a cluster of 30 cells is the 3rd highest cell, making it sensitive to outliers). We thus only use the 90th percentile if the cluster contains more than 100 cells; for smaller clusters, the median is used. The expressing cluster is identified in the "preprocessing" vignette.
185 | 
186 | ```{r UMIinExpressingCells, fig.width=4.5, fig.height=5}
187 | ## Get the data: raw counts of the "ADT.kallisto" assay as a marker x cell data frame
188 | ADT.matrix <- data.frame(GetAssayData(object, assay="ADT.kallisto", slot="counts"))
189 | ADT.matrix$marker <- rownames(ADT.matrix)
190 | ADT.matrix$conc <- abpanel[ADT.matrix$marker,"conc_µg_per_mL"]
191 | ADT.matrix <- ADT.matrix %>% pivot_longer(c(-marker,-conc))
192 | 
193 | ## Get cell annotations
194 | cell.annotation <- FetchData(object, vars=c("volnum", "fineCluster"))
195 | 
196 | ## Calculate marker statistics for each staining condition within each cluster. NOTE(review): the column named `median` holds the 90th percentile, and nth() is called without `n` — presumably a helper from R/Utilities.R masks dplyr::nth; confirm
197 | ADT.matrix.agg <- ADT.matrix %>% group_by(volume=cell.annotation[name,"volnum"], fineCluster=cell.annotation[name,"fineCluster"], marker, conc) %>% summarise(sum=sum(value), median=quantile(value, probs=c(0.9)), nth=nth(value))
198 | ## No tissue filter is applied to the aggregated table (it has no tissue column); the PBMC selection happens via markerStats below
199 | 
200 | ## Use data for the previously determined expressing cluster.
201 | Cluster.max <- markerStats[markerStats$tissue == "PBMC",c("marker","fineCluster")]
202 | Cluster.max$fineCluster <- factor(Cluster.max$fineCluster)
203 | 
204 | ADT.matrix.aggByClusterMax <- Cluster.max %>% left_join(ADT.matrix.agg)
205 | ADT.matrix.aggByClusterMax$marker.byConc <- factor(ADT.matrix.aggByClusterMax$marker, levels=marker.order)
206 | 
207 | p.UMIinExpressingCells <- ggplot(ADT.matrix.aggByClusterMax, aes(x=marker.byConc, y=log2(nth))) + 
208 |   geom_line(aes(group=marker), size=1, alpha=0.5, color="#666666") + 
209 |   ggbeeswarm::geom_quasirandom(aes(group=volume, fill=volume, pch=volume), size=1, show.legend=FALSE, dodge.width=-0.75) + 
210 |   geom_vline(data=lines,aes(xintercept=breaks), linetype="dashed", alpha=0.5) + 
211 |   geom_text(aes(label=paste0(fineCluster," ")), y=Inf, adj=1, size=1.5) + 
212 |   scale_fill_manual(values=color.volnum) + 
213 |   scale_shape_manual(values=shape.volnum) + 
214 |   scale_y_continuous(breaks=c(0:11), labels=2^c(0:11), expand=c(0.05,0.5)) + 
215 |   ylab("90th percentile UMI of expressing cluster") + 
216 |   theme(axis.title.y=element_blank(), axis.text.y=element_blank(), legend.position="right", legend.justification="left", legend.title.align=0, legend.key.width=unit(0.2,"cm")) + 
217 |   coord_flip()
218 | 
219 | ## Combine plot with markerByConc annotation heatmap
220 | UMIinExpressingCells <- p.markerByConc + theme(legend.position="none") + p.UMIinExpressingCells + theme(legend.position="none") + plot_spacer() + plot_layout(ncol=4, widths=c(1,30,0.1), guides='collect')
221 | 
222 | UMIinExpressingCells
223 | ```
224 | 
225 | ## Titration examples
226 | 
227 | Most markers are largely unaffected by reducing staining volume. However, some antibodies used at low concentrations and targeting abundant epitopes are affected, an example of such is CD31:
228 | 
229 | ```{r fig.width=1.4, fig.height=2.3}
230 | ## Make helper function for plotting titration plots: builds one feature_rankplot_hist_custom() panel for `marker` and returns it. gate.PBMC optionally replaces the markerStats-derived gate; y.axis is passed as yaxis.text; legend is passed through.
231 | titrationPlot <- function(marker, gate.PBMC=NULL, gate.Lung=NULL, y.axis=FALSE, show.gate=TRUE, legend=FALSE){
232 |   curMarker.name <- marker
233 |   ## NOTE(review): gate.Lung is accepted but never used in this function
234 |   ## Get antibody concentration for legends
235 |   curMarker.DF1conc <- abpanel[curMarker.name, "conc_µg_per_mL"]
236 |   if(show.gate==TRUE){
237 |     ## Load gating percentages from manually set DSB thresholds
238 |     gate <- data.frame(gate=markerStats[markerStats$marker == curMarker.name & markerStats$tissue== "PBMC",c("pct")])
239 |     gate$gate <- 1-(gate$gate/100)  # convert the percentage in `pct` to the fraction 1 - pct/100
240 |     rownames(gate) <- gate$wrap  # NOTE(review): `gate` has no `wrap` column here, so this assigns NULL row names — confirm intent
241 |     ## Allow manual gating
242 |     if(!is.null(gate.PBMC)) gate <- gate.PBMC
243 |   } else {
244 |     gate <- NULL
245 |   }
246 | 
247 |   p <- feature_rankplot_hist_custom(data=object, 
248 |                                     marker=paste0("adt_",curMarker.name),      
249 |                                     group="volnum",
250 |                                     barcodeGroup="supercluster",
251 |                                     conc=curMarker.DF1conc, 
252 |                                     legend=legend, 
253 |                                     yaxis.text=y.axis, 
254 |                                     gates=gate,
255 |                                     histogram.colors=color.volnum, 
256 |                                     title=curMarker.name)
257 |   
258 |   return(p)
259 | }
260 | 
261 | p.CD31 <- titrationPlot("CD31", legend=TRUE)
262 | 
263 | p.CD31
264 | ```
265 | 
266 | ## Final plot
267 | 
268 | ```{r figure3, fig.width=7, fig.height=4.5}
269 | A <- p.UMIcountsPerCondition + theme(legend.key.width=unit(0.3,"cm"), 
270 |                                      legend.key.height=unit(0.4,"cm"), 
271 |                                      legend.text=element_text(size=unit(5,"pt")),
272 |                                      plot.margin=unit(c(0.3,0,0,0),"cm"))
273 | 
274 | B1 <- p.markerByConc + theme(text = element_text(size=10), 
275 |                              plot.margin=unit(c(0.3,0,1,0),"cm"),
276 |                              legend.position="none")
277 | B2 <- p.UMIcountsPerMarker + theme(legend.position="none")
278 | C <- p.UMIinExpressingCells + theme(legend.position="none")
279 | ## Extract the shared condition legend of panels B and C so it can be drawn once below them
280 | BC.legend <- cowplot::get_legend(p.UMIcountsPerMarker + 
281 |                                    theme(legend.position="bottom", 
282 |                                          legend.direction="horizontal", 
283 |                                          legend.background=element_blank(), 
284 |                                          legend.box.background=element_blank(), 
285 |                                          legend.key=element_blank(),
286 |                                          legend.key.height=unit(2,"mm")))
287 | 
288 | D <- p.CD31 + theme(plot.margin=unit(c(0.5,0,0,0),"cm"))
289 | ## Left column: panels A and D stacked, plus a small empty spacer at the bottom
290 | AD <- cowplot::plot_grid(A,D,NULL, 
291 |                          ncol=1, 
292 |                          rel_heights = c(14,16,1.5),
293 |                          labels=c("A","D",""), 
294 |                          label_size=panel.label_size, 
295 |                          vjust=panel.label_vjust, 
296 |                          hjust=panel.label_hjust)
297 | ## Right block: concentration heatmap (B1), UMI sum per marker (B2) and UMI in expressing cluster (C) in one row
298 | BC <- cowplot::plot_grid(B1, B2, C, 
299 |                          nrow=1, 
300 |                          rel_widths=c(2,10,10), 
301 |                          align="h", 
302 |                          axis="tb", 
303 |                          labels=c("B", "", "C"), 
304 |                          label_size=panel.label_size, 
305 |                          vjust=panel.label_vjust, 
306 |                          hjust=panel.label_hjust)
307 | ## plot_grid is called with its namespace, like every other cowplot call in this chunk
308 | p.final <- cowplot::ggdraw(cowplot::plot_grid(AD, BC, nrow=1, rel_widths=c(1.2,4), align="v", axis="l")) + 
309 |     cowplot::draw_plot(BC.legend,0.27,0.023,0.2,0.00001)
310 | 
311 | png(file=file.path(outdir,"Supplementary Figure S5.png"), 
312 |     width=figure.width.full, 
313 |     height=4.7, 
314 |     units = figure.unit, 
315 |     res=figure.resolution, 
316 |     antialias=figure.antialias)
317 | ## Explicit print() so the figure is drawn on the png device even when this code is not auto-printed at top level
318 |   print(p.final)
319 |   
320 | dev.off()
321 | 
322 | p.final
323 | ```
324 | 
325 | ## Individual titration plots
326 | 
327 | For supplementary information.
328 | 
329 | ```{r suppFig1, fig.width=7, fig.height=10, eval=FALSE}
330 | plots.columns <- 6  ## number of plots per grid row
331 | rows.max <- 5  ## maximum number of grid rows per output page
332 | 
333 | markers <- abpanel[rownames(object[["ADT.kallisto"]]),]
334 | markers <- markers[order(markers$Category, markers$Marker),]
335 | 
336 | plots <- list()
337 | 
338 | ## Make individual plots for each marker (seq_len() avoids iterating over c(1,0) if there are no markers)
339 | for(i in seq_len(nrow(markers))){
340 |   curMarker <- markers[i,]
341 |   curMarker.name <- curMarker$Marker
342 |   y.axis <- (i-1) %% plots.columns == 0  ## only the first plot of each grid row shows the y-axis; derived from plots.columns instead of a hard-coded index list
343 |   plots[[curMarker.name]] <- titrationPlot(curMarker.name, y.axis=y.axis)
344 | }
345 | 
346 | # a bit of a hack to make celltype legend
347 | p.legend <- cowplot::get_legend(ggplot(data.frame(supercluster=object$supercluster), 
348 |                                            aes(color=supercluster,x=1,y=1)) + 
349 |   geom_point(shape=15, size=1.5) + 
350 |   scale_color_manual(values=color.supercluster) + 
351 |   theme(legend.title=element_blank(), 
352 |         legend.margin=margin(0,0,0,0), 
353 |         legend.key.size = unit(0.15,"cm"),
354 |         legend.position = c(0.98,1.1), 
355 |         legend.justification=c(1,1), 
356 |         legend.direction="horizontal"))
357 | 
358 | plots.num <- length(plots)
359 | plots.perPage <- plots.columns*rows.max
360 | plots.pages <- ceiling(plots.num/plots.perPage)
361 | 
362 | ## Make a supplementary figure split into pages (no pages are produced when there are no plots)
363 | for(i in seq_len(plots.pages)){
364 |   start <- (i-1)*plots.perPage+1
365 |   end <- i*plots.perPage
366 |   end <- min(end,plots.num)  ## the last page may be only partly filled
367 |   curPlots <- c(start:end)
368 |   plots.rows <- ceiling(length(curPlots)/plots.columns)
369 |   
370 |   curPlots <- cowplot::plot_grid(plotlist=plots[curPlots],ncol=plots.columns, rel_widths=c(1.1,rep(1,plots.columns-1)), align="h", axis="tb")  ## first column is slightly wider to fit the y-axis
371 |   curPlots.layout <- cowplot::plot_grid(NULL, p.legend, curPlots, vjust=-0.5, hjust=panel.label_hjust, label_size=panel.label_size, ncol=1, rel_heights= c(0.5, 1.3, 70/5*plots.rows))
372 |   
373 |   png(file=file.path(outdir,paste0("Supplementary Figure X",LETTERS[i],".png")), 
374 |       units=figure.unit, 
375 |       res=figure.resolution, 
376 |       width=figure.width.full, 
377 |       height=(2*plots.rows),
378 |       antialias=figure.antialias)
379 | 
380 |   print(curPlots.layout)  ## draw into the png device opened above
381 |   
382 |   dev.off()
383 |   
384 |   print(curPlots.layout)  ## draw again so the page also appears in the rendered document
385 | }
386 | ```
387 | 


--------------------------------------------------------------------------------
/Volume titration.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "CITE-seq optimization - Staining volume titration"
  3 | author: "Terkild Brink Buus"
  4 | date: "30/3/2020"
  5 | output: github_document
  6 | ---
  7 | 
  8 | ```{r setup, include=FALSE}
  9 | knitr::opts_chunk$set(warning=FALSE, message=FALSE)  ## default chunk options: do not show warnings or messages
 10 | options(stringsAsFactors=FALSE)  ## keep character columns as character instead of converting them to factors
 11 | ```
 12 | 
 13 | ## Load utilities
 14 | 
 15 | Including libraries, plotting and color settings and custom utility functions
 16 | 
 17 | ```{r loadLibraries, results='hide', message=FALSE, warning=FALSE}
 18 | set.seed(114)  ## fixed seed for the random number generator
 19 | library("Seurat", quietly=TRUE)  ## library() stops immediately if the package is missing; require() would only warn
 20 | library("tidyverse", quietly=TRUE)
 21 | library("Matrix", quietly=TRUE)
 22 | library("patchwork", quietly=TRUE)
 23 | 
 24 | ## Load ggplot theme and defaults
 25 | source("R/ggplot_settings.R")
 26 | 
 27 | ## Load helper functions
 28 | source("R/Utilities.R")
 29 | 
 30 | ## Load predefined color schemes
 31 | source("R/color.R")
 32 | 
 33 | ## Load feature_rankplot functions
 34 | source("R/feature_rankplot.R")
 35 | source("R/feature_rankplot_hist.R")
 36 | source("R/feature_rankplot_hist_custom.R")
 37 | 
 38 | outdir <- "figures"  ## directory the figure png files are written to
 39 | data.Seurat <- "data/5P-CITE-seq_Titration.rds"
 40 | data.abpanel <- "data/Supplementary_Table_1.xlsx"
 41 | data.markerStats <- "data/markerByClusterStats.tsv"
 42 | 
 43 | ## Make a custom function for formatting the concentration scale (numbers printed with two decimals)
 44 | scaleFUNformat <- function(x) sprintf("%.2f", x)
 45 | ```
 46 | 
 47 | ## Load Seurat object
 48 | 
 49 | Subset to only focus on conditions with 1 million cells and dilution factor 4 (thus comparing 50µl to 25µl staining volume in PBMCs).
 50 | 
 51 | ```{r loadSeurat}
 52 | object <- readRDS(file=data.Seurat)  ## read the stored object from the .rds file
 53 | 
 54 | ## Show number of cells from each sample (counts per value of the "group" annotation)
 55 | table(object$group)
 56 | 
 57 | object <- subset(object, subset=dilution == "DF4" & cellsAtStaining == "1000k")  ## keep only cells annotated as DF4 and 1000k
 58 | object  ## print a summary of the subsetted object
 59 | ```
 60 | 
 61 | ## Load Ab panel annotation and concentrations
 62 | 
 63 | Marker statistics are reused in other comparisons and were calculated at the end of the preprocessing vignette.
 64 | 
 65 | ```{r loadABPanel}
 66 | abpanel <- data.frame(readxl::read_excel(data.abpanel))
 67 | rownames(abpanel) <- abpanel$Marker  ## allows lookup of panel rows by marker name
 68 | 
 69 | ## As we are only working with dilution factor 4 samples here, we want to show labels accordingly
 70 | # a bit of a hack... (concentrations in abpanel are divided by 4 in place; everything below sees the divided values)
 71 | abpanel$conc_µg_per_mL <- abpanel$conc_µg_per_mL/4
 72 | 
 73 | markerStats <- read.table(data.markerStats)
 74 | markerStats.PBMC <- markerStats[markerStats$tissue == "PBMC",]  ## PBMC rows only; taken before the row names are set below
 75 | rownames(markerStats) <- paste(markerStats$marker,markerStats$tissue,sep="_")  ## row names of the form marker_tissue
 76 | 
 77 | ## Make an ordering vector ordering markers by decreasing concentration, then by decreasing total UMI count
 78 | marker.order <- markerStats.PBMC$marker[order(markerStats.PBMC$conc_µg_per_mL, markerStats.PBMC$UMItotal, decreasing=TRUE)]
 79 | 
 80 | head(abpanel)
 81 | head(markerStats)
 82 | ```
 83 | 
 84 | ## Cell type and tissue overview
 85 | 
 86 | Make tSNE plots colored by cell type, cluster and staining volume.
 87 | 
 88 | ```{r tsnePlots, fig.height=3, fig.width=7}
 89 | p.tsne.volume <- DimPlot(object, group.by="volume", reduction="tsne", pt.size=0.1, combine=FALSE)[[1]] + theme_get() + facet_wrap(~"Volume") + scale_color_manual(values=color.volume)  ## cells colored by staining volume
 90 | 
 91 | p.tsne.cluster <- DimPlot(object, group.by="supercluster", reduction="tsne", pt.size=0.1, combine=FALSE)[[1]] + theme_get() + scale_color_manual(values=color.supercluster) + facet_wrap(~"Cell types")  ## cells colored by supercluster
 92 | 
 93 | p.tsne.finecluster <- DimPlot(object, label=TRUE, label.size=3, reduction="tsne", group.by="fineCluster", pt.size=0.1, combine=FALSE)[[1]] + theme_get() + facet_wrap(  ~"Clusters") + guides(col="none")  ## clusters are labelled in the plot, so the color legend is dropped ("none" replaces the deprecated FALSE)
 94 | 
 95 | p.tsne.cluster + p.tsne.finecluster + p.tsne.volume  ## combine the three panels with patchwork
 96 | ```
 97 | 
 98 | ## Overall ADT counts
 99 | 
100 | Extract UMI data and calculate UMI sum per marker within each condition.
101 | 
102 | ```{r calculateUMIcountsPerMarker}
103 | ## Get the data (raw counts of the ADT.kallisto assay, one row per marker)
104 | ADT.matrix <- data.frame(GetAssayData(object, assay="ADT.kallisto", slot="counts"))
105 | ADT.matrix$marker <- rownames(ADT.matrix)
106 | ADT.matrix$conc <- abpanel[ADT.matrix$marker,"conc_µg_per_mL"]
107 | ADT.matrix <- ADT.matrix %>% pivot_longer(c(-marker,-conc))  ## long format: every column except marker and conc becomes a name/value pair
108 | 
109 | ## Get cell annotations
110 | cell.annotation <- FetchData(object, vars=c("volume"))
111 | 
112 | ## Calculate marker sum for each staining volume (volume is looked up in cell.annotation by the "name" column)
113 | ADT.matrix.agg <- ADT.matrix %>% group_by(volume=cell.annotation[name,"volume"], marker, conc) %>% summarise(sum=sum(value))
114 | 
115 | ## Order markers by concentration
116 | ADT.matrix.agg$marker.byConc <- factor(ADT.matrix.agg$marker, levels=marker.order)
117 | 
118 | ## Extract marker annotation
119 | ann.markerConc <- abpanel[marker.order,]
120 | ann.markerConc$Marker <- factor(marker.order, levels=marker.order)
121 | 
122 | ADT.matrix.agg.total <- ADT.matrix.agg  ## keep a copy of the per-volume totals; ADT.matrix.agg is reassigned in a later chunk
123 | ```
124 | 
125 | ## Plot overall ADT counts by conditions
126 | 
127 | Compare overall ADT UMI counts between samples stained in the two staining volumes (50µl vs. 25µl).
128 | 
129 | ```{r UMIcountsPerCondition, fig.width=2.5, fig.height=2}
130 | 
131 | p.UMIcountsPerCondition <- ggplot(ADT.matrix.agg.total[order(-ADT.matrix.agg.total$conc, -ADT.matrix.agg.total$sum),], aes(x=volume, y=sum/10^6, fill=conc)) +  ## order the rows using the same table that is plotted (ADT.matrix.agg is reassigned later)
132 |   geom_bar(stat="identity", col=alpha(col="black",alpha=0.05)) +  ## one bar per staining volume, built from one segment per marker
133 |   scale_fill_viridis_c(trans="log2", labels=scaleFUNformat, breaks=c(0.0375,0.15,0.625,2.5,10)) + 
134 |   scale_y_continuous(expand=c(0,0,0,0.05)) + 
135 |   labs(fill="DF4\nµg/mL", y=bquote("ADT UMI counts ("~10^6~")")) +  ## y values are sums divided by 10^6
136 |   guides(fill=guide_colourbar(reverse=T)) + 
137 |   theme(panel.grid.major=element_blank(), axis.title.x=element_blank(), panel.border=element_blank(), axis.line = element_line(), legend.position="right")
138 | 
139 | p.UMIcountsPerCondition
140 | ```
141 | 
142 | ## Compare total UMI counts per marker
143 | 
144 | Plot total UMI counts for each marker at the investigated staining volumes (50µl vs. 25µl). To ease readability, we place dashed lines between each concentration.
145 | 
146 | ```{r plotUMIcountsPerMarker, fig.width=4.5, fig.height=5}
147 | ## Calculate "breaks" where concentration change.
148 | lines <- length(marker.order)-cumsum(sapply(split(ann.markerConc$Marker,ann.markerConc$conc_µg_per_mL),length))+0.5
149 | lines <- data.frame(breaks=lines[-length(lines)])  ## drop the last break so no line is drawn after the final group
150 | 
151 | ## Make a marker by concentration "heatmap" (one tile per marker, filled by concentration)
152 | p.markerByConc <- ggplot(ann.markerConc, aes(x=1, y=Marker, fill=conc_µg_per_mL)) + 
153 |   geom_tile(col=alpha(col="black",alpha=0.2)) + 
154 |   geom_hline(data=lines,aes(yintercept=breaks), linetype="dashed", alpha=0.5) + 
155 |   scale_fill_viridis_c(trans="log2") + 
156 |   labs(fill="µg/mL") + 
157 |   theme_get() + 
158 |   theme(axis.ticks.x=element_blank(), axis.title = element_blank(), axis.text.x=element_blank(), panel.grid=element_blank(), legend.position="right", plot.margin=unit(c(0.1,0.1,0.1,0.1),"mm")) + scale_x_continuous(expand=c(0,0))
159 |   
160 | ## Make UMI counts per Marker plot (log2 of the UMI sum per marker, one point per staining volume)
161 | p.UMIcountsPerMarker <- ggplot(ADT.matrix.agg, aes(x=marker.byConc,y=log2(sum))) + 
162 |   geom_line(aes(group=marker), size=1.2, color="#666666") + 
163 |   geom_point(aes(group=volume, fill=volume), pch=21, size=0.7) + 
164 |   geom_vline(data=lines,aes(xintercept=breaks), linetype="dashed", alpha=0.5) + 
165 |   scale_fill_manual(values=color.volume) + 
166 |   scale_y_continuous(breaks=c(9:17)) + 
167 |   ylab("log2(UMI sum)") + 
168 |   guides(fill=guide_legend(override.aes=list(size=1.5), reverse=TRUE)) + 
169 |   theme(axis.title.y=element_blank(), axis.text.y=element_blank(), legend.position="bottom", legend.justification="left", legend.title.align=0, legend.key.width=unit(0.2,"cm"), legend.title=element_blank()) + 
170 |   coord_flip()
171 | 
172 | ## Combine plot with markerByConc annotation heatmap (fill legends switched off with "none" instead of the deprecated F)
173 | plotUMIcountsPerMarker <- p.markerByConc + guides(fill="none") + p.UMIcountsPerMarker + guides(fill="none") + plot_spacer() + guide_area() + plot_layout(ncol=4, widths=c(1,30,0.1), guides='collect')
174 | 
175 | plotUMIcountsPerMarker
176 | ```
177 | 
178 | ## Compare change in UMI/cell within expressing cluster
179 | 
180 | Using a specific percentile may be prone to outliers in small clusters (i.e. the 90th percentile of a cluster of 30 cells will be the cell with the 3rd highest count, making it prone to outliers). We thus only use the 90th percentile if the cluster contains more than 100 cells. For smaller clusters, the median is used. The expressing cluster is identified in the "preprocessing" vignette.
181 | 
182 | ```{r UMIinExpressingCells, fig.width=4.5, fig.height=5}
183 | ## Get the data (raw counts of the ADT.kallisto assay, one row per marker)
184 | ADT.matrix <- data.frame(GetAssayData(object, assay="ADT.kallisto", slot="counts"))
185 | ADT.matrix$marker <- rownames(ADT.matrix)
186 | ADT.matrix$conc <- abpanel[ADT.matrix$marker,"conc_µg_per_mL"]
187 | ADT.matrix <- ADT.matrix %>% pivot_longer(c(-marker,-conc))
188 | 
189 | ## Get cell annotations
190 | cell.annotation <- FetchData(object, vars=c("volume", "fineCluster"))
191 | 
192 | ## Calculate marker statistics from each staining volume within each cluster. NOTE: the "median" column actually holds the 90th percentile; nth() is presumably the helper from R/Utilities.R - TODO confirm
193 | ADT.matrix.agg <- ADT.matrix %>% group_by(volume=cell.annotation[name,"volume"], fineCluster=cell.annotation[name,"fineCluster"], marker, conc) %>% summarise(sum=sum(value), median=quantile(value, probs=c(0.9)), nth=nth(value))
194 | ADT.matrix.agg$tissue <- "PBMC"  ## was a discarded "==" comparison on a non-existent column; the join below is unaffected because Cluster.max has no tissue column
195 | 
196 | ## Use data for the previously determined expressing cluster.
197 | Cluster.max <- markerStats[markerStats$tissue == "PBMC",c("marker","fineCluster")]
198 | Cluster.max$fineCluster <- factor(Cluster.max$fineCluster)
199 | 
200 | ADT.matrix.aggByClusterMax <- Cluster.max %>% left_join(ADT.matrix.agg)  ## joins on the shared columns (marker, fineCluster), keeping one cluster per marker
201 | ADT.matrix.aggByClusterMax$marker.byConc <- factor(ADT.matrix.aggByClusterMax$marker, levels=marker.order)
202 | 
203 | p.UMIinExpressingCells <- ggplot(ADT.matrix.aggByClusterMax, aes(x=marker.byConc, y=log2(nth))) + 
204 |   geom_line(aes(group=marker), size=1.2, color="#666666") + 
205 |   geom_point(aes(group=volume, fill=volume), pch=21, size=0.7) + 
206 |   geom_vline(data=lines,aes(xintercept=breaks), linetype="dashed", alpha=0.5) + 
207 |   geom_text(aes(label=paste0(fineCluster," ")), y=Inf, adj=1, size=1.5) +  ## print the cluster id at the far end of each marker row
208 |   scale_fill_manual(values=color.volume) + 
209 |   scale_y_continuous(breaks=c(0:11), labels=2^c(0:11), expand=c(0.05,0.5)) +  ## axis is log2 but labelled in raw UMI counts
210 |   ylab("90th percentile UMI of expressing cluster") + 
211 |   theme(axis.title.y=element_blank(), axis.text.y=element_blank(), legend.position="right", legend.justification="left", legend.title.align=0, legend.key.width=unit(0.2,"cm")) + 
212 |   coord_flip()
213 | 
214 | ## Combine plot with markerByConc annotation heatmap
215 | UMIinExpressingCells <- p.markerByConc + theme(legend.position="none") + p.UMIinExpressingCells + theme(legend.position="none") + plot_spacer() + plot_layout(ncol=4, widths=c(1,30,0.1), guides='collect')
216 | 
217 | UMIinExpressingCells
218 | ```
219 | 
220 | ## Titration examples
221 | 
222 | Most markers are largely unaffected by reducing staining volume. However, some antibodies used at low concentrations and targeting abundant epitopes are affected; one such example is CD31:
223 | 
224 | ```{r fig.width=1.4, fig.height=2.3}
225 | ## Make helper function for plotting titration plots: draws one marker with feature_rankplot_hist_custom(), grouped by staining volume. gate.PBMC, if given, replaces the stored gate; gate.Lung is accepted but never used in this function.
226 | titrationPlot <- function(marker, gate.PBMC=NULL, gate.Lung=NULL, y.axis=FALSE, show.gate=TRUE, legend=FALSE){
227 |   curMarker.name <- marker
228 |   
229 |   ## Get antibody concentration for legends (abpanel concentrations were divided by 4 when loaded, so despite the variable name this is the DF4 value)
230 |   curMarker.DF1conc <- abpanel[curMarker.name, "conc_µg_per_mL"]
231 |   if(show.gate==TRUE){
232 |     ## Load gating percentages from manually set DSB thresholds
233 |     gate <- data.frame(gate=markerStats[markerStats$marker == curMarker.name & markerStats$tissue== "PBMC",c("pct")])
234 |     gate$gate <- 1-(gate$gate/100)  ## convert the percentage into the complementary fraction
235 |     rownames(gate) <- gate$wrap  ## NOTE(review): gate has no "wrap" column here, so this assigns NULL row names - confirm whether it is still needed
236 |     ## Allow manual gating
237 |     if(!is.null(gate.PBMC)) gate <- gate.PBMC
238 |   } else {
239 |     gate <- NULL
240 |   }
241 | 
242 |   p <- feature_rankplot_hist_custom(data=object, 
243 |                                     marker=paste0("adt_",curMarker.name),      
244 |                                     group="volume",
245 |                                     barcodeGroup="supercluster",
246 |                                     conc=curMarker.DF1conc, 
247 |                                     legend=legend, 
248 |                                     yaxis.text=y.axis, 
249 |                                     gates=gate,
250 |                                     histogram.colors=color.volume, 
251 |                                     title=curMarker.name)
252 |   
253 |   return(p)
254 | }
255 | 
256 | p.CD31 <- titrationPlot("CD31", legend=TRUE)  ## example marker, reused as panel D of the final figure
257 | 
258 | p.CD31
259 | ```
260 | 
261 | ## tSNE plots
262 | 
263 | Make tSNE plots with raw UMI counts. Use rainbow color scheme to show dynamic range in expression levels.
264 | 
265 | ```{r, fig.height=2, fig.width=7}
266 | show_tsne_markers <- c("CD31","CD8")  ## the two markers shown; each is plotted once per staining volume
267 | f.tsne.format <- function(x){  ## apply the shared color scale, fixed axis limits and theme to one tSNE plot
268 |     x + 
269 |     scale_color_gradientn(colours = c("#000033","#3333FF","#3377FF","#33AAFF","#33CC33","orange","red"), 
270 |                           limits=c(0,NA)) + 
271 |     scale_y_continuous(expand=c(0,0,0.05,0), limits=c(-45.52796,37.94770)) + 
272 |     xlim(c(-40.83170,49.63832)) + 
273 |     theme_get() + 
274 |     theme(plot.title=element_text(size=7, face="bold", hjust=0.5),
275 |           plot.background=element_blank(),
276 |           panel.background=element_blank(),
277 |           axis.title=element_blank(),
278 |           axis.text.x=element_blank(),
279 |           axis.text.y=element_blank(),
280 |           legend.key.width=unit(3,"mm"),
281 |           legend.key.height=unit(2,"mm"),
282 |           legend.position=c(1,-0.03),
283 |           legend.justification=c(1,0),
284 |           legend.background=element_blank(),
285 |           legend.direction="horizontal")
286 | }
287 | 
288 | maximum <- apply(FetchData(object, vars=paste0("adt_",show_tsne_markers), slot="counts"),2,quantile,probs=c(0.95))  ## per-marker 95th percentile of counts, used as max.cutoff so both volumes share a color range
289 | 
290 | p.tsne.1 <- f.tsne.format(FeaturePlot(subset(object, subset=volume=="25µl"), reduction="tsne", sort=TRUE,  combine=FALSE, features=paste0("adt_",show_tsne_markers[1]), slot="counts", max.cutoff=maximum[1], pt.size=0.1)[[1]])  ## first marker, 25µl
291 | p.tsne.2 <- f.tsne.format(FeaturePlot(subset(object, subset=volume=="50µl"), reduction="tsne", sort=TRUE,  combine=FALSE, features=paste0("adt_",show_tsne_markers[1]), slot="counts", max.cutoff=maximum[1], pt.size=0.1)[[1]])  ## first marker, 50µl
292 | p.tsne.3 <- f.tsne.format(FeaturePlot(subset(object, subset=volume=="25µl"), reduction="tsne", sort=TRUE,  combine=FALSE, features=paste0("adt_",show_tsne_markers[2]), slot="counts", max.cutoff=maximum[2], pt.size=0.1)[[1]])  ## second marker, 25µl
293 | p.tsne.4 <- f.tsne.format(FeaturePlot(subset(object, subset=volume=="50µl"), reduction="tsne", sort=TRUE,  combine=FALSE, features=paste0("adt_",show_tsne_markers[2]), slot="counts", max.cutoff=maximum[2], pt.size=0.1)[[1]])  ## second marker, 50µl
294 | 
295 | p.tsne <- list(p.tsne.1 + ggtitle("25µl"),p.tsne.2 + ggtitle("50µl"),p.tsne.3 + ggtitle("25µl"),p.tsne.4 + ggtitle("50µl"))
296 | ## Get common y-axis label (re-enable the y-axis title on the first panel only)
297 | p.tsne[[1]] <- p.tsne[[1]] + theme(axis.title.y=element_text())
298 | # a bit of a hack to get a common x-axis label (title re-enabled on the second panel and shifted right with hjust)
299 | p.tsne[[2]] <- p.tsne[[2]] + theme(axis.title.x=element_text(hjust=1.2))
300 | 
301 | p.UMI.tsne <- cowplot::plot_grid(plotlist=p.tsne, 
302 |                                  align="h", 
303 |                                  axis="tb", 
304 |                                  nrow=1, 
305 |                                  rel_widths=c(1.05,1,1,1),
306 |                                  labels=c("E",show_tsne_markers[1],"F",show_tsne_markers[2]),  ## panel letters alternate with the marker names
307 |                                  label_size=panel.label_size, 
308 |                                  vjust=panel.label_vjust, 
309 |                                  hjust=c(panel.label_hjust,0.5,panel.label_hjust,0.5))
310 | 
311 | p.UMI.tsne
312 | ```
313 | 
314 | ## Final plot
315 | 
316 | ```{r figure3, fig.width=7, fig.height=6}
317 | A <- p.UMIcountsPerCondition + theme(legend.key.width=unit(0.3,"cm"),  ## panel A: total ADT UMI counts per staining volume
318 |                                      legend.key.height=unit(0.4,"cm"), 
319 |                                      legend.text=element_text(size=unit(5,"pt")),
320 |                                      plot.margin=unit(c(0.3,0,0.5,0),"cm"))
321 | 
322 | B1 <- p.markerByConc + theme(text = element_text(size=10),  ## panel B, left: marker-by-concentration annotation strip
323 |                              plot.margin=unit(c(0.3,0,0,0),"cm"),
324 |                              legend.position="none")
325 | B2 <- p.UMIcountsPerMarker + theme(legend.position="none")  ## panel B, right: UMI sum per marker
326 | C <- p.UMIinExpressingCells + theme(legend.position="none")  ## panel C: UMI level within the expressing cluster
327 | 
328 | BC.legend <- cowplot::get_legend(p.UMIcountsPerMarker +  ## extract one legend to draw separately, since panels B and C have theirs removed
329 |                                    theme(legend.position="bottom", 
330 |                                          legend.direction="horizontal", 
331 |                                          legend.background=element_blank(), 
332 |                                          legend.box.background=element_blank(), legend.key=element_blank()))
333 | 
334 | D <- p.CD31 + theme(plot.margin=unit(c(0.5,0,0,0),"cm"))  ## panel D: CD31 titration example
335 | 
336 | AD <- cowplot::plot_grid(A,D,NULL,  ## left column: A above D, with an empty spacer row at the bottom
337 |                          ncol=1, 
338 |                          rel_heights = c(13,17,1.5),
339 |                          labels=c("A","D",""), 
340 |                          label_size=panel.label_size, 
341 |                          vjust=panel.label_vjust, 
342 |                          hjust=panel.label_hjust)
343 | 
344 | BC <- cowplot::plot_grid(B1, B2, C,  ## right part: B1, B2 and C side by side
345 |                          nrow=1, 
346 |                          rel_widths=c(2,10,10), 
347 |                          align="h", 
348 |                          axis="tb", 
349 |                          labels=c("B", "", "C"), 
350 |                          label_size=panel.label_size, 
351 |                          vjust=panel.label_vjust, 
352 |                          hjust=panel.label_hjust)
353 | 
354 | p.figure <- cowplot::plot_grid(cowplot::ggdraw(plot_grid(AD, BC,  ## top row: AD next to BC with the extracted legend drawn on top; bottom row: tSNE panels
355 |                                       nrow=1, 
356 |                                       rel_widths=c(1,4), 
357 |                                       align="v", 
358 |                                       axis="l")) + 
359 |     cowplot::draw_plot(BC.legend,0.27,0.020,0.2,0.00001),
360 |     p.UMI.tsne, rel_heights=c(3,1.35), align="v", axis="lr", ncol=1)
361 | 
362 | 
363 | png(file=file.path(outdir,"Figure 3.png"),  ## open a png device writing to the figure directory
364 |     width=figure.width.full, 
365 |     height=6, 
366 |     units = figure.unit, 
367 |     res=figure.resolution, 
368 |     antialias=figure.antialias)
369 | 
370 |   p.figure
371 |   
372 | dev.off()
373 | 
374 | p.figure
375 | ```
376 | 
377 | ## Individual titration plots
378 | 
379 | For supplementary information.
380 | 
381 | ```{r suppFig1, fig.width=7, fig.height=10}
382 | plots.columns <- 6  ## number of plots per grid row
383 | rows.max <- 5  ## maximum number of grid rows per output page
384 | 
385 | markers <- abpanel[rownames(object[["ADT.kallisto"]]),]
386 | markers <- markers[order(markers$Category, markers$Marker),]
387 | 
388 | plots <- list()
389 | 
390 | ## Make individual plots for each marker (seq_len() avoids iterating over c(1,0) if there are no markers)
391 | for(i in seq_len(nrow(markers))){
392 |   curMarker <- markers[i,]
393 |   curMarker.name <- curMarker$Marker
394 |   y.axis <- (i-1) %% plots.columns == 0  ## only the first plot of each grid row shows the y-axis; derived from plots.columns instead of a hard-coded index list
395 |   plots[[curMarker.name]] <- titrationPlot(curMarker.name, y.axis=y.axis)
396 | }
397 | 
398 | # a bit of a hack to make celltype legend
399 | p.legend <- cowplot::get_legend(ggplot(data.frame(supercluster=object$supercluster), 
400 |                                            aes(color=supercluster,x=1,y=1)) + 
401 |   geom_point(shape=15, size=1.5) + 
402 |   scale_color_manual(values=color.supercluster) + 
403 |   theme(legend.title=element_blank(), 
404 |         legend.margin=margin(0,0,0,0), 
405 |         legend.key.size = unit(0.15,"cm"),
406 |         legend.position = c(0.98,1.1), 
407 |         legend.justification=c(1,1), 
408 |         legend.direction="horizontal"))
409 | 
410 | plots.num <- length(plots)
411 | plots.perPage <- plots.columns*rows.max
412 | plots.pages <- ceiling(plots.num/plots.perPage)
413 | 
414 | ## Make a supplementary figure split into pages (no pages are produced when there are no plots)
415 | for(i in seq_len(plots.pages)){
416 |   start <- (i-1)*plots.perPage+1
417 |   end <- i*plots.perPage
418 |   end <- min(end,plots.num)  ## the last page may be only partly filled
419 |   curPlots <- c(start:end)
420 |   plots.rows <- ceiling(length(curPlots)/plots.columns)
421 |   
422 |   curPlots <- cowplot::plot_grid(plotlist=plots[curPlots],ncol=plots.columns, rel_widths=c(1.1,rep(1,plots.columns-1)), align="h", axis="tb")  ## first column is slightly wider to fit the y-axis
423 |   curPlots.layout <- cowplot::plot_grid(NULL, p.legend, curPlots, vjust=-0.5, hjust=panel.label_hjust, label_size=panel.label_size, ncol=1, rel_heights= c(0.5, 1.3, 70/5*plots.rows))
424 |   
425 |   png(file=file.path(outdir,paste0("Supplementary Figure 3",LETTERS[i],".png")), 
426 |       units=figure.unit, 
427 |       res=figure.resolution, 
428 |       width=figure.width.full, 
429 |       height=(2*plots.rows),
430 |       antialias=figure.antialias)
431 | 
432 |   print(curPlots.layout)  ## draw into the png device opened above
433 |   
434 |   dev.off()
435 |   
436 |   print(curPlots.layout)  ## draw again so the page also appears in the rendered document
437 | }
438 | ```
439 | 


--------------------------------------------------------------------------------
/Volume-and-cell-number-titration_files/figure-gfm/UMIcountsPerCondition-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Volume-and-cell-number-titration_files/figure-gfm/UMIcountsPerCondition-1.png


--------------------------------------------------------------------------------
/Volume-and-cell-number-titration_files/figure-gfm/UMIinExpressingCells-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Volume-and-cell-number-titration_files/figure-gfm/UMIinExpressingCells-1.png


--------------------------------------------------------------------------------
/Volume-and-cell-number-titration_files/figure-gfm/figure3-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Volume-and-cell-number-titration_files/figure-gfm/figure3-1.png


--------------------------------------------------------------------------------
/Volume-and-cell-number-titration_files/figure-gfm/plotUMIcountsPerMarker-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Volume-and-cell-number-titration_files/figure-gfm/plotUMIcountsPerMarker-1.png


--------------------------------------------------------------------------------
/Volume-and-cell-number-titration_files/figure-gfm/tsnePlots-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Volume-and-cell-number-titration_files/figure-gfm/tsnePlots-1.png


--------------------------------------------------------------------------------
/Volume-and-cell-number-titration_files/figure-gfm/unnamed-chunk-1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Volume-and-cell-number-titration_files/figure-gfm/unnamed-chunk-1-1.png


--------------------------------------------------------------------------------
/Volume-titration_files/figure-gfm/UMIcountsPerCondition-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Volume-titration_files/figure-gfm/UMIcountsPerCondition-1.png


--------------------------------------------------------------------------------
/Volume-titration_files/figure-gfm/UMIinExpressingCells-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Volume-titration_files/figure-gfm/UMIinExpressingCells-1.png


--------------------------------------------------------------------------------
/Volume-titration_files/figure-gfm/figure3-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Volume-titration_files/figure-gfm/figure3-1.png


--------------------------------------------------------------------------------
/Volume-titration_files/figure-gfm/plotUMIcountsPerMarker-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Volume-titration_files/figure-gfm/plotUMIcountsPerMarker-1.png


--------------------------------------------------------------------------------
/Volume-titration_files/figure-gfm/suppFig1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Volume-titration_files/figure-gfm/suppFig1-1.png


--------------------------------------------------------------------------------
/Volume-titration_files/figure-gfm/suppFig1-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Volume-titration_files/figure-gfm/suppFig1-2.png


--------------------------------------------------------------------------------
/Volume-titration_files/figure-gfm/tsnePlots-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Volume-titration_files/figure-gfm/tsnePlots-1.png


--------------------------------------------------------------------------------
/Volume-titration_files/figure-gfm/unnamed-chunk-1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Volume-titration_files/figure-gfm/unnamed-chunk-1-1.png


--------------------------------------------------------------------------------
/Volume-titration_files/figure-gfm/unnamed-chunk-2-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Volume-titration_files/figure-gfm/unnamed-chunk-2-1.png


--------------------------------------------------------------------------------
/data/5P-CITE-seq_Titration.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/data/5P-CITE-seq_Titration.rds


--------------------------------------------------------------------------------
/data/5P-CITE-seq_Titration_full.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/data/5P-CITE-seq_Titration_full.rds


--------------------------------------------------------------------------------
/data/Supplementary_Table_1.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/data/Supplementary_Table_1.xlsx


--------------------------------------------------------------------------------
/data/markerByClusterStats.tsv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/data/markerByClusterStats.tsv


--------------------------------------------------------------------------------
/figures/Figure 1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/figures/Figure 1.png


--------------------------------------------------------------------------------
/figures/Figure 2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/figures/Figure 2.png


--------------------------------------------------------------------------------
/figures/Figure 3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/figures/Figure 3.png


--------------------------------------------------------------------------------
/figures/Figure 4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/figures/Figure 4.png


--------------------------------------------------------------------------------
/figures/Figure 5 wMule.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/figures/Figure 5 wMule.png


--------------------------------------------------------------------------------
/figures/Figure 5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/figures/Figure 5.png


--------------------------------------------------------------------------------
/figures/Figure 6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/figures/Figure 6.png


--------------------------------------------------------------------------------
/figures/Supplementary Fig S5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/figures/Supplementary Fig S5.png


--------------------------------------------------------------------------------
/figures/Supplementary Figure 2A.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/figures/Supplementary Figure 2A.png


--------------------------------------------------------------------------------
/figures/Supplementary Figure 2B.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/figures/Supplementary Figure 2B.png


--------------------------------------------------------------------------------
/figures/Supplementary Figure 2C.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/figures/Supplementary Figure 2C.png


--------------------------------------------------------------------------------
/figures/Supplementary Figure 2D.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/figures/Supplementary Figure 2D.png


--------------------------------------------------------------------------------
/figures/Supplementary Figure 2E.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/figures/Supplementary Figure 2E.png


--------------------------------------------------------------------------------
/figures/Supplementary Figure 3A.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/figures/Supplementary Figure 3A.png


--------------------------------------------------------------------------------
/figures/Supplementary Figure 3B.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/figures/Supplementary Figure 3B.png


--------------------------------------------------------------------------------
/figures/Supplementary Figure 4A.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/figures/Supplementary Figure 4A.png


--------------------------------------------------------------------------------
/figures/Supplementary Figure 4B.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/figures/Supplementary Figure 4B.png


--------------------------------------------------------------------------------
/figures/Supplementary Figure S1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/figures/Supplementary Figure S1.png


--------------------------------------------------------------------------------
/figures/Supplementary Figure S5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/figures/Supplementary Figure S5.png


--------------------------------------------------------------------------------
/figures/Supplementary Figure S6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/figures/Supplementary Figure S6.png


--------------------------------------------------------------------------------
/figures/Supplementary Figure S7A.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/figures/Supplementary Figure S7A.png


--------------------------------------------------------------------------------
/figures/Supplementary Figure S7B.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/figures/Supplementary Figure S7B.png


--------------------------------------------------------------------------------
/figures/Supplementary Figure S8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/figures/Supplementary Figure S8.png


--------------------------------------------------------------------------------
/figures/review_CD8_protein_rna_correlation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/figures/review_CD8_protein_rna_correlation.png


--------------------------------------------------------------------------------
/figures/review_protein_rna_correlations.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/figures/review_protein_rna_correlations.png


--------------------------------------------------------------------------------
/figures/review_washing_test.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/figures/review_washing_test.png


--------------------------------------------------------------------------------