├── .gitignore ├── 10X Datasets UMI per marker.Rmd ├── 10X-Datasets-UMI-per-marker.md ├── 10X-Datasets-UMI-per-marker_files └── figure-gfm │ └── unnamed-chunk-3-1.png ├── ADT counting methods.Rmd ├── ADT reads in cells vs empty drops.Rmd ├── ADT-counting-methods.md ├── ADT-counting-methods_files └── figure-gfm │ ├── unnamed-chunk-1-1.png │ ├── unnamed-chunk-1-2.png │ ├── unnamed-chunk-4-1.png │ ├── unnamed-chunk-6-1.png │ └── unnamed-chunk-9-1.png ├── ADT-reads-in-cells-vs-empty-drops.md ├── ADT-reads-in-cells-vs-empty-drops_files └── figure-gfm │ ├── DSBfigure-1.png │ ├── alluvial-1.png │ ├── barplot-1.png │ ├── figure-1.png │ ├── plotRatioDSB-1.png │ ├── ratio-1.png │ ├── unnamed-chunk-2-1.png │ └── unnamed-chunk-3-1.png ├── Antibody titration.Rmd ├── Antibody-titration.md ├── Antibody-titration_files └── figure-gfm │ ├── Figure1-1.png │ ├── UMIcountsPerCondition-1.png │ ├── UMIinExpressingCells-1.png │ ├── plotUMIcountsPerMarker-1.png │ ├── suppFig1-1.png │ ├── suppFig1-2.png │ ├── suppFig1-3.png │ ├── suppFig1-4.png │ ├── suppFig1-5.png │ ├── titrationExamples-1.png │ ├── tsnePlots-1.png │ └── unnamed-chunk-3-1.png ├── CITE-seq_optimization.Rproj ├── Cell number titration.Rmd ├── Cell-number-titration.md ├── Cell-number-titration_files └── figure-gfm │ ├── UMIcountsPerCondition-1.png │ ├── UMIinExpressingCells-1.png │ ├── figure-1.png │ ├── plotUMIcountsPerMarker-1.png │ ├── suppFig-1.png │ ├── suppFig-2.png │ ├── suppFig1-1.png │ ├── suppFig1-2.png │ ├── tsnePlots-1.png │ ├── unnamed-chunk-1-1.png │ └── unnamed-chunk-2-1.png ├── Demux_Preprocess_Downsample.Rmd ├── Demux_Preprocess_Downsample.md ├── Demux_Preprocess_Downsample_files └── figure-gfm │ ├── demux-1.png │ ├── demux-2.png │ ├── downsample-1.png │ ├── dsbnorm-1.png │ ├── filter-1.png │ ├── fineClusters-1.png │ ├── fineClusters-2.png │ ├── preprocessRNA-1.png │ ├── preprocessRNA-2.png │ ├── preprocessRNA-3.png │ ├── superclustering-1.png │ ├── superclustering-2.png │ ├── unnamed-chunk-1-1.png │ ├── 
unnamed-chunk-2-1.png │ ├── unnamed-chunk-3-1.png │ ├── unnamed-chunk-3-2.png │ └── unnamed-chunk-5-1.png ├── Load unfiltered data.Rmd ├── Load-unfiltered-data.md ├── Load-unfiltered-data_files └── figure-gfm │ ├── loadADT-1.png │ ├── loadGEX-1.png │ ├── loadHTO-1.png │ ├── unnamed-chunk-3-1.png │ ├── unnamed-chunk-4-1.png │ ├── unnamed-chunk-5-1.png │ └── unnamed-chunk-5-2.png ├── R ├── Utilities.R ├── color.R ├── feature_rankplot.R ├── feature_rankplot_hist.R ├── feature_rankplot_hist_custom.R └── ggplot_settings.R ├── README.md ├── Sequencing satuation.R ├── Snakemake ├── 10X_VDJ │ ├── Snakefile │ ├── include │ │ └── feature-ref.csv │ └── runSnakemake.sh ├── 10X_v3 │ ├── Snakefile │ ├── include │ │ └── feature-ref.csv │ └── runSnakemake.sh ├── Snakefile ├── include │ └── feature-ref.csv └── runSnakemake.sh ├── Volume and cell number titration.Rmd ├── Volume titration.Rmd ├── Volume-and-cell-number-titration.md ├── Volume-and-cell-number-titration_files └── figure-gfm │ ├── UMIcountsPerCondition-1.png │ ├── UMIinExpressingCells-1.png │ ├── figure3-1.png │ ├── plotUMIcountsPerMarker-1.png │ ├── tsnePlots-1.png │ └── unnamed-chunk-1-1.png ├── Volume-titration.md ├── Volume-titration_files └── figure-gfm │ ├── UMIcountsPerCondition-1.png │ ├── UMIinExpressingCells-1.png │ ├── figure3-1.png │ ├── plotUMIcountsPerMarker-1.png │ ├── suppFig1-1.png │ ├── suppFig1-2.png │ ├── tsnePlots-1.png │ ├── unnamed-chunk-1-1.png │ └── unnamed-chunk-2-1.png ├── data ├── 5P-CITE-seq_Titration.rds ├── 5P-CITE-seq_Titration_full.rds ├── Supplementary_Table_1.xlsx └── markerByClusterStats.tsv └── figures ├── Figure 1.png ├── Figure 2.png ├── Figure 3.png ├── Figure 4.png ├── Figure 5 wMule.png ├── Figure 5.png ├── Figure 6.png ├── Supplementary Fig S5.png ├── Supplementary Figure 2A.png ├── Supplementary Figure 2B.png ├── Supplementary Figure 2C.png ├── Supplementary Figure 2D.png ├── Supplementary Figure 2E.png ├── Supplementary Figure 3A.png ├── Supplementary Figure 3B.png ├── 
Supplementary Figure 4A.png ├── Supplementary Figure 4B.png ├── Supplementary Figure S1.png ├── Supplementary Figure S5.png ├── Supplementary Figure S6.png ├── Supplementary Figure S7A.png ├── Supplementary Figure S7B.png ├── Supplementary Figure S8.png ├── review_CD8_protein_rna_correlation.png ├── review_protein_rna_correlations.png └── review_washing_test.png /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | -------------------------------------------------------------------------------- /10X Datasets UMI per marker.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "CITE-seq optimization - 10X datasets: UMI per marker plot" 3 | author: "Terkild Brink Buus" 4 | date: "30/3/2020" 5 | output: github_document 6 | --- 7 | 8 | ```{r setup, include=FALSE} 9 | knitr::opts_chunk$set(warning=FALSE, message=FALSE) 10 | options(stringsAsFactors=FALSE) 11 | ``` 12 | 13 | ## Load utilities 14 | 15 | Including libraries, plotting and color settings and custom utility functions 16 | 17 | ```{r loadLibraries, results='hide', message=FALSE, warning=FALSE} 18 | set.seed(114) 19 | require("tidyverse", quietly=T) 20 | library("Matrix", quietly=T) 21 | 22 | ## Load ggplot theme and defaults 23 | source("R/ggplot_settings.R") 24 | 25 | ## Load helper functions 26 | source("R/Utilities.R") 27 | 28 | outdir <- "figures" 29 | data.10X <- "data/data.10X.datasets.Rdata" 30 | ``` 31 | 32 | ## Load data 33 | 34 | 10X datasets were preprocessed in the [Load unfiltered data vignette](Load-unfiltered-data.md) 35 | 36 | ```{r} 37 | load(file=data.10X) 38 | ``` 39 | 40 | ## Draw UMI per marker 41 | 42 | These three 10X datasets used the same panel of antibodies at three conditions, 3' V3 chemistry at ~1,000 and ~10,000 cells or 5' at 10,000 cells using TotalSeqB or TotalSeqC antibodies, respectively. 
43 | 44 | ```{r, fig.height=3, fig.width=7} 45 | ## Extract data from list into a combined data.frame 46 | for(i in seq_along(data.10X.datasets)){ 47 | dataset <- data.10X.datasets[i] 48 | 49 | kallisto <- data.10X.datasets.adt.kallisto[[dataset]] 50 | cells <- intersect(data.10X.datasets.gex.aboveInf[[dataset]],colnames(kallisto)) 51 | 52 | 53 | total <- Matrix::rowSums(kallisto) 54 | Cell <- Matrix::rowSums(kallisto[,cells]) 55 | EmptyDrop <- total-Cell 56 | 57 | add <- data.frame(Dataset=dataset,Marker=names(Cell),Cell,EmptyDrop) 58 | 59 | if(i == 1){ 60 | plotData <- add 61 | } else { 62 | plotData <- rbind(plotData,add) 63 | } 64 | } 65 | 66 | ## Convert data into "long format" for plotting with ggplot 67 | plotData <- plotData %>% pivot_longer(c(-Marker, -Dataset)) 68 | 69 | ## Rename isotype controls to get shorter names 70 | plotData$Marker <- gsub("isotype_control_","",plotData$Marker) 71 | plotData$subset <- factor(as.character(plotData$name), levels=c("EmptyDrop","Cell")) 72 | plotData$Dataset <- factor(as.character(plotData$Dataset), levels=data.10X.datasets) 73 | 74 | ## Make plot 75 | data.10X.markerBarplot <- ggplot(plotData, aes(x=Marker, y=value/10^6, fill=subset)) + 76 | geom_bar(stat="identity", position="dodge", color="black", width=0.65) + 77 | scale_y_continuous(expand=c(0,0,0.05,0)) + 78 | scale_fill_manual(values=c("lightgrey","black")) + 79 | labs(y=bquote("ADT UMI counts ("~10^6~")")) + 80 | coord_flip() + 81 | facet_wrap(~Dataset, nrow=1, scales="free_x") + 82 | theme(axis.title.y=element_blank(), 83 | legend.position=c(1,0.98), 84 | legend.justification=c(1,1), 85 | legend.title=element_blank(), 86 | legend.background=element_blank()) 87 | 88 | ``` 89 | 90 | ## Final figure 91 | 92 | ```{r, fig.height=5, fig.width=7} 93 | ## Include knee_plots from preprocessing in the figure 94 | data.10X.GEXrank <- cowplot::plot_grid(plotlist=data.10X.datasets.knee_plots, 95 | labels=data.10X.datasets, 96 | hjust=-0.65, 97 | vjust=1.6, 98 | 
label_size=7, 99 | nrow=1) 100 | 101 | p.figure <- cowplot::plot_grid(data.10X.GEXrank, data.10X.markerBarplot, 102 | labels=c("A", "B"), 103 | ncol=1, 104 | rel_heights=c(2,3), 105 | label_size=panel.label_size, 106 | vjust=panel.label_vjust, 107 | hjust=panel.label_hjust) 108 | 109 | 110 | png(file=file.path(outdir,"Supplementary Figure S6.png"), 111 | width=figure.width.full, 112 | height=5, 113 | units = figure.unit, 114 | res=figure.resolution, 115 | antialias=figure.antialias) 116 | 117 | p.figure 118 | 119 | dev.off() 120 | 121 | p.figure 122 | 123 | ``` 124 | -------------------------------------------------------------------------------- /10X-Datasets-UMI-per-marker.md: -------------------------------------------------------------------------------- 1 | CITE-seq optimization - 10X datasets: UMI per marker plot 2 | ================ 3 | Terkild Brink Buus 4 | 30/3/2020 5 | 6 | ## Load utilities 7 | 8 | Including libraries, plotting and color settings and custom utility 9 | functions 10 | 11 | ``` r 12 | set.seed(114) 13 | require("tidyverse", quietly=T) 14 | library("Matrix", quietly=T) 15 | 16 | ## Load ggplot theme and defaults 17 | source("R/ggplot_settings.R") 18 | 19 | ## Load helper functions 20 | source("R/Utilities.R") 21 | 22 | outdir <- "figures" 23 | data.10X <- "data/data.10X.datasets.Rdata" 24 | ``` 25 | 26 | ## Load data 27 | 28 | 10X datasets were preprocessed in the [Load unfiltered data 29 | vignette](Load-unfiltered-data.md) 30 | 31 | ``` r 32 | load(file=data.10X) 33 | ``` 34 | 35 | ## Draw UMI per marker 36 | 37 | These three 10X dataset used the same panel of antibodies at three 38 | conditions, 3’ V3 chemistry at \~1,000 and \~10,000 cells or 5’ at 39 | 10,000 cells using TotalSeqB or TotalSeqC antibodies, respectively. 
40 | 41 | ``` r 42 | ## Extract data from list into a combined data.frame 43 | for(i in seq_along(data.10X.datasets)){ 44 | dataset <- data.10X.datasets[i] 45 | 46 | kallisto <- data.10X.datasets.adt.kallisto[[dataset]] 47 | cells <- intersect(data.10X.datasets.gex.aboveInf[[dataset]],colnames(kallisto)) 48 | 49 | 50 | total <- Matrix::rowSums(kallisto) 51 | Cell <- Matrix::rowSums(kallisto[,cells]) 52 | EmptyDrop <- total-Cell 53 | 54 | add <- data.frame(Dataset=dataset,Marker=names(Cell),Cell,EmptyDrop) 55 | 56 | if(i == 1){ 57 | plotData <- add 58 | } else { 59 | plotData <- rbind(plotData,add) 60 | } 61 | } 62 | 63 | ## Convert data into "long format" for plotting with ggplot 64 | plotData <- plotData %>% pivot_longer(c(-Marker, -Dataset)) 65 | 66 | ## Rename isotype controls to get shorter names 67 | plotData$Marker <- gsub("isotype_control_","",plotData$Marker) 68 | plotData$subset <- factor(as.character(plotData$name), levels=c("EmptyDrop","Cell")) 69 | plotData$Dataset <- factor(as.character(plotData$Dataset), levels=data.10X.datasets) 70 | 71 | ## Make plot 72 | data.10X.markerBarplot <- ggplot(plotData, aes(x=Marker, y=value/10^6, fill=subset)) + 73 | geom_bar(stat="identity", position="dodge", color="black", width=0.65) + 74 | scale_y_continuous(expand=c(0,0,0.05,0)) + 75 | scale_fill_manual(values=c("lightgrey","black")) + 76 | labs(y=bquote("ADT UMI counts ("~10^6~")")) + 77 | coord_flip() + 78 | facet_wrap(~Dataset, nrow=1, scales="free_x") + 79 | theme(axis.title.y=element_blank(), 80 | legend.position=c(1,0.98), 81 | legend.justification=c(1,1), 82 | legend.title=element_blank(), 83 | legend.background=element_blank()) 84 | ``` 85 | 86 | ## Final figure 87 | 88 | ``` r 89 | ## Include knee_plots from preprocessing in the figure 90 | data.10X.GEXrank <- cowplot::plot_grid(plotlist=data.10X.datasets.knee_plots, 91 | labels=data.10X.datasets, 92 | hjust=-0.65, 93 | vjust=1.6, 94 | label_size=7, 95 | nrow=1) 96 | 97 | p.figure <- 
cowplot::plot_grid(data.10X.GEXrank, data.10X.markerBarplot, 98 | labels=c("A", "B"), 99 | ncol=1, 100 | rel_heights=c(2,3), 101 | label_size=panel.label_size, 102 | vjust=panel.label_vjust, 103 | hjust=panel.label_hjust) 104 | 105 | 106 | png(file=file.path(outdir,"Supplementary Figure S6.png"), 107 | width=figure.width.full, 108 | height=5, 109 | units = figure.unit, 110 | res=figure.resolution, 111 | antialias=figure.antialias) 112 | 113 | p.figure 114 | 115 | dev.off() 116 | ``` 117 | 118 | ## png 119 | ## 2 120 | 121 | ``` r 122 | p.figure 123 | ``` 124 | 125 | ![](10X-Datasets-UMI-per-marker_files/figure-gfm/unnamed-chunk-3-1.png) 126 | -------------------------------------------------------------------------------- /10X-Datasets-UMI-per-marker_files/figure-gfm/unnamed-chunk-3-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/10X-Datasets-UMI-per-marker_files/figure-gfm/unnamed-chunk-3-1.png -------------------------------------------------------------------------------- /ADT reads in cells vs empty drops.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "CITE-seq optimization - ADT in cell-containing vs empty drops" 3 | author: "Terkild Brink Buus" 4 | date: "30/3/2020" 5 | output: github_document 6 | --- 7 | 8 | Background signal in CITE-seq has been proposed to be primarily caused by free-floating antibodies and can be assessed by measuring reads from Non-cell-containing (empty) droplets (Mulé et al. 2020). In this vignette, we compare UMI counts from cell-containing vs. 
empty drops 9 | 10 | ```{r setup, include=FALSE} 11 | knitr::opts_chunk$set(warning=FALSE, message=FALSE) 12 | options(stringsAsFactors=FALSE) 13 | ``` 14 | 15 | ## Load utilities 16 | 17 | Including libraries, plotting and color settings and custom utility functions 18 | 19 | ```{r loadLibraries, results='hide', message=FALSE, warning=FALSE} 20 | set.seed(114) 21 | require("Seurat", quietly=T) 22 | require("tidyverse", quietly=T) 23 | library("Matrix", quietly=T) 24 | 25 | ## Load ggplot theme and defaults 26 | source("R/ggplot_settings.R") 27 | 28 | ## Load helper functions 29 | source("R/Utilities.R") 30 | 31 | ## Load color schemes 32 | source("R/color.R") 33 | 34 | outdir <- "figures" 35 | data.drive <- "F:/" 36 | data.abpanel <- "data/Supplementary_Table_1.xlsx" 37 | data.markerStats <- "data/markerByClusterStats.tsv" 38 | data.Seurat <- "data/5P-CITE-seq_Titration.rds" 39 | 40 | show_tsne_markers <- c("CD4", "CD19", "CD86", "CD279", "TCRgd") 41 | 42 | ## Make a custom function for formatting the concentration scale 43 | scaleFUNformat <- function(x) sprintf("%.2f", x) 44 | ``` 45 | 46 | ## Load the data 47 | 48 | The ADT UMI count data has already been loaded and filtered in the "ADT counting methods" vignette. We'll load it from there. This includes the kallisto.ADT UMI count matrix as well as a list of barcodes that have been filtered to have gene expression UMI counts above the inflection point in the rank-barcode plot (used for calling cell-containing vs. empty droplets). 49 | 50 | ```{r loadADTdata} 51 | load("data/data.ADT.Rdata") 52 | 53 | ## ADT UMI counts 54 | kallisto.ADT[1:5,1:5] 55 | 56 | ## Barcodes for cell-containing droplet 57 | head(gex.aboveInf) 58 | ``` 59 | 60 | ## Load antibody panel data 61 | 62 | Antibody panel concentration data is loaded from the supplementary data excel sheet. 
63 | 64 | ```{r abdata} 65 | abpanel <- data.frame(readxl::read_excel(data.abpanel)) 66 | rownames(abpanel) <- abpanel$Marker 67 | 68 | head(abpanel) 69 | ``` 70 | 71 | ## Preprocess data for plotting 72 | 73 | Make sums of ADT UMI counts within cell-containing and empty droplets. 74 | 75 | ```{r preprocess} 76 | ADT.matrix <- kallisto.ADT 77 | 78 | ## Calculate total UMI count per marker 79 | markerUMI <- apply(ADT.matrix,1,sum) 80 | 81 | ## Calculate UMI count within cell-containing and empty droplets 82 | markerUMI.inCell <- apply(ADT.matrix[,gex.aboveInf],1,sum) 83 | markerUMI.inCell.freq <- markerUMI.inCell/sum(markerUMI.inCell) 84 | markerUMI.inDrop <- markerUMI-markerUMI.inCell 85 | markerUMI.inDrop.freq <- markerUMI.inDrop/sum(markerUMI.inDrop) 86 | 87 | ## Make DF to allow combination of the data into a "long" format 88 | df.inCell <- data.frame(count=markerUMI.inCell, freq=markerUMI.inCell.freq, subset="Cell", marker=names(markerUMI.inCell.freq)) 89 | df.inDrop <- data.frame(count=markerUMI.inDrop, freq=markerUMI.inDrop.freq, subset="EmptyDrop", marker=names(markerUMI.inDrop.freq)) 90 | 91 | plotData <- rbind(df.inCell, df.inDrop) 92 | 93 | ## Add metadata (antibody concentration) 94 | plotData$conc <- abpanel[plotData$marker,"conc_µg_per_mL"] 95 | 96 | plotData$subset <- factor(plotData$subset, levels=c("EmptyDrop","Cell")) 97 | 98 | ## Order markers according to antibody concentration and UMI frequency within empty droplets (by setting levels) 99 | plotData$marker <- factor(plotData$marker, 100 | levels=plotData$marker[order(plotData$conc[plotData$subset=="EmptyDrop"], 101 | plotData$freq[plotData$subset=="EmptyDrop"])]) 102 | 103 | head(plotData) 104 | ``` 105 | 106 | ## Draw cell-containing to empty droplet frequency ratio plot 107 | 108 | ```{r ratio, fig.width=2, fig.height=5} 109 | data.ratio <- data.frame(ratio=markerUMI.inCell.freq/markerUMI.inDrop.freq) %>% mutate(Marker=rownames(.), conc=abpanel[rownames(.),"conc_µg_per_mL"]) %>% arrange(conc, ratio) 110 | 111 | 
data.ratio$Marker <- factor(data.ratio$Marker, levels=data.ratio$Marker) 112 | 113 | p.ratio <- ggplot(data.ratio, aes(x=Marker, y=log2(ratio))) + 114 | geom_rect(aes(xmin=-Inf,xmax=Inf,ymin=-1,ymax=-1.25,fill=conc), col="black") + 115 | scale_fill_viridis_c(trans="log2", labels=scaleFUNformat, breaks=c(0.0375,0.15,0.625,2.5,10)) + 116 | ggnewscale::new_scale_fill() + 117 | geom_bar(stat="identity", aes(fill=log2(ratio)>0), color="black", width=0.4) + 118 | geom_hline(yintercept=0) + 119 | scale_fill_manual(values=c(`FALSE`="lightgrey",`TRUE`="black")) + 120 | scale_x_discrete(expand=c(0, 0.5)) + 121 | scale_y_continuous(expand=c(0,0.05,0,0.05)) + 122 | coord_flip() + 123 | facet_grid(rows="conc", scales="free_y", space="free_y") + 124 | labs(title="Cell:Empty ratio", y="log2(Cells:Empty ratio)", fill="µg/mL") + 125 | theme(plot.title=element_text(size=7, face="bold", hjust=0.5), 126 | panel.spacing=unit(0.5,"mm"), 127 | axis.line=element_line(), 128 | axis.title.y=element_blank(), 129 | strip.placement="outside", 130 | strip.text=element_blank(), 131 | panel.border=element_rect(color=alpha("black",0.25)), 132 | legend.position="none", 133 | legend.justification=c(0,1), 134 | legend.direction="horizontal", 135 | legend.text.align=0, 136 | legend.key.width=unit(0.3,"cm"), 137 | legend.key.height=unit(0.4,"cm"), 138 | legend.text=element_text(size=unit(5,"pt"))) 139 | 140 | p.ratio 141 | ``` 142 | 143 | ## Draw barplot of UMI counts in cell-containing and empty-droplets 144 | 145 | ```{r barplot, fig.height=5, fig.width=3} 146 | plotData$marker <- factor(as.character(plotData$marker), levels=levels(data.ratio$Marker)) 147 | 148 | p.barplot <- ggplot(plotData, aes(x=marker, y=count/10^6)) + 149 | geom_rect(aes(xmin=-Inf,xmax=Inf,ymin=-0.050000,ymax=-0.010000,fill=conc), col="black") + 150 | scale_fill_viridis_c(trans="log2", labels=scaleFUNformat, breaks=c(0.0375,0.15,0.625,2.5,10)) + 151 | ggnewscale::new_scale_fill() + 152 | 
geom_bar(aes(fill=subset),stat="identity", position="dodge", color="black", width=0.65) + 153 | geom_hline(yintercept=0, col="black") + 154 | scale_fill_manual(values=c("Cell"="black","EmptyDrop"="lightgrey")) + 155 | scale_x_discrete(expand=c(0, 0.5)) + 156 | scale_y_continuous(expand=c(0,0,0,0.05)) + 157 | coord_flip() + 158 | facet_grid(rows="conc", scales="free_y", space="free_y") + 159 | guides(fill=guide_legend(reverse=TRUE)) + 160 | labs(title="UMI counts", y=bquote("UMI count ("~10^6~")"), fill="Compartment") + 161 | theme(plot.title=element_text(size=7, face="bold", hjust=0.5), 162 | panel.border=element_blank(), 163 | panel.grid.major.y=element_blank(), 164 | panel.spacing=unit(0.5,"mm"), 165 | axis.line=element_line(), 166 | axis.title.y=element_blank(), 167 | #axis.text.y=element_blank(), 168 | strip.placement="outside", 169 | strip.text=element_blank(), 170 | legend.position=c(1,1), 171 | legend.justification=c(1,1), 172 | legend.text.align=0, 173 | legend.key.width=unit(0.3,"cm"), 174 | legend.key.height=unit(0.4,"cm"), 175 | legend.text=element_text(size=unit(5,"pt"))) 176 | 177 | p.barplot 178 | ``` 179 | 180 | # Highlight markers 181 | 182 | Determine which markers should be highlighted due to their differences between cell-containing and empty droplets. 
183 | 184 | ```{r highlight} 185 | freq.threshold <- 0.05 186 | 187 | plotData$highlight <- ifelse(plotData$marker %in% plotData$marker[plotData$freq >= freq.threshold],1,0) 188 | 189 | ## Determine which compartment has the highest frequency for the markers above the threshold and assign the labels accordingly 190 | max.label <- plotData[plotData$freq >= freq.threshold,] %>% group_by(marker) %>% summarize(subset.max=subset[which.max(freq)]) 191 | 192 | plotData$label <- ifelse((paste(plotData$marker,plotData$subset) %in% 193 | paste(max.label$marker,max.label$subset.max))==FALSE | 194 | plotData$freq < freq.threshold, 195 | NA,as.character(plotData$marker)) 196 | ``` 197 | 198 | ## Make alluvial "river" plot of markers in each compartment 199 | 200 | To allow labelling the markers, we need to calculate the cumulative frequency within each compartment (used to position each label at the middle of its stratum). 201 | 202 | ```{r alluvial, fig.height=5, fig.width=1.3} 203 | ## Order the dataframe 204 | plotData$marker.conc <- factor(as.character(plotData$marker), levels=unique(plotData$marker[order(-plotData$conc, plotData$marker, decreasing=TRUE)])) 205 | plotData <- plotData[order(plotData$marker.conc, decreasing=TRUE),] 206 | 207 | plotData$cummulativeFreq <- 0 208 | plotData$cummulativeFreq[plotData$subset=="EmptyDrop"] <- cumsum(plotData$freq[plotData$subset=="EmptyDrop"]) 209 | plotData$cummulativeFreq[plotData$subset=="Cell"] <- cumsum(plotData$freq[plotData$subset=="Cell"]) 210 | 211 | ## A bit of a hack to get the columns in order 212 | #plotData$subset.rev <- factor(as.character(plotData$subset), levels=c("Cell","EmptyDrop")) 213 | 214 | p.alluvial <- ggplot(plotData, aes(y=freq, x=subset, fill=conc, stratum = marker.conc, alluvium = marker.conc)) + 215 | ggalluvial::geom_flow(width = 1/2, color=alpha("black",0.25), alpha=0.75) + 216 | ggalluvial::geom_stratum(width = 1/2) + 217 | geom_text(aes(y=cummulativeFreq-(freq/2),label=label), na.rm=TRUE, vjust=0.5, hjust=0.5, angle=30, size=1.5, fontface="bold") + 218 | 
scale_fill_viridis_c(trans="log2", labels=scaleFUNformat, breaks=c(0.0375,0.15,0.625,2.5,10)) + 219 | scale_y_continuous(expand=c(0,0)) + 220 | scale_x_discrete(expand=c(0,0), limits=rev(levels(plotData$subset))) + 221 | labs(title="Frequency", y="UMI frequency", fill="DF1 µg/mL") + 222 | theme(plot.title=element_text(size=7, face="bold", hjust=0.5), 223 | legend.position="none", 224 | axis.title.x=element_blank(), 225 | panel.grid=element_blank()) 226 | 227 | p.alluvial 228 | ``` 229 | 230 | # Specific signals despite background 231 | 232 | Despite high background (as assayed by a high number of reads in empty droplets), most markers provide specific signal. However, the number of UMIs needed to achieve this signal is much lower in the markers with high signal-to-noise. 233 | 234 | ```{r} 235 | 236 | object <- readRDS(file=data.Seurat) 237 | 238 | ## Show number of cells from each sample 239 | table(object$group) 240 | 241 | object <- subset(object, subset=volume == "50µl" & dilution == "DF1") 242 | object 243 | 244 | DefaultAssay(object) <- "ADT.kallisto" 245 | ``` 246 | 247 | ## Show "positive" cutoff according to concentration 248 | 249 | Another way to illustrate this is to show the number of UMIs required to get above the background threshold (defined in Supplementary Figure S1) 250 | 251 | ```{r, fig.height=5, fig.width=3} 252 | markerStats <- read.table(data.markerStats) 253 | rownames(markerStats) <- paste(markerStats$marker,markerStats$tissue,sep="_") 254 | 255 | ## Determine which tissue has highest percentage positive cells and use this to set cutoff. 
256 | markerStats.max <- markerStats %>% group_by(marker) %>% filter(pct==max(pct)) 257 | 258 | data.UMI <- GetAssayData(object, assay="ADT.kallisto", slot="counts") 259 | data.meta <- FetchData(object, vars=c("tissue")) 260 | 261 | marker.data <- as.data.frame(data.UMI) %>% 262 | mutate(marker=rownames(.)) %>% 263 | pivot_longer(-marker) %>% 264 | group_by(marker, tissue=data.meta[name,"tissue"]) %>% 265 | summarize(pos.cutoff=quantile(value, probs=(1-min(0.95,(markerStats[paste(marker[1],tissue[1],sep="_"),"pct"]+20)/100)))) %>% left_join(markerStats) 266 | 267 | marker.data$marker <- factor(as.character(marker.data$marker), levels=levels(data.ratio$Marker)) 268 | 269 | p.UMIcutoff <- ggplot(marker.data, aes(x=marker, y=pos.cutoff, group=tissue, fill=tissue)) + 270 | geom_bar(position="dodge", stat="identity", color="black", width=0.65) + 271 | scale_fill_manual(values=color.tissue) + 272 | scale_x_discrete(expand=c(0, 0.5)) + 273 | scale_y_continuous(expand=c(0,0.05,0,0.05)) + 274 | coord_flip() + 275 | facet_grid(rows="conc_µg_per_mL", scales="free_y", space="free_y") + 276 | labs(title="UMI cutoff", y="Above-background cutoff (UMI)", fill="Tissue") + 277 | theme(plot.title=element_text(size=7, face="bold", hjust=0.5), 278 | panel.border=element_blank(), 279 | panel.grid.major.y=element_blank(), 280 | panel.spacing=unit(0.5,"mm"), 281 | axis.line=element_line(), 282 | axis.title.y=element_blank(), 283 | axis.text.y=element_blank(), 284 | strip.placement="outside", 285 | strip.text=element_blank(), 286 | legend.position=c(1,1), 287 | legend.justification=c(1,1), 288 | legend.text.align=0, 289 | legend.key.width=unit(0.3,"cm"), 290 | legend.key.height=unit(0.4,"cm"), 291 | legend.text=element_text(size=unit(5,"pt"))) 292 | 293 | p.UMIcutoff 294 | 295 | ``` 296 | 297 | Make tSNE plots with raw UMI counts. Use rainbow color scheme to show dynamic range in expression levels. 
298 | 299 | ```{r, fig.height=1.6, fig.width=7} 300 | f.tsne.format <- function(x){ 301 | x + 302 | scale_color_gradientn(colours = c("#000033","#3333FF","#3377FF","#33AAFF","#33CC33","orange","red"), 303 | limits=c(0,NA)) + 304 | scale_y_continuous(expand=c(0.15,0,0.05,0)) + 305 | theme_get() + 306 | theme(plot.title=element_text(size=7, face="bold", hjust=0.5), 307 | plot.background=element_blank(), 308 | panel.background=element_blank(), 309 | axis.title=element_blank(), 310 | axis.text.x=element_blank(), 311 | axis.text.y=element_blank(), 312 | legend.key.width=unit(3,"mm"), 313 | legend.key.height=unit(2,"mm"), 314 | legend.position=c(1,-0.03), 315 | legend.justification=c(1,0), 316 | legend.background=element_blank(), 317 | legend.direction="horizontal") 318 | } 319 | 320 | p.tsne <- lapply(FeaturePlot(object, reduction="tsne", sort=TRUE, combine=FALSE, 321 | features=show_tsne_markers, 322 | slot="counts", 323 | max.cutoff='q90', 324 | pt.size=0.1), 325 | FUN=f.tsne.format) 326 | 327 | ## Get common y-axis label 328 | p.tsne[[1]] <- p.tsne[[1]] + theme(axis.title.y=element_text()) 329 | # a bit of a hack to get a common x-axis label 330 | p.tsne[[3]] <- p.tsne[[3]] + theme(axis.title.x=element_text(hjust=0.5)) 331 | 332 | p.UMI.tsne <- cowplot::plot_grid(plotlist=p.tsne, 333 | align="h", 334 | axis="tb", 335 | nrow=1, 336 | rel_widths=c(1.07,1,1,1,1), 337 | labels=c("E","","F","","G"), 338 | label_size=panel.label_size, 339 | vjust=panel.label_vjust, 340 | hjust=panel.label_hjust) 341 | 342 | p.UMI.tsne 343 | ``` 344 | 345 | Make similar plots for all markers 346 | 347 | ```{r} 348 | markers <- sort(rownames(object[["ADT.kallisto"]])) 349 | 350 | p.tsne.all <- lapply(FeaturePlot(object, reduction="tsne", sort=TRUE, combine=FALSE, 351 | features=markers, 352 | slot="counts", 353 | max.cutoff='q90', 354 | pt.size=0.1), 355 | FUN=f.tsne.format) 356 | 357 | names(p.tsne.all) <- markers 358 | 359 | p.tsne.all <- lapply(markers, function(x) p.tsne.all[[x]] + 
ggtitle(paste0(x," (",markerStats[paste0(x,"_PBMC"),"conc_µg_per_mL"]," µg/mL)"))) 360 | 361 | plot.columns <- 5 362 | plot.num <- length(p.tsne.all) 363 | plot.rows <- ceiling(plot.num/plot.columns) 364 | plot.rowSplit <- 6 365 | 366 | ## Reduce margins 367 | p.tsne.all <- lapply(p.tsne.all, function(x) x + 368 | theme(plot.margin=unit(c(0.1,0.1,0.3,0.1),"mm"))) 369 | 370 | ## Get common y-axis label 371 | p.tsne.all[(c(0:(plot.rows-1))*plot.columns+1)] <- lapply(p.tsne.all[(c(0:(plot.rows-1))*plot.columns+1)], function(x) x + theme(axis.title.y=element_text())) 372 | 373 | ## Show axis label for the center plot of the last row 374 | p.tsne.all[[(plot.columns*plot.rowSplit-floor(plot.columns/2))]] <- p.tsne.all[[(plot.columns*plot.rowSplit-floor(plot.columns/2))]] + theme(axis.title.x=element_text(hjust=0.5)) 375 | # a bit of a hack to get a common x-axis label on the last row (hardcoded) 376 | p.tsne.all[[52]] <- p.tsne.all[[52]] + theme(axis.title.x=element_text(hjust=2)) 377 | 378 | p.UMI.tsne.all.1 <- cowplot::plot_grid(plotlist=p.tsne.all[1:(plot.rowSplit*plot.columns)], align="h", axis="tb", ncol=plot.columns, rel_widths=c(1.1,1,1,1,1)) 379 | p.UMI.tsne.all.2 <- cowplot::plot_grid(plotlist=p.tsne.all[(plot.rowSplit*plot.columns+1):52], align="h", axis="tb", ncol=plot.columns, rel_widths=c(1.1,1,1,1,1)) 380 | 381 | png(file=file.path(outdir,paste0("Supplementary Figure S7A.png")), 382 | units=figure.unit, 383 | res=figure.resolution, 384 | width=figure.width.full, 385 | height=(figure.width.full/plot.columns*plot.rowSplit)*1.1, 386 | antialias=figure.antialias) 387 | 388 | p.UMI.tsne.all.1 389 | 390 | dev.off() 391 | 392 | png(file=file.path(outdir,paste0("Supplementary Figure S7B.png")), 393 | units=figure.unit, 394 | res=figure.resolution, 395 | width=figure.width.full, 396 | height=(figure.width.full/plot.columns*(plot.rows-plot.rowSplit))*1.1, 397 | antialias=figure.antialias) 398 | 399 | p.UMI.tsne.all.2 400 | 401 | dev.off() 402 | ``` 403 | 404 | ## 
Combine figure 405 | 406 | ```{r figure, fig.height=5.9, fig.width=figure.width.full} 407 | p.row1 <- cowplot::plot_grid(p.barplot + theme(plot.margin=unit(c(0.02,0,0,0),"npc")), 408 | p.alluvial, 409 | p.ratio + theme(plot.margin=unit(c(0,0,0,0.05),"npc")), 410 | p.UMIcutoff + theme(plot.margin=unit(c(0,0,0,-0.007),"npc")), 411 | nrow=1, 412 | rel_widths=c(1.75,0.75,1.2,1.3), 413 | align="h", 414 | axis="tb", 415 | labels=c("A", "B", "C", "D"), 416 | label_size=panel.label_size, 417 | vjust=panel.label_vjust, 418 | hjust=panel.label_hjust) 419 | 420 | p.final <- cowplot::plot_grid(p.row1, p.UMI.tsne, 421 | ncol=1, 422 | rel_heights=c(3,1.05)) 423 | 424 | p.final 425 | 426 | png(file=file.path(outdir,"Figure 5.png"), width=figure.width.full, height=5.9, units=figure.unit, res=figure.resolution, antialias=figure.antialias) 427 | p.final 428 | dev.off() 429 | ``` -------------------------------------------------------------------------------- /ADT-counting-methods_files/figure-gfm/unnamed-chunk-1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/ADT-counting-methods_files/figure-gfm/unnamed-chunk-1-1.png -------------------------------------------------------------------------------- /ADT-counting-methods_files/figure-gfm/unnamed-chunk-1-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/ADT-counting-methods_files/figure-gfm/unnamed-chunk-1-2.png -------------------------------------------------------------------------------- /ADT-counting-methods_files/figure-gfm/unnamed-chunk-4-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/ADT-counting-methods_files/figure-gfm/unnamed-chunk-4-1.png -------------------------------------------------------------------------------- /ADT-counting-methods_files/figure-gfm/unnamed-chunk-6-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/ADT-counting-methods_files/figure-gfm/unnamed-chunk-6-1.png -------------------------------------------------------------------------------- /ADT-counting-methods_files/figure-gfm/unnamed-chunk-9-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/ADT-counting-methods_files/figure-gfm/unnamed-chunk-9-1.png -------------------------------------------------------------------------------- /ADT-reads-in-cells-vs-empty-drops_files/figure-gfm/DSBfigure-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/ADT-reads-in-cells-vs-empty-drops_files/figure-gfm/DSBfigure-1.png -------------------------------------------------------------------------------- /ADT-reads-in-cells-vs-empty-drops_files/figure-gfm/alluvial-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/ADT-reads-in-cells-vs-empty-drops_files/figure-gfm/alluvial-1.png -------------------------------------------------------------------------------- /ADT-reads-in-cells-vs-empty-drops_files/figure-gfm/barplot-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/ADT-reads-in-cells-vs-empty-drops_files/figure-gfm/barplot-1.png -------------------------------------------------------------------------------- /ADT-reads-in-cells-vs-empty-drops_files/figure-gfm/figure-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/ADT-reads-in-cells-vs-empty-drops_files/figure-gfm/figure-1.png -------------------------------------------------------------------------------- /ADT-reads-in-cells-vs-empty-drops_files/figure-gfm/plotRatioDSB-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/ADT-reads-in-cells-vs-empty-drops_files/figure-gfm/plotRatioDSB-1.png -------------------------------------------------------------------------------- /ADT-reads-in-cells-vs-empty-drops_files/figure-gfm/ratio-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/ADT-reads-in-cells-vs-empty-drops_files/figure-gfm/ratio-1.png -------------------------------------------------------------------------------- /ADT-reads-in-cells-vs-empty-drops_files/figure-gfm/unnamed-chunk-2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/ADT-reads-in-cells-vs-empty-drops_files/figure-gfm/unnamed-chunk-2-1.png -------------------------------------------------------------------------------- /ADT-reads-in-cells-vs-empty-drops_files/figure-gfm/unnamed-chunk-3-1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/ADT-reads-in-cells-vs-empty-drops_files/figure-gfm/unnamed-chunk-3-1.png -------------------------------------------------------------------------------- /Antibody-titration_files/figure-gfm/Figure1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Antibody-titration_files/figure-gfm/Figure1-1.png -------------------------------------------------------------------------------- /Antibody-titration_files/figure-gfm/UMIcountsPerCondition-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Antibody-titration_files/figure-gfm/UMIcountsPerCondition-1.png -------------------------------------------------------------------------------- /Antibody-titration_files/figure-gfm/UMIinExpressingCells-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Antibody-titration_files/figure-gfm/UMIinExpressingCells-1.png -------------------------------------------------------------------------------- /Antibody-titration_files/figure-gfm/plotUMIcountsPerMarker-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Antibody-titration_files/figure-gfm/plotUMIcountsPerMarker-1.png -------------------------------------------------------------------------------- /Antibody-titration_files/figure-gfm/suppFig1-1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Antibody-titration_files/figure-gfm/suppFig1-1.png -------------------------------------------------------------------------------- /Antibody-titration_files/figure-gfm/suppFig1-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Antibody-titration_files/figure-gfm/suppFig1-2.png -------------------------------------------------------------------------------- /Antibody-titration_files/figure-gfm/suppFig1-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Antibody-titration_files/figure-gfm/suppFig1-3.png -------------------------------------------------------------------------------- /Antibody-titration_files/figure-gfm/suppFig1-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Antibody-titration_files/figure-gfm/suppFig1-4.png -------------------------------------------------------------------------------- /Antibody-titration_files/figure-gfm/suppFig1-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Antibody-titration_files/figure-gfm/suppFig1-5.png -------------------------------------------------------------------------------- /Antibody-titration_files/figure-gfm/titrationExamples-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Antibody-titration_files/figure-gfm/titrationExamples-1.png -------------------------------------------------------------------------------- /Antibody-titration_files/figure-gfm/tsnePlots-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Antibody-titration_files/figure-gfm/tsnePlots-1.png -------------------------------------------------------------------------------- /Antibody-titration_files/figure-gfm/unnamed-chunk-3-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Antibody-titration_files/figure-gfm/unnamed-chunk-3-1.png -------------------------------------------------------------------------------- /CITE-seq_optimization.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | -------------------------------------------------------------------------------- /Cell number titration.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "CITE-seq optimization - Reducing cell number at staining" 3 | author: "Terkild Brink Buus" 4 | date: "30/3/2020" 5 | output: github_document 6 | --- 7 | 8 | ```{r setup, include=FALSE} 9 | knitr::opts_chunk$set(warning=FALSE, message=FALSE) 10 | options(stringsAsFactors=FALSE) 11 | ``` 12 | 13 | ## Load utilities 14 | 15 | Including libraries, plotting and color settings and custom utility functions 16 | 17 
| ```{r loadLibraries, results='hide', message=FALSE, warning=FALSE} 18 | set.seed(114) 19 | require("Seurat", quietly=T) 20 | require("tidyverse", quietly=T) 21 | library("Matrix", quietly=T) 22 | library("patchwork", quietly=T) 23 | 24 | ## Load ggplot theme and defaults 25 | source("R/ggplot_settings.R") 26 | 27 | ## Load helper functions 28 | source("R/Utilities.R") 29 | 30 | ## Load predefined color schemes 31 | source("R/color.R") 32 | 33 | ## Load feature_rankplot functions 34 | source("R/feature_rankplot.R") 35 | source("R/feature_rankplot_hist.R") 36 | source("R/feature_rankplot_hist_custom.R") 37 | 38 | outdir <- "figures" 39 | data.Seurat <- "data/5P-CITE-seq_Titration.rds" 40 | data.abpanel <- "data/Supplementary_Table_1.xlsx" 41 | data.markerStats <- "data/markerByClusterStats.tsv" 42 | 43 | ## Make a custom function for formatting the concentration scale 44 | scaleFUNformat <- function(x) sprintf("%.2f", x) 45 | ``` 46 | 47 | ## Load Seurat object 48 | 49 | Subset to only focus on conditions stained in 25µl at dilution factor 4 (thus comparing 1000k to 200k cells at staining in PBMCs). 50 | 51 | ```{r loadSeurat} 52 | object <- readRDS(file=data.Seurat) 53 | 54 | ## Show number of cells from each sample 55 | table(object$group) 56 | 57 | object <- subset(object, subset=volume == "25µl") 58 | object 59 | ``` 60 | 61 | ## Load Ab panel annotation and concentrations 62 | 63 | Marker stats are reused in other comparisons and were calculated at the end of the preprocessing vignette. 64 | 65 | ```{r loadABPanel} 66 | abpanel <- data.frame(readxl::read_excel(data.abpanel)) 67 | rownames(abpanel) <- abpanel$Marker 68 | 69 | ## As we are only working with dilution factor 4 samples here, we want to show labels accordingly 70 | # a bit of a hack... 
71 | abpanel$conc_µg_per_mL <- abpanel$conc_µg_per_mL/4 72 | 73 | markerStats <- read.table(data.markerStats) 74 | markerStats.PBMC <- markerStats[markerStats$tissue == "PBMC",] 75 | rownames(markerStats) <- paste(markerStats$marker,markerStats$tissue,sep="_") 76 | 77 | ## Make an ordering vector ordering markers per concentration and total UMI count 78 | marker.order <- markerStats.PBMC$marker[order(markerStats.PBMC$conc_µg_per_mL, markerStats.PBMC$UMItotal, decreasing=TRUE)] 79 | 80 | head(abpanel) 81 | head(markerStats) 82 | ``` 83 | 84 | ## Cell type and tissue overview 85 | 86 | Make tSNE plots colored by cell type, cluster and number of cells at staining. 87 | 88 | ```{r tsnePlots, fig.height=3, fig.width=7} 89 | p.tsne.cellsAtStaining <- DimPlot(object, group.by="cellsAtStaining", reduction="tsne", pt.size=0.1, combine=FALSE)[[1]] + theme_get() + facet_wrap(~"cellsAtStaining") + scale_color_manual(values=color.cellsAtStaining) 90 | 91 | p.tsne.cluster <- DimPlot(object, group.by="supercluster", reduction="tsne", pt.size=0.1, combine=FALSE)[[1]] + theme_get() + scale_color_manual(values=color.supercluster) + facet_wrap(~"Cell types") 92 | 93 | p.tsne.finecluster <- DimPlot(object, label=TRUE, label.size=3, reduction="tsne", group.by="fineCluster", pt.size=0.1, combine=FALSE)[[1]] + theme_get() + facet_wrap( ~"Clusters") + guides(col=F) 94 | 95 | p.tsne.cluster + p.tsne.finecluster + p.tsne.cellsAtStaining 96 | ``` 97 | 98 | ## Overall ADT counts 99 | 100 | Extract UMI data and calculate UMI sum per marker within each condition. 
101 | 102 | ```{r calculateUMIcountsPerMarker} 103 | ## Get the data 104 | ADT.matrix <- data.frame(GetAssayData(object, assay="ADT.kallisto", slot="counts")) 105 | ADT.matrix$marker <- rownames(ADT.matrix) 106 | ADT.matrix$conc <- abpanel[ADT.matrix$marker,"conc_µg_per_mL"] 107 | ADT.matrix <- ADT.matrix %>% pivot_longer(c(-marker,-conc)) 108 | 109 | ## Get cell annotations 110 | cell.annotation <- FetchData(object, vars=c("cellsAtStaining")) 111 | 112 | ## Calculate marker sum for each cell number at staining 113 | ADT.matrix.agg <- ADT.matrix %>% group_by(cellsAtStaining=cell.annotation[name,"cellsAtStaining"], marker, conc) %>% summarise(sum=sum(value)) 114 | 115 | ## Order markers by concentration 116 | ADT.matrix.agg$marker.byConc <- factor(ADT.matrix.agg$marker, levels=marker.order) 117 | 118 | ## Extract marker annotation 119 | ann.markerConc <- abpanel[marker.order,] 120 | ann.markerConc$Marker <- factor(marker.order, levels=marker.order) 121 | 122 | ADT.matrix.agg.total <- ADT.matrix.agg 123 | ``` 124 | 125 | ## Plot overall ADT counts by conditions 126 | 127 | Samples stained with diluted Ab panel have reduced ADT counts. 
128 | 129 | ```{r UMIcountsPerCondition, fig.width=2.5, fig.height=2} 130 | ## Stacked bars of total ADT UMI counts per condition; rows are ordered on the same table that is plotted 131 | p.UMIcountsPerCondition <- ggplot(ADT.matrix.agg.total[order(-ADT.matrix.agg.total$conc, -ADT.matrix.agg.total$sum),], aes(x=cellsAtStaining, y=sum/10^6, fill=conc)) + 132 | geom_bar(stat="identity", col=alpha(col="black",alpha=0.05)) + 133 | scale_fill_viridis_c(trans="log2", labels=scaleFUNformat, breaks=c(0.0375,0.15,0.625,2.5,10)) + 134 | scale_y_continuous(expand=c(0,0,0,0.05)) + 135 | labs(fill="DF4\nµg/mL", y=bquote("ADT UMI counts ("~10^6~")")) + 136 | guides(fill=guide_colourbar(reverse=TRUE)) + 137 | theme(panel.grid.major=element_blank(), axis.title.x=element_blank(), panel.border=element_blank(), axis.line = element_line(), legend.position="right") 138 | 139 | p.UMIcountsPerCondition 140 | ``` 141 | 142 | ## Compare total UMI counts per marker 143 | 144 | Plot total UMI counts for each marker at the investigated cell numbers at staining (1000k vs. 200k). To ease readability, we place dashed lines between each concentration. 145 | 146 | ```{r plotUMIcountsPerMarker, fig.width=4.5, fig.height=5} 147 | ## Calculate "breaks" where concentration changes. 
148 | lines <- length(marker.order)-cumsum(sapply(split(ann.markerConc$Marker,ann.markerConc$conc_µg_per_mL),length))+0.5 149 | lines <- data.frame(breaks=lines[-length(lines)]) 150 | 151 | ## Make a marker by concentration "heatmap" 152 | p.markerByConc <- ggplot(ann.markerConc, aes(x=1, y=Marker, fill=conc_µg_per_mL)) + 153 | geom_tile(col=alpha(col="black",alpha=0.2)) + 154 | geom_hline(data=lines,aes(yintercept=breaks), linetype="dashed", alpha=0.5) + 155 | scale_fill_viridis_c(trans="log2") + 156 | labs(fill="µg/mL") + 157 | theme_get() + 158 | theme(axis.ticks.x=element_blank(), axis.title = element_blank(), axis.text.x=element_blank(), panel.grid=element_blank(), legend.position="right", plot.margin=unit(c(0.1,0.1,0.1,0.1),"mm")) + scale_x_continuous(expand=c(0,0)) 159 | 160 | ## Make UMI counts per Marker plot 161 | p.UMIcountsPerMarker <- ggplot(ADT.matrix.agg, aes(x=marker.byConc,y=log2(sum))) + 162 | geom_line(aes(group=marker), size=1.2, color="#666666") + 163 | geom_point(aes(group=cellsAtStaining, fill=cellsAtStaining), pch=21, size=0.7) + 164 | geom_vline(data=lines,aes(xintercept=breaks), linetype="dashed", alpha=0.5) + 165 | scale_fill_manual(values=color.cellsAtStaining) + 166 | scale_y_continuous(breaks=c(9:17)) + 167 | ylab("log2(UMI sum)") + 168 | guides(fill=guide_legend(override.aes=list(size=1.5), reverse=TRUE)) + 169 | theme(axis.title.y=element_blank(), axis.text.y=element_blank(), legend.position="bottom", legend.justification="left", legend.title.align=0, legend.key.width=unit(0.2,"cm"), legend.title=element_blank()) + 170 | coord_flip() 171 | 172 | ## Combine plot with markerByConc annotation heatmap 173 | plotUMIcountsPerMarker <- p.markerByConc + guides(fill=F) + p.UMIcountsPerMarker + guides(fill=F) + plot_spacer() + guide_area() + plot_layout(ncol=4, widths=c(1,30,0.1), guides='collect') 174 | 175 | plotUMIcountsPerMarker 176 | ``` 177 | 178 | ## Compare change in UMI/cell within expressing cluster 179 | 180 | Using a specific 
percentile may be prone to outliers in small clusters (i.e. the 90th percentile of a cluster of 30 will be the #3 highest cell making it prone to outliers). We thus set a threshold of the value to only be the 90th percentile if cluster contains more than 100 cells. For smaller clusters, the median is used. Expressing cluster is identified in the "preprocessing" vignette. 181 | 182 | ```{r UMIinExpressingCells, fig.width=4.5, fig.height=5} 183 | ## Get the data 184 | ADT.matrix <- data.frame(GetAssayData(object, assay="ADT.kallisto", slot="counts")) 185 | ADT.matrix$marker <- rownames(ADT.matrix) 186 | ADT.matrix$conc <- abpanel[ADT.matrix$marker,"conc_µg_per_mL"] 187 | ADT.matrix <- ADT.matrix %>% pivot_longer(c(-marker,-conc)) 188 | 189 | ## Get cell annotations 190 | cell.annotation <- FetchData(object, vars=c("cellsAtStaining", "fineCluster")) 191 | 192 | ## Calculate marker statistics for each cell number at staining within each cluster 193 | ADT.matrix.agg <- ADT.matrix %>% group_by(cellsAtStaining=cell.annotation[name,"cellsAtStaining"], fineCluster=cell.annotation[name,"fineCluster"], marker, conc) %>% summarise(sum=sum(value), median=quantile(value, probs=c(0.9)), nth=nth(value)) 194 | 195 | 196 | ## Use data for the previously determined expressing cluster. 
197 | Cluster.max <- markerStats[markerStats$tissue == "PBMC",c("marker","fineCluster")] 198 | Cluster.max$fineCluster <- factor(Cluster.max$fineCluster) 199 | ## Keep only each marker's expressing cluster; join keys are stated explicitly 200 | ADT.matrix.aggByClusterMax <- Cluster.max %>% left_join(ADT.matrix.agg, by=c("marker","fineCluster")) 201 | ADT.matrix.aggByClusterMax$marker.byConc <- factor(ADT.matrix.aggByClusterMax$marker, levels=marker.order) 202 | 203 | p.UMIinExpressingCells <- ggplot(ADT.matrix.aggByClusterMax, aes(x=marker.byConc, y=log2(nth))) + 204 | geom_line(aes(group=marker), size=1.2, color="#666666") + 205 | geom_point(aes(group=cellsAtStaining, fill=cellsAtStaining), pch=21, size=0.7) + 206 | geom_vline(data=lines,aes(xintercept=breaks), linetype="dashed", alpha=0.5) + 207 | geom_text(aes(label=paste0(fineCluster," ")), y=Inf, adj=1, size=1.5) + 208 | scale_fill_manual(values=color.cellsAtStaining) + 209 | scale_y_continuous(breaks=c(0:11), labels=2^c(0:11), expand=c(0.05,0.5)) + 210 | ylab("90th percentile UMI of expressing cluster") + 211 | theme(axis.title.y=element_blank(), axis.text.y=element_blank(), legend.position="right", legend.justification="left", legend.title.align=0, legend.key.width=unit(0.2,"cm")) + 212 | coord_flip() 213 | 214 | ## Combine plot with markerByConc annotation heatmap 215 | UMIinExpressingCells <- p.markerByConc + theme(legend.position="none") + p.UMIinExpressingCells + theme(legend.position="none") + plot_spacer() + plot_layout(ncol=4, widths=c(1,30,0.1), guides='collect') 216 | 217 | UMIinExpressingCells 218 | ``` 219 | 220 | ## Titration examples 221 | 222 | Most markers are largely unaffected by reducing the number of cells at staining. 
However, some antibodies used at low concentrations and targeting abundant epitopes are affected, an example of such is CD31: 223 | 224 | ```{r fig.width=1.4, fig.height=2.3} 225 | ## Make helper function for plotting titration plots (gate.Lung is accepted but not used in this vignette) 226 | titrationPlot <- function(marker, gate.PBMC=NULL, gate.Lung=NULL, y.axis=FALSE, show.gate=TRUE, legend=FALSE){ 227 | curMarker.name <- marker 228 | 229 | ## Get antibody concentration for legends 230 | curMarker.DF1conc <- abpanel[curMarker.name, "conc_µg_per_mL"] 231 | if(show.gate==TRUE){ 232 | ## Load gating percentages from manually set DSB thresholds 233 | gate <- data.frame(gate=markerStats[markerStats$marker == curMarker.name & markerStats$tissue== "PBMC",c("pct")]) 234 | gate$gate <- 1-(gate$gate/100) 235 | 236 | ## Allow manual gating 237 | if(!is.null(gate.PBMC)) gate <- gate.PBMC 238 | } else { 239 | gate <- NULL 240 | } 241 | 242 | p <- feature_rankplot_hist_custom(data=object, 243 | marker=paste0("adt_",curMarker.name), 244 | group="cellsAtStaining", 245 | barcodeGroup="supercluster", 246 | conc=curMarker.DF1conc, 247 | legend=legend, 248 | yaxis.text=y.axis, 249 | gates=gate, 250 | histogram.colors=color.cellsAtStaining, 251 | title=curMarker.name) 252 | 253 | return(p) 254 | } 255 | 256 | p.CD31 <- titrationPlot("CD31", legend=TRUE) 257 | 258 | p.CD31 259 | ``` 260 | 261 | ## tSNE plots 262 | 263 | Make tSNE plots with raw UMI counts. Use rainbow color scheme to show dynamic range in expression levels. 
264 | 265 | ```{r, fig.height=2, fig.width=7} 266 | show_tsne_markers <- c("CD31","CD44") 267 | f.tsne.format <- function(x){ 268 | x + 269 | scale_color_gradientn(colours = c("#000033","#3333FF","#3377FF","#33AAFF","#33CC33","orange","red"), 270 | limits=c(0,NA)) + 271 | scale_y_continuous(expand=c(0,0,0.05,0), limits=c(-45.52796,37.94770)) + 272 | xlim(c(-40.83170,49.63832)) + 273 | theme_get() + 274 | theme(plot.title=element_text(size=7, face="bold", hjust=0.5), 275 | plot.background=element_blank(), 276 | panel.background=element_blank(), 277 | axis.title=element_blank(), 278 | axis.text.x=element_blank(), 279 | axis.text.y=element_blank(), 280 | legend.key.width=unit(3,"mm"), 281 | legend.key.height=unit(2,"mm"), 282 | legend.position=c(1,-0.03), 283 | legend.justification=c(1,0), 284 | legend.background=element_blank(), 285 | legend.direction="horizontal") 286 | } 287 | 288 | maximum <- apply(FetchData(object, vars=paste0("adt_",show_tsne_markers), slot="counts"),2,quantile,probs=c(0.95)) 289 | 290 | p.tsne.1 <- f.tsne.format(FeaturePlot(subset(object, subset=cellsAtStaining=="1000k"), reduction="tsne", sort=TRUE, combine=FALSE, features=paste0("adt_",show_tsne_markers[1]), slot="counts", max.cutoff=maximum[1], pt.size=0.1)[[1]]) 291 | p.tsne.2 <- f.tsne.format(FeaturePlot(subset(object, subset=cellsAtStaining=="200k"), reduction="tsne", sort=TRUE, combine=FALSE, features=paste0("adt_",show_tsne_markers[1]), slot="counts", max.cutoff=maximum[1], pt.size=0.1)[[1]]) 292 | p.tsne.3 <- f.tsne.format(FeaturePlot(subset(object, subset=cellsAtStaining=="1000k"), reduction="tsne", sort=TRUE, combine=FALSE, features=paste0("adt_",show_tsne_markers[2]), slot="counts", max.cutoff=maximum[2], pt.size=0.1)[[1]]) 293 | p.tsne.4 <- f.tsne.format(FeaturePlot(subset(object, subset=cellsAtStaining=="200k"), reduction="tsne", sort=TRUE, combine=FALSE, features=paste0("adt_",show_tsne_markers[2]), slot="counts", max.cutoff=maximum[2], pt.size=0.1)[[1]]) 294 | 295 | p.tsne <- 
list(p.tsne.1 + ggtitle("1000k"),p.tsne.2 + ggtitle("200k"),p.tsne.3 + ggtitle("1000k"),p.tsne.4 + ggtitle("200k")) 296 | ## Get common y-axis label 297 | p.tsne[[1]] <- p.tsne[[1]] + theme(axis.title.y=element_text()) 298 | # a bit of a hack to get a common x-axis label 299 | p.tsne[[2]] <- p.tsne[[2]] + theme(axis.title.x=element_text(hjust=1.2)) 300 | 301 | p.UMI.tsne <- cowplot::plot_grid(plotlist=p.tsne, 302 | align="h", 303 | axis="tb", 304 | nrow=1, 305 | rel_widths=c(1.05,1,1,1), 306 | labels=c("E",show_tsne_markers[1],"",show_tsne_markers[2]), 307 | label_size=panel.label_size, 308 | vjust=panel.label_vjust, 309 | hjust=c(panel.label_hjust,0.5,panel.label_hjust,0.5)) 310 | 311 | p.UMI.tsne 312 | ``` 313 | 314 | ## Final plot 315 | 316 | ```{r figure, fig.width=7, fig.height=6} 317 | A <- p.UMIcountsPerCondition + theme(legend.key.width=unit(0.3,"cm"), 318 | legend.key.height=unit(0.4,"cm"), 319 | legend.text=element_text(size=unit(5,"pt")), 320 | plot.margin=unit(c(0.3,0,0.5,0),"cm")) 321 | 322 | B1 <- p.markerByConc + theme(text = element_text(size=10), 323 | plot.margin=unit(c(0.3,0,0,0),"cm"), 324 | legend.position="none") 325 | B2 <- p.UMIcountsPerMarker + theme(legend.position="none") 326 | C <- p.UMIinExpressingCells + theme(legend.position="none") 327 | 328 | BC.legend <- cowplot::get_legend(p.UMIcountsPerMarker + 329 | guides(fill=guide_legend(reverse=FALSE)) + 330 | theme(legend.position="bottom", 331 | legend.direction="horizontal", 332 | legend.background=element_blank(), 333 | legend.box.background=element_blank(), legend.key=element_blank())) 334 | 335 | D <- p.CD31 + theme(plot.margin=unit(c(0.5,0,0,0),"cm")) 336 | 337 | AD <- cowplot::plot_grid(A,D,NULL, 338 | ncol=1, 339 | rel_heights=c(13,17,1.5), 340 | labels=c("A","D",""), 341 | label_size=panel.label_size, 342 | vjust=panel.label_vjust, 343 | hjust=panel.label_hjust) 344 | 345 | BC <- cowplot::plot_grid(B1, B2, C, 346 | nrow=1, 347 | rel_widths=c(2,10,10), 348 | align="h", 349 | 
axis="tb", 350 | labels=c("B", "", "C"), 351 | label_size=panel.label_size, 352 | vjust=panel.label_vjust, 353 | hjust=panel.label_hjust) 354 | 355 | p.figure <- cowplot::plot_grid(cowplot::ggdraw(cowplot::plot_grid(AD, BC, 356 | nrow=1, 357 | rel_widths=c(1,4), 358 | align="v", 359 | axis="l")) + 360 | cowplot::draw_plot(BC.legend,0.27,0.020,0.2,0.00001), 361 | p.UMI.tsne, rel_heights=c(3,1.35), align="v", axis="lr", ncol=1) 362 | 363 | png(file=file.path(outdir,"Figure 4.png"), 364 | width=figure.width.full, 365 | height=6, 366 | units = figure.unit, 367 | res=figure.resolution, 368 | antialias=figure.antialias) 369 | 370 | p.figure 371 | 372 | dev.off() 373 | 374 | p.figure 375 | ``` 376 | 377 | ## Individual titration plots 378 | 379 | For supplementary information. 380 | 381 | ```{r suppFig, fig.width=7, fig.height=10} 382 | plots.columns = 6 383 | rows.max <- 5 384 | 385 | markers <- abpanel[rownames(object[["ADT.kallisto"]]),] 386 | markers <- markers[order(markers$Category, markers$Marker),] 387 | 388 | plots <- list() 389 | 390 | ## Make individual plots for each marker (y-axis text only on every plots.columns-th plot, starting with the first) 391 | for(i in seq_len(nrow(markers))){ 392 | curMarker <- markers[i,] 393 | curMarker.name <- curMarker$Marker 394 | y.axis <- (i-1) %% plots.columns == 0 395 | plots[[curMarker.name]] <- titrationPlot(curMarker.name, y.axis=y.axis) 396 | } 397 | 398 | # a bit of a hack to make celltype legend 399 | p.legend <- cowplot::get_legend(ggplot(data.frame(supercluster=object$supercluster), 400 | aes(color=supercluster,x=1,y=1)) + 401 | geom_point(shape=15, size=1.5) + 402 | scale_color_manual(values=color.supercluster) + 403 | theme(legend.title=element_blank(), 404 | legend.margin=margin(0,0,0,0), 405 | legend.key.size = unit(0.15,"cm"), 406 | legend.position = c(0.98,1.1), 407 | legend.justification=c(1,1), 408 | legend.direction="horizontal")) 409 | 410 | plots.num <- length(plots) 411 | plots.perPage <- plots.columns*rows.max 412 | plots.pages <- ceiling(plots.num/plots.perPage) 
413 | 414 | ## Make a supplementary figure split into pages 415 | for(i in 1:plots.pages){ 416 | start <- (i-1)*plots.perPage+1 417 | end <- i*plots.perPage 418 | end <- min(end,plots.num) 419 | curPlots <- c(start:end) 420 | plots.rows <- ceiling(length(curPlots)/plots.columns) 421 | 422 | curPlots <- cowplot::plot_grid(plotlist=plots[curPlots],ncol=plots.columns, rel_widths=c(1.1,1,1,1,1,1), align="h", axis="tb") 423 | curPlots.layout <- cowplot::plot_grid(NULL, p.legend, curPlots, vjust=-0.5, hjust=panel.label_hjust, label_size=panel.label_size, ncol=1, rel_heights= c(0.5, 1.3, 70/5*plots.rows)) 424 | 425 | png(file=file.path(outdir,paste0("Supplementary Figure 4",LETTERS[i],".png")), 426 | units=figure.unit, 427 | res=figure.resolution, 428 | width=figure.width.full, 429 | height=(2*plots.rows), 430 | antialias=figure.antialias) 431 | 432 | print(curPlots.layout) 433 | 434 | dev.off() 435 | 436 | print(curPlots.layout) 437 | } 438 | ``` 439 | -------------------------------------------------------------------------------- /Cell-number-titration_files/figure-gfm/UMIcountsPerCondition-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Cell-number-titration_files/figure-gfm/UMIcountsPerCondition-1.png -------------------------------------------------------------------------------- /Cell-number-titration_files/figure-gfm/UMIinExpressingCells-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Cell-number-titration_files/figure-gfm/UMIinExpressingCells-1.png -------------------------------------------------------------------------------- /Cell-number-titration_files/figure-gfm/figure-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Cell-number-titration_files/figure-gfm/figure-1.png -------------------------------------------------------------------------------- /Cell-number-titration_files/figure-gfm/plotUMIcountsPerMarker-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Cell-number-titration_files/figure-gfm/plotUMIcountsPerMarker-1.png -------------------------------------------------------------------------------- /Cell-number-titration_files/figure-gfm/suppFig-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Cell-number-titration_files/figure-gfm/suppFig-1.png -------------------------------------------------------------------------------- /Cell-number-titration_files/figure-gfm/suppFig-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Cell-number-titration_files/figure-gfm/suppFig-2.png -------------------------------------------------------------------------------- /Cell-number-titration_files/figure-gfm/suppFig1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Cell-number-titration_files/figure-gfm/suppFig1-1.png -------------------------------------------------------------------------------- /Cell-number-titration_files/figure-gfm/suppFig1-2.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Cell-number-titration_files/figure-gfm/suppFig1-2.png -------------------------------------------------------------------------------- /Cell-number-titration_files/figure-gfm/tsnePlots-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Cell-number-titration_files/figure-gfm/tsnePlots-1.png -------------------------------------------------------------------------------- /Cell-number-titration_files/figure-gfm/unnamed-chunk-1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Cell-number-titration_files/figure-gfm/unnamed-chunk-1-1.png -------------------------------------------------------------------------------- /Cell-number-titration_files/figure-gfm/unnamed-chunk-2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Cell-number-titration_files/figure-gfm/unnamed-chunk-2-1.png -------------------------------------------------------------------------------- /Demux_Preprocess_Downsample_files/figure-gfm/demux-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Demux_Preprocess_Downsample_files/figure-gfm/demux-1.png -------------------------------------------------------------------------------- /Demux_Preprocess_Downsample_files/figure-gfm/demux-2.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Demux_Preprocess_Downsample_files/figure-gfm/demux-2.png -------------------------------------------------------------------------------- /Demux_Preprocess_Downsample_files/figure-gfm/downsample-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Demux_Preprocess_Downsample_files/figure-gfm/downsample-1.png -------------------------------------------------------------------------------- /Demux_Preprocess_Downsample_files/figure-gfm/dsbnorm-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Demux_Preprocess_Downsample_files/figure-gfm/dsbnorm-1.png -------------------------------------------------------------------------------- /Demux_Preprocess_Downsample_files/figure-gfm/filter-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Demux_Preprocess_Downsample_files/figure-gfm/filter-1.png -------------------------------------------------------------------------------- /Demux_Preprocess_Downsample_files/figure-gfm/fineClusters-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Demux_Preprocess_Downsample_files/figure-gfm/fineClusters-1.png -------------------------------------------------------------------------------- /Demux_Preprocess_Downsample_files/figure-gfm/fineClusters-2.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Demux_Preprocess_Downsample_files/figure-gfm/fineClusters-2.png -------------------------------------------------------------------------------- /Demux_Preprocess_Downsample_files/figure-gfm/preprocessRNA-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Demux_Preprocess_Downsample_files/figure-gfm/preprocessRNA-1.png -------------------------------------------------------------------------------- /Demux_Preprocess_Downsample_files/figure-gfm/preprocessRNA-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Demux_Preprocess_Downsample_files/figure-gfm/preprocessRNA-2.png -------------------------------------------------------------------------------- /Demux_Preprocess_Downsample_files/figure-gfm/preprocessRNA-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Demux_Preprocess_Downsample_files/figure-gfm/preprocessRNA-3.png -------------------------------------------------------------------------------- /Demux_Preprocess_Downsample_files/figure-gfm/superclustering-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Demux_Preprocess_Downsample_files/figure-gfm/superclustering-1.png -------------------------------------------------------------------------------- /Demux_Preprocess_Downsample_files/figure-gfm/superclustering-2.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Demux_Preprocess_Downsample_files/figure-gfm/superclustering-2.png -------------------------------------------------------------------------------- /Demux_Preprocess_Downsample_files/figure-gfm/unnamed-chunk-1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Demux_Preprocess_Downsample_files/figure-gfm/unnamed-chunk-1-1.png -------------------------------------------------------------------------------- /Demux_Preprocess_Downsample_files/figure-gfm/unnamed-chunk-2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Demux_Preprocess_Downsample_files/figure-gfm/unnamed-chunk-2-1.png -------------------------------------------------------------------------------- /Demux_Preprocess_Downsample_files/figure-gfm/unnamed-chunk-3-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Demux_Preprocess_Downsample_files/figure-gfm/unnamed-chunk-3-1.png -------------------------------------------------------------------------------- /Demux_Preprocess_Downsample_files/figure-gfm/unnamed-chunk-3-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Demux_Preprocess_Downsample_files/figure-gfm/unnamed-chunk-3-2.png -------------------------------------------------------------------------------- /Demux_Preprocess_Downsample_files/figure-gfm/unnamed-chunk-5-1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Demux_Preprocess_Downsample_files/figure-gfm/unnamed-chunk-5-1.png -------------------------------------------------------------------------------- /Load unfiltered data.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "CITE-seq optimization - Load unfiltered data" 3 | author: "Terkild Brink Buus" 4 | date: "30/3/2020" 5 | output: github_document 6 | --- 7 | 8 | ```{r setup, include=FALSE} 9 | knitr::opts_chunk$set(warning=FALSE, message=FALSE) 10 | options(stringsAsFactors=FALSE) 11 | ``` 12 | 13 | ## Load utilities 14 | 15 | Including libraries, plotting and color settings and custom utility functions 16 | 17 | ```{r loadLibraries, results='hide', message=FALSE, warning=FALSE} 18 | set.seed(114) 19 | require("Seurat", quietly=T) 20 | require("tidyverse", quietly=T) 21 | library("Matrix", quietly=T) 22 | library("DropletUtils", quietly=T) 23 | 24 | ## Load ggplot theme and defaults 25 | source("R/ggplot_settings.R") 26 | 27 | ## Load helper functions 28 | source("R/Utilities.R") 29 | 30 | read_kallisto_data <- function(file.path){ 31 | ## Load mtx and transpose it 32 | res_mat <- as(t(readMM(file.path(file.path,"cells_x_genes.mtx"))), 'CsparseMatrix') 33 | ## Attach genes 34 | rownames(res_mat) <- read.csv(file.path(file.path,"cells_x_genes.genes.txt"), sep = '\t', header = F)[,1] 35 | ## Attach barcodes 36 | colnames(res_mat) <- read.csv(file.path(file.path,"cells_x_genes.barcodes.txt"), header = F, sep = '\t')[,1] 37 | 38 | return(res_mat) 39 | } 40 | ``` 41 | 42 | ## Set file paths 43 | 44 | The aligned and counted read outputs from the various algorithms were generated using Snakemake; the workflow can be seen in the included [Snakefile](Snakemake/Snakefile). 45 | 46 | ```{r} 47 | data.drive <- "F:/" 48 | data.project.dir <- 
"Projects/ECCITE-seq/TotalSeqC_TitrationA" 49 | outdir <- "figures" 50 | t2g.file <- file.path(data.drive,data.project.dir,"/kallisto/t2g_cellranger.txt") 51 | kallistobusDir <- file.path(data.drive,data.project.dir,"kallisto/gex/c1/counts_unfiltered") 52 | 53 | ## ADT data 54 | kallistobusDirADT <- file.path(data.drive,data.project.dir,"kallisto/features/A1_S5.ADT_15/counts_unfiltered") 55 | data10XADTDir <- file.path(data.drive,data.project.dir,"cellranger_A1/outs/raw_feature_bc_matrix") 56 | dataCSCADTDir <- file.path(data.drive,data.project.dir,"cite-seq-count/A1_S5_d1_ADT/umi_count") 57 | dataCSCADTDir.dense <- file.path(data.drive,data.project.dir,"cite-seq-count/A1_S5_d1_ADT/uncorrected_cells/dense_umis.tsv") 58 | dataCSCADTnocorrectDir <- file.path(data.drive,data.project.dir,"cite-seq-count/A1_S5_d1_ADT_nocorrect/umi_count") 59 | dataCSCADTnocorrectDir.dense <- file.path(data.drive,data.project.dir,"cite-seq-count/A1_S5_d1_ADT_nocorrect/uncorrected_cells/dense_umis.tsv") 60 | 61 | ## HTO data 62 | kallistobusDirHTO <- file.path(data.drive,data.project.dir,"kallisto/features/H1_S6.HTO_A_13/counts_unfiltered") 63 | data10XHTODir <- file.path(data.drive,data.project.dir,"cellranger_H1/outs/raw_feature_bc_matrix") 64 | dataCSCHTODir <- file.path(data.drive,data.project.dir,"cite-seq-count/H1_S6_d1_HTO/umi_count") 65 | dataCSCHTODir.dense <- file.path(data.drive,data.project.dir,"cite-seq-count/H1_S6_d1_HTO/uncorrected_cells/dense_umis.tsv") 66 | dataCSCHTOnocorrectDir <- file.path(data.drive,data.project.dir,"cite-seq-count/H1_S6_d1_HTO_nocorrect/umi_count") 67 | dataCSCHTOnocorrectDir.dense <- file.path(data.drive,data.project.dir,"cite-seq-count/H1_S6_d1_HTO_nocorrect/uncorrected_cells/dense_umis.tsv") 68 | 69 | ## 10X datasets 70 | data.10X.dir <- file.path(data.drive,"data","10XDatasets") 71 | ``` 72 | 73 | # TITRATION DATASET 74 | ## Load GEX data 75 | 76 | From kallisto-bustools output. 
Modified from 77 | 78 | ```{r loadGEX} 79 | raw_mtx <- read_kallisto_data(kallistobusDir) 80 | 81 | t2g <- unique(read.csv(t2g.file, sep = '\t', header=F)[,2:3]) # load t2g file 82 | t2g <- data.frame(t2g[,2], row.names = t2g[,1]) 83 | gene_sym <- t2g[as.character(rownames(raw_mtx)),1] # get symbols for gene ids 84 | 85 | ## Which rows have same gene symbol (but different Ensembl gene id) 86 | gene_sym.duplicated <- which(gene_sym %in% gene_sym[which(duplicated(gene_sym))]) 87 | 88 | ## Which gene symbols have duplicated entries 89 | gene_sym.duplicated.unique <- unique(gene_sym[gene_sym.duplicated]) 90 | 91 | ## Make placeholder matrix for duplicate gene symbols 92 | raw_mtx_dedup <- Matrix(data=0,nrow=length(gene_sym.duplicated.unique),ncol=ncol(raw_mtx)) 93 | rownames(raw_mtx_dedup) <- gene_sym.duplicated.unique 94 | colnames(raw_mtx_dedup) <- colnames(raw_mtx) 95 | 96 | ## Combine counts from genes with same gene symbol (but different Ensembl gene id) 97 | for(i in seq_along(gene_sym.duplicated)){ 98 | curGene <- gene_sym[gene_sym.duplicated[i]] 99 | curRow <- gene_sym.duplicated.unique == curGene 100 | raw_mtx_dedup[curRow,] <- raw_mtx_dedup[curRow,] + raw_mtx[gene_sym.duplicated[i],] 101 | } 102 | 103 | ## Merge the combined counts for duplicated gene symbols with the matrix of unique gene symbol counts 104 | raw_mtx <- raw_mtx[-gene_sym.duplicated,] 105 | rownames(raw_mtx) <- gene_sym[-gene_sym.duplicated] 106 | raw_mtx <- rbind(raw_mtx,raw_mtx_dedup) 107 | 108 | tot_counts <- Matrix::colSums(raw_mtx) 109 | bc_rank <- DropletUtils::barcodeRanks(raw_mtx, lower = 10) 110 | gex.inflection <- S4Vectors::metadata(bc_rank)[["inflection"]] 111 | gex.aboveInf <- names(which(tot_counts > gex.inflection)) 112 | 113 | GEX.knee_plot <- knee_plot_auc(bc_rank) 114 | GEX.knee_plot 115 | 116 | kallisto.GEX <- raw_mtx 117 | ``` 118 | 119 | ## Load ADT data from titration dataset 120 | 121 | ```{r loadADT} 122 | ADT.res_mat <- read_kallisto_data(kallistobusDirADT) 123 | 124 | ADT.tot_counts 
<- Matrix::colSums(ADT.res_mat) 125 | ADT.bc_rank <- DropletUtils::barcodeRanks(ADT.res_mat, lower = 10) 126 | 127 | ADT.knee_plot <- knee_plot_highlight(ADT.bc_rank, highlight=gex.aboveInf) 128 | ADT.knee_plot 129 | 130 | kallisto.ADT <- ADT.res_mat 131 | 132 | cellranger.ADT <- Read10X(data.dir=data10XADTDir) 133 | 134 | CSC.ADT <- Read10X(data.dir=dataCSCADTDir, gene.column=1) 135 | CSC.ADT.dense <- read.table(file=dataCSCADTDir.dense) 136 | CSC.ADT <- cbind(CSC.ADT,CSC.ADT.dense) 137 | CSC.ADT <- CSC.ADT[rownames(CSC.ADT) != "unmapped",] 138 | 139 | CSC.ADT.uncorrected <- Read10X(data.dir=dataCSCADTnocorrectDir, gene.column=1) 140 | CSC.ADT.uncorrected.dense <- read.table(file=dataCSCADTnocorrectDir.dense) 141 | CSC.ADT.uncorrected <- cbind(CSC.ADT.uncorrected,CSC.ADT.uncorrected.dense) 142 | CSC.ADT.uncorrected <- CSC.ADT.uncorrected[rownames(CSC.ADT.uncorrected) != "unmapped",] 143 | 144 | ``` 145 | 146 | ## Load HTO data from titration dataset 147 | 148 | ```{r loadHTO} 149 | HTO.res_mat <- read_kallisto_data(kallistobusDirHTO) 150 | 151 | HTO.tot_counts <- Matrix::colSums(HTO.res_mat) 152 | HTO.bc_rank <- DropletUtils::barcodeRanks(HTO.res_mat, lower = 10) 153 | 154 | HTO.knee_plot <- knee_plot_highlight(HTO.bc_rank, highlight=gex.aboveInf) 155 | HTO.knee_plot 156 | 157 | kallisto.HTO <- HTO.res_mat 158 | 159 | knee_plots_combined <- cowplot::plot_grid(GEX.knee_plot, ADT.knee_plot, HTO.knee_plot, labels=c("mRNA","ADT","HTO"), nrow=1, label_size=panel.label_size-1, vjust=panel.label_vjust, hjust=panel.label_hjust) 160 | 161 | 162 | png(file=file.path(outdir,"Supplementary Figure S5.png"), width=figure.width.full, height=2.5, units=figure.unit, res=figure.resolution, antialias=figure.antialias) 163 | knee_plots_combined 164 | dev.off() 165 | 166 | cellranger.HTO <- Read10X(data.dir=data10XHTODir) 167 | 168 | CSC.HTO <- Read10X(data.dir=dataCSCHTODir, gene.column=1) 169 | CSC.HTO.dense <- read.table(file=dataCSCHTODir.dense) 170 | CSC.HTO <- 
cbind(CSC.HTO,CSC.HTO.dense) 171 | CSC.HTO <- CSC.HTO[rownames(CSC.HTO) != "unmapped",] 172 | 173 | CSC.HTO.uncorrected <- Read10X(data.dir=dataCSCHTOnocorrectDir, gene.column=1) 174 | CSC.HTO.uncorrected.dense <- read.table(file=dataCSCHTOnocorrectDir.dense) 175 | CSC.HTO.uncorrected <- cbind(CSC.HTO.uncorrected,CSC.HTO.uncorrected.dense) 176 | CSC.HTO.uncorrected <- CSC.HTO.uncorrected[rownames(CSC.HTO.uncorrected) != "unmapped",] 177 | 178 | ``` 179 | 180 | # 10X DATASETS 181 | ## Load GEX data from 10X datasets 182 | 183 | ```{r} 184 | data.10X.datasets <- c("PBMC_1k_GEXFeature_v3","PBMC_10k_GEXFeature_v3","PBMC_GEXFeatureVDJ_v1") 185 | data.10X.datasets.dir <- file.path(data.10X.dir,data.10X.datasets) 186 | names(data.10X.datasets.dir) <- data.10X.datasets 187 | 188 | ## Load gene expression data to distinguish cell-containing droplets from empty-droplets 189 | data.10X.datasets.gex.dir <- file.path(data.10X.datasets.dir,"raw_feature_bc_matrix") 190 | names(data.10X.datasets.gex.dir) <- data.10X.datasets 191 | data.10X.datasets.gex <- lapply(data.10X.datasets.gex.dir, function(dir)Read10X(data.dir=dir)$`Gene Expression`) 192 | data.10X.datasets.gex.bc_rank <- lapply(data.10X.datasets.gex,function(raw_mtx)DropletUtils::barcodeRanks(raw_mtx, lower = 10)) 193 | data.10X.datasets.gex.tot_counts <- lapply(data.10X.datasets.gex,function(raw_mtx)tot_counts <- Matrix::colSums(raw_mtx)) 194 | 195 | ## Get inflection points 196 | data.10X.datasets.gex.inflection <- sapply(data.10X.datasets.gex.bc_rank,function(bc_rank)S4Vectors::metadata(bc_rank)[["inflection"]]) 197 | 198 | ## Extract barcodes above inflection point 199 | data.10X.datasets.gex.aboveInf <- sapply(data.10X.datasets, function(dataset)names(which(data.10X.datasets.gex.tot_counts[[dataset]] > data.10X.datasets.gex.inflection[dataset]))) 200 | 201 | data.10X.datasets.gex.aboveInf.index <- sapply(data.10X.datasets, function(dataset)which(data.10X.datasets.gex.tot_counts[[dataset]] > 
data.10X.datasets.gex.inflection[dataset])) 202 | 203 | identical(colnames(data.10X.datasets.gex[[1]])[data.10X.datasets.gex.aboveInf.index[[1]]],data.10X.datasets.gex.aboveInf[[1]]) 204 | 205 | sapply(data.10X.datasets.gex.aboveInf,length) 206 | 207 | ## Draw knee plots 208 | data.10X.datasets.knee_plots <- lapply(data.10X.datasets.gex.bc_rank,function(x)knee_plot_auc(x)+theme(legend.position="none")) 209 | 210 | rm(data.10X.datasets.gex) 211 | rm(data.10X.datasets.gex.bc_rank) 212 | rm(data.10X.datasets.gex.tot_counts) 213 | 214 | ``` 215 | 216 | 217 | ## Load Kallisto ADT data 218 | 219 | 10X v3 chemistry requires translating feature barcodes to GEX barcodes for the two to be compatible. The translation matrix can be downloaded here: https://github.com/10XGenomics/cellranger/blob/master/lib/python/cellranger/barcodes/translation/3M-february-2018.txt.gz 220 | 221 | ```{r} 222 | ## Translate V3 feature barcodes into the corresponding GEX cell barcodes 223 | translateV3 <- read.table("F:/data/10XDatasets/10xv3_feature_to_gex_barcode_translation.txt", header=FALSE) 224 | translateV3.names <- translateV3[,1] 225 | translateV3 <- translateV3[,2] 226 | names(translateV3) <- translateV3.names 227 | 228 | data.10X.datasets.adt.kallisto.dir <- sapply(data.10X.datasets.dir,function(datasetDir)dir(path=file.path(datasetDir,"kallisto","features"), pattern="counts_unfiltered", recursive=TRUE, full.names=TRUE, include.dirs=TRUE)) 229 | 230 | data.10X.datasets.adt.kallisto <- lapply(data.10X.datasets.adt.kallisto.dir,function(dir)read_kallisto_data(dir)) 231 | 232 | lapply(data.10X.datasets.adt.kallisto,dim) 233 | 234 | data.10X.datasets.adt.kallisto[grep("_v3$",data.10X.datasets)] <- lapply(data.10X.datasets.adt.kallisto[grep("_v3$",data.10X.datasets)],function(data){colnames(data) <- translateV3[colnames(data)]; return(data)}) 235 | 236 | data.10X.datasets.adt.kallisto.bc_rank <- lapply(data.10X.datasets.adt.kallisto,function(raw_mtx)DropletUtils::barcodeRanks(raw_mtx, lower = 10)) 237 | 238 | 
adt.kallist.aboveInf <- names(which(Matrix::colSums(data.10X.datasets.adt.kallisto[[1]]) > S4Vectors::metadata(data.10X.datasets.adt.kallisto.bc_rank[[1]])[["inflection"]])) 239 | 240 | knee_plots.adt.kallisto <- lapply(data.10X.datasets.adt.kallisto.bc_rank,knee_plot) 241 | cowplot::plot_grid(plotlist=knee_plots.adt.kallisto, nrow=1) 242 | ``` 243 | 244 | ## Load CSC ADT data 245 | 246 | ```{r} 247 | data.10X.datasets.adt.csc.dir <- sapply(data.10X.datasets.dir,function(datasetDir)dir(path=file.path(datasetDir,"cite-seq-count"), pattern="umi_count", recursive=TRUE, full.names=TRUE, include.dirs=TRUE)) 248 | 249 | data.10X.datasets.adt.csc.dense.dir <- sapply(data.10X.datasets.dir,function(datasetDir)list.files(path=file.path(datasetDir,"cite-seq-count"), pattern="dense_umis.tsv", recursive=TRUE, full.names=TRUE, include.dirs=FALSE)) 250 | 251 | data.10X.datasets.adt.csc_nc.dir <- grep("_nocorrect",data.10X.datasets.adt.csc.dir, value=TRUE) 252 | data.10X.datasets.adt.csc_nc.dense.dir <- grep("_nocorrect",data.10X.datasets.adt.csc.dense.dir, value=TRUE) 253 | names(data.10X.datasets.adt.csc_nc.dir) <- data.10X.datasets 254 | names(data.10X.datasets.adt.csc_nc.dense.dir) <- data.10X.datasets 255 | 256 | data.10X.datasets.adt.csc.dir <- setdiff(data.10X.datasets.adt.csc.dir, data.10X.datasets.adt.csc_nc.dir) 257 | data.10X.datasets.adt.csc.dense.dir <- setdiff(data.10X.datasets.adt.csc.dense.dir, data.10X.datasets.adt.csc_nc.dense.dir) 258 | names(data.10X.datasets.adt.csc.dir) <- data.10X.datasets 259 | names(data.10X.datasets.adt.csc.dense.dir) <- data.10X.datasets 260 | 261 | data.10X.datasets.adt.csc <- lapply(data.10X.datasets.adt.csc.dir,function(dir)Read10X(data.dir=dir, gene.column=1)) 262 | data.10X.datasets.adt.csc.dense <- lapply(data.10X.datasets.adt.csc.dense.dir,function(dir)read.table(file=dir)) 263 | data.10X.datasets.adt.csc <- 
lapply(data.10X.datasets,function(dataset)Matrix::cbind2(data.10X.datasets.adt.csc[[dataset]],Matrix::Matrix(as.matrix(data.10X.datasets.adt.csc.dense[[dataset]])))) 264 | names(data.10X.datasets.adt.csc) <- data.10X.datasets 265 | data.10X.datasets.adt.csc <- lapply(data.10X.datasets.adt.csc,function(data)data[rownames(data) != "unmapped",]) 266 | 267 | data.10X.datasets.adt.csc[grep("_v3$",data.10X.datasets)] <- lapply(data.10X.datasets.adt.csc[grep("_v3$",data.10X.datasets)],function(data){colnames(data) <- translateV3[colnames(data)]; return(data)}) 268 | 269 | data.10X.datasets.adt.csc_nc <- lapply(data.10X.datasets.adt.csc_nc.dir,function(dir)Read10X(data.dir=dir, gene.column=1)) 270 | data.10X.datasets.adt.csc_nc.dense <- lapply(data.10X.datasets.adt.csc_nc.dense.dir,function(dir)read.table(file=dir)) 271 | data.10X.datasets.adt.csc_nc <- lapply(data.10X.datasets,function(dataset)Matrix::cbind2(data.10X.datasets.adt.csc_nc[[dataset]],Matrix::Matrix(as.matrix(data.10X.datasets.adt.csc_nc.dense[[dataset]])))) 272 | names(data.10X.datasets.adt.csc_nc) <- data.10X.datasets 273 | data.10X.datasets.adt.csc_nc <- lapply(data.10X.datasets.adt.csc_nc,function(data)data[rownames(data) != "unmapped",]) 274 | 275 | data.10X.datasets.adt.csc_nc[grep("_v3$",data.10X.datasets)] <- lapply(data.10X.datasets.adt.csc_nc[grep("_v3$",data.10X.datasets)],function(data){colnames(data) <- translateV3[colnames(data)]; return(data)}) 276 | 277 | data.10X.datasets.adt.csc.bc_rank <- lapply(data.10X.datasets.adt.csc,function(raw_mtx)DropletUtils::barcodeRanks(raw_mtx, lower = 10)) 278 | 279 | knee_plots.adt.csc <- lapply(data.10X.datasets.adt.csc.bc_rank,knee_plot) 280 | cowplot::plot_grid(plotlist=knee_plots.adt.csc, nrow=1) 281 | ``` 282 | 283 | 284 | ## Load CellRanger featureOnly ADT data 285 | 286 | ```{r} 287 | data.10X.datasets.adt.cellranger.dir <- sapply(data.10X.datasets.dir,function(datasetDir)dir(path=file.path(datasetDir), pattern="raw_feature_bc_matrix", recursive=TRUE, 
full.names=TRUE, include.dirs=TRUE)[1]) 288 | 289 | data.10X.datasets.adt.cellranger <- lapply(data.10X.datasets.adt.cellranger.dir,function(dir)Read10X(dir)) 290 | lapply(data.10X.datasets.adt.cellranger,dim) 291 | 292 | data.10X.datasets.adt.cellranger.bc_rank <- lapply(data.10X.datasets.adt.cellranger,function(raw_mtx)DropletUtils::barcodeRanks(raw_mtx, lower = 10)) 293 | 294 | knee_plots.adt.cellranger <- lapply(data.10X.datasets.adt.cellranger.bc_rank,knee_plot) 295 | cowplot::plot_grid(plotlist=knee_plots.adt.cellranger, nrow=1) 296 | 297 | cowplot::plot_grid(plotlist=c(knee_plots.adt.cellranger,knee_plots.adt.csc,knee_plots.adt.kallisto), labels=data.10X.datasets, nrow=3) 298 | ``` 299 | 300 | ## Save data 301 | 302 | ```{r} 303 | 304 | save(file="data/data.10X.datasets.Rdata", 305 | data.10X.datasets, 306 | data.10X.datasets.adt.kallisto, 307 | data.10X.datasets.adt.csc, 308 | data.10X.datasets.adt.csc_nc, 309 | data.10X.datasets.adt.cellranger, 310 | data.10X.datasets.gex.aboveInf, 311 | data.10X.datasets.knee_plots) 312 | 313 | save(file="data/data.HTO.Rdata", 314 | kallisto.HTO, 315 | cellranger.HTO, 316 | CSC.HTO, 317 | CSC.HTO.uncorrected, 318 | gex.aboveInf) 319 | 320 | save(file="data/data.ADT.Rdata", 321 | kallisto.ADT, 322 | cellranger.ADT, 323 | CSC.ADT, 324 | CSC.ADT.uncorrected, 325 | gex.aboveInf) 326 | 327 | ``` 328 | -------------------------------------------------------------------------------- /Load-unfiltered-data.md: -------------------------------------------------------------------------------- 1 | CITE-seq optimization - Load unfiltered data 2 | ================ 3 | Terkild Brink Buus 4 | 30/3/2020 5 | 6 | ## Load utilities 7 | 8 | Including libraries, plotting and color settings and custom utility 9 | functions 10 | 11 | ``` r 12 | set.seed(114) 13 | require("Seurat", quietly=T) 14 | require("tidyverse", quietly=T) 15 | library("Matrix", quietly=T) 16 | library("DropletUtils", quietly=T) 17 | 18 | ## Load ggplot theme and defaults 19 
| source("R/ggplot_settings.R") 20 | 21 | ## Load helper functions 22 | source("R/Utilities.R") 23 | 24 | read_kallisto_data <- function(file.path){ 25 | ## Load mtx and transpose it 26 | res_mat <- as(t(readMM(file.path(file.path,"cells_x_genes.mtx"))), 'CsparseMatrix') 27 | ## Attach genes 28 | rownames(res_mat) <- read.csv(file.path(file.path,"cells_x_genes.genes.txt"), sep = '\t', header = F)[,1] 29 | ## Attach barcodes 30 | colnames(res_mat) <- read.csv(file.path(file.path,"cells_x_genes.barcodes.txt"), header = F, sep = '\t')[,1] 31 | 32 | return(res_mat) 33 | } 34 | ``` 35 | 36 | ## Set file paths 37 | 38 | The aligned and counted read outputs from the various algorithms 39 | were generated using Snakemake; the workflow can be seen in the 40 | included [Snakefile](Snakemake/Snakefile). 41 | 42 | ``` r 43 | data.drive <- "F:/" 44 | data.project.dir <- "Projects/ECCITE-seq/TotalSeqC_TitrationA" 45 | outdir <- "figures" 46 | t2g.file <- file.path(data.drive,data.project.dir,"/kallisto/t2g_cellranger.txt") 47 | kallistobusDir <- file.path(data.drive,data.project.dir,"kallisto/gex/c1/counts_unfiltered") 48 | 49 | ## ADT data 50 | kallistobusDirADT <- file.path(data.drive,data.project.dir,"kallisto/features/A1_S5.ADT_15/counts_unfiltered") 51 | data10XADTDir <- file.path(data.drive,data.project.dir,"cellranger_A1/outs/raw_feature_bc_matrix") 52 | dataCSCADTDir <- file.path(data.drive,data.project.dir,"cite-seq-count/A1_S5_d1_ADT/umi_count") 53 | dataCSCADTDir.dense <- file.path(data.drive,data.project.dir,"cite-seq-count/A1_S5_d1_ADT/uncorrected_cells/dense_umis.tsv") 54 | dataCSCADTnocorrectDir <- file.path(data.drive,data.project.dir,"cite-seq-count/A1_S5_d1_ADT_nocorrect/umi_count") 55 | dataCSCADTnocorrectDir.dense <- file.path(data.drive,data.project.dir,"cite-seq-count/A1_S5_d1_ADT_nocorrect/uncorrected_cells/dense_umis.tsv") 56 | 57 | ## HTO data 58 | kallistobusDirHTO <- file.path(data.drive,data.project.dir,"kallisto/features/H1_S6.HTO_A_13/counts_unfiltered") 59 | 
data10XHTODir <- file.path(data.drive,data.project.dir,"cellranger_H1/outs/raw_feature_bc_matrix") 60 | dataCSCHTODir <- file.path(data.drive,data.project.dir,"cite-seq-count/H1_S6_d1_HTO/umi_count") 61 | dataCSCHTODir.dense <- file.path(data.drive,data.project.dir,"cite-seq-count/H1_S6_d1_HTO/uncorrected_cells/dense_umis.tsv") 62 | dataCSCHTOnocorrectDir <- file.path(data.drive,data.project.dir,"cite-seq-count/H1_S6_d1_HTO_nocorrect/umi_count") 63 | dataCSCHTOnocorrectDir.dense <- file.path(data.drive,data.project.dir,"cite-seq-count/H1_S6_d1_HTO_nocorrect/uncorrected_cells/dense_umis.tsv") 64 | 65 | ## 10X datasets 66 | data.10X.dir <- file.path(data.drive,"data","10XDatasets") 67 | ``` 68 | 69 | # TITRATION DATASET 70 | 71 | ## Load GEX data 72 | 73 | From kallisto-bustools output. Modified from 74 | 75 | 76 | ``` r 77 | raw_mtx <- read_kallisto_data(kallistobusDir) 78 | 79 | t2g <- unique(read.csv(t2g.file, sep = '\t', header=F)[,2:3]) # load t2g file 80 | t2g <- data.frame(t2g[,2], row.names = t2g[,1]) 81 | gene_sym <- t2g[as.character(rownames(raw_mtx)),1] # get symbols for gene ids 82 | 83 | ## Which rows have same gene symbol (but different Ensembl gene id) 84 | gene_sym.duplicated <- which(gene_sym %in% gene_sym[which(duplicated(gene_sym))]) 85 | 86 | ## Which genes are have duplicated entries 87 | gene_sym.duplicated.unique <- unique(gene_sym[gene_sym.duplicated]) 88 | 89 | ## Make placeholder matrix for duplicate gene symbols 90 | raw_mtx_dedup <- Matrix(data=0,nrow=length(gene_sym.duplicated.unique),ncol=ncol(raw_mtx)) 91 | rownames(raw_mtx_dedup) <- gene_sym.duplicated.unique 92 | colnames(raw_mtx_dedup) <- colnames(raw_mtx) 93 | 94 | ## Combine counts from genes with same gene symbol (but different Ensembl gene id) 95 | for(i in seq_along(gene_sym.duplicated)){ 96 | curGene <- gene_sym[gene_sym.duplicated[i]] 97 | curRow <- gene_sym.duplicated.unique == curGene 98 | raw_mtx_dedup[curRow,] <- raw_mtx_dedup[curRow,] + raw_mtx[gene_sym.duplicated[i],] 99 
| } 100 | 101 | ## Merged combined counts duplicate gene symbol with matrix of unique gene symbol counts 102 | raw_mtx <- raw_mtx[-gene_sym.duplicated,] 103 | rownames(raw_mtx) <- gene_sym[-gene_sym.duplicated] 104 | raw_mtx <- rbind(raw_mtx,raw_mtx_dedup) 105 | 106 | tot_counts <- Matrix::colSums(raw_mtx) 107 | bc_rank <- DropletUtils::barcodeRanks(raw_mtx, lower = 10) 108 | gex.inflection <- S4Vectors::metadata(bc_rank)[["inflection"]] 109 | gex.aboveInf <- names(which(tot_counts > gex.inflection)) 110 | 111 | GEX.knee_plot <- knee_plot_auc(bc_rank) 112 | GEX.knee_plot 113 | ``` 114 | 115 | ![](Load-unfiltered-data_files/figure-gfm/loadGEX-1.png) 116 | 117 | ``` r 118 | kallisto.GEX <- raw_mtx 119 | ``` 120 | 121 | ## Load ADT data from titration dataset 122 | 123 | ``` r 124 | ADT.res_mat <- read_kallisto_data(kallistobusDirADT) 125 | 126 | ADT.tot_counts <- Matrix::colSums(ADT.res_mat) 127 | ADT.bc_rank <- DropletUtils::barcodeRanks(ADT.res_mat, lower = 10) 128 | 129 | ADT.knee_plot <- knee_plot_highlight(ADT.bc_rank, highlight=gex.aboveInf) 130 | ADT.knee_plot 131 | ``` 132 | 133 | ![](Load-unfiltered-data_files/figure-gfm/loadADT-1.png) 134 | 135 | ``` r 136 | kallisto.ADT <- ADT.res_mat 137 | 138 | cellranger.ADT <- Read10X(data.dir=data10XADTDir) 139 | 140 | CSC.ADT <- Read10X(data.dir=dataCSCADTDir, gene.column=1) 141 | CSC.ADT.dense <- read.table(file=dataCSCADTDir.dense) 142 | CSC.ADT <- cbind(CSC.ADT,CSC.ADT.dense) 143 | CSC.ADT <- CSC.ADT[rownames(CSC.ADT) != "unmapped",] 144 | 145 | CSC.ADT.uncorrected <- Read10X(data.dir=dataCSCADTnocorrectDir, gene.column=1) 146 | CSC.ADT.uncorrected.dense <- read.table(file=dataCSCADTnocorrectDir.dense) 147 | CSC.ADT.uncorrected <- cbind(CSC.ADT.uncorrected,CSC.ADT.uncorrected.dense) 148 | CSC.ADT.uncorrected <- CSC.ADT.uncorrected[rownames(CSC.ADT.uncorrected) != "unmapped",] 149 | ``` 150 | 151 | ## Load HTO data from titration dataset 152 | 153 | ``` r 154 | HTO.res_mat <- read_kallisto_data(kallistobusDirHTO) 
155 | 156 | HTO.tot_counts <- Matrix::colSums(HTO.res_mat) 157 | HTO.bc_rank <- DropletUtils::barcodeRanks(HTO.res_mat, lower = 10) 158 | 159 | HTO.knee_plot <- knee_plot_highlight(HTO.bc_rank, highlight=gex.aboveInf) 160 | HTO.knee_plot 161 | ``` 162 | 163 | ![](Load-unfiltered-data_files/figure-gfm/loadHTO-1.png) 164 | 165 | ``` r 166 | kallisto.HTO <- HTO.res_mat 167 | 168 | knee_plots_combined <- cowplot::plot_grid(GEX.knee_plot, ADT.knee_plot, HTO.knee_plot, labels=c("mRNA","ADT","HTO"), nrow=1, label_size=panel.label_size-1, vjust=panel.label_vjust, hjust=panel.label_hjust) 169 | 170 | 171 | png(file=file.path(outdir,"Supplementary Figure S4.png"), width=figure.width.full, height=2.5, units=figure.unit, res=figure.resolution, antialias=figure.antialias) 172 | knee_plots_combined 173 | dev.off() 174 | ``` 175 | 176 | ## png 177 | ## 2 178 | 179 | ``` r 180 | cellranger.HTO <- Read10X(data.dir=data10XHTODir) 181 | 182 | CSC.HTO <- Read10X(data.dir=dataCSCHTODir, gene.column=1) 183 | CSC.HTO.dense <- read.table(file=dataCSCHTODir.dense) 184 | CSC.HTO <- cbind(CSC.HTO,CSC.HTO.dense) 185 | CSC.HTO <- CSC.HTO[rownames(CSC.HTO) != "unmapped",] 186 | 187 | CSC.HTO.uncorrected <- Read10X(data.dir=dataCSCHTOnocorrectDir, gene.column=1) 188 | CSC.HTO.uncorrected.dense <- read.table(file=dataCSCHTOnocorrectDir.dense) 189 | CSC.HTO.uncorrected <- cbind(CSC.HTO.uncorrected,CSC.HTO.uncorrected.dense) 190 | CSC.HTO.uncorrected <- CSC.HTO.uncorrected[rownames(CSC.HTO.uncorrected) != "unmapped",] 191 | ``` 192 | 193 | # 10X DATASETS 194 | 195 | ## Load GEX data from 10X datasets 196 | 197 | ``` r 198 | data.10X.datasets <- c("PBMC_1k_GEXFeature_v3","PBMC_10k_GEXFeature_v3","PBMC_GEXFeatureVDJ_v1") 199 | data.10X.datasets.dir <- file.path(data.10X.dir,data.10X.datasets) 200 | names(data.10X.datasets.dir) <- data.10X.datasets 201 | 202 | ## Load gene expression data to distinguish cell-containing droplets from empty-droplets 203 | data.10X.datasets.gex.dir <- 
file.path(data.10X.datasets.dir,"raw_feature_bc_matrix") 204 | names(data.10X.datasets.gex.dir) <- data.10X.datasets 205 | data.10X.datasets.gex <- lapply(data.10X.datasets.gex.dir, function(dir)Read10X(data.dir=dir)$`Gene Expression`) 206 | data.10X.datasets.gex.bc_rank <- lapply(data.10X.datasets.gex,function(raw_mtx)DropletUtils::barcodeRanks(raw_mtx, lower = 10)) 207 | data.10X.datasets.gex.tot_counts <- lapply(data.10X.datasets.gex,function(raw_mtx)tot_counts <- Matrix::colSums(raw_mtx)) 208 | 209 | ## Get inflection points 210 | data.10X.datasets.gex.inflection <- sapply(data.10X.datasets.gex.bc_rank,function(bc_rank)S4Vectors::metadata(bc_rank)[["inflection"]]) 211 | 212 | ## Extract barcodes above inflection point 213 | data.10X.datasets.gex.aboveInf <- sapply(data.10X.datasets, function(dataset)names(which(data.10X.datasets.gex.tot_counts[[dataset]] > data.10X.datasets.gex.inflection[dataset]))) 214 | 215 | data.10X.datasets.gex.aboveInf.index <- sapply(data.10X.datasets, function(dataset)which(data.10X.datasets.gex.tot_counts[[dataset]] > data.10X.datasets.gex.inflection[dataset])) 216 | 217 | identical(colnames(data.10X.datasets.gex[[1]])[data.10X.datasets.gex.aboveInf.index[[1]]],data.10X.datasets.gex.aboveInf[[1]]) 218 | ``` 219 | 220 | ## [1] TRUE 221 | 222 | ``` r 223 | sapply(data.10X.datasets.gex.aboveInf,length) 224 | ``` 225 | 226 | ## PBMC_1k_GEXFeature_v3 PBMC_10k_GEXFeature_v3 PBMC_GEXFeatureVDJ_v1 227 | ## 740 7960 7627 228 | 229 | ``` r 230 | ## Draw knee plots 231 | data.10X.datasets.knee_plots <- lapply(data.10X.datasets.gex.bc_rank,function(x)knee_plot_auc(x)+theme(legend.position="none")) 232 | 233 | rm(data.10X.datasets.gex) 234 | rm(data.10X.datasets.gex.bc_rank) 235 | rm(data.10X.datasets.gex.tot_counts) 236 | ``` 237 | 238 | ## Load Kallisto ADT data 239 | 240 | 10X v3 chemistry requires translating feature barcodes to GEX barcodes to 241 | be compatible. 
The translation matrix can be downloaded here: 242 | https://github.com/10XGenomics/cellranger/blob/master/lib/python/cellranger/barcodes/translation/3M-february-2018.txt.gz 243 | 244 | ``` r 245 | ## Translate V3 feature barcodes into the corresponding GEX cell barcodes 246 | translateV3 <- read.table("F:/data/10XDatasets/10xv3_feature_to_gex_barcode_translation.txt", header=FALSE) 247 | translateV3.names <- translateV3[,1] 248 | translateV3 <- translateV3[,2] 249 | names(translateV3) <- translateV3.names 250 | 251 | data.10X.datasets.adt.kallisto.dir <- sapply(data.10X.datasets.dir,function(datasetDir)dir(path=file.path(datasetDir,"kallisto","features"), pattern="counts_unfiltered", recursive=TRUE, full.names=TRUE, include.dirs=TRUE)) 252 | 253 | data.10X.datasets.adt.kallisto <- lapply(data.10X.datasets.adt.kallisto.dir,function(dir)read_kallisto_data(dir)) 254 | 255 | lapply(data.10X.datasets.adt.kallisto,dim) 256 | ``` 257 | 258 | ## $PBMC_1k_GEXFeature_v3 259 | ## [1] 17 124716 260 | ## 261 | ## $PBMC_10k_GEXFeature_v3 262 | ## [1] 17 674603 263 | ## 264 | ## $PBMC_GEXFeatureVDJ_v1 265 | ## [1] 17 734856 266 | 267 | ``` r 268 | data.10X.datasets.adt.kallisto[grep("_v3$",data.10X.datasets)] <- lapply(data.10X.datasets.adt.kallisto[grep("_v3$",data.10X.datasets)],function(data){colnames(data) <- translateV3[colnames(data)]; return(data)}) 269 | 270 | data.10X.datasets.adt.kallisto.bc_rank <- lapply(data.10X.datasets.adt.kallisto,function(raw_mtx)DropletUtils::barcodeRanks(raw_mtx, lower = 10)) 271 | 272 | adt.kallist.aboveInf <- names(which(Matrix::colSums(data.10X.datasets.adt.kallisto[[1]]) > S4Vectors::metadata(data.10X.datasets.adt.kallisto.bc_rank[[1]])[["inflection"]])) 273 | 274 | knee_plots.adt.kallisto <- lapply(data.10X.datasets.adt.kallisto.bc_rank,knee_plot) 275 | cowplot::plot_grid(plotlist=knee_plots.adt.kallisto, nrow=1) 276 | ``` 277 | 278 | ![](Load-unfiltered-data_files/figure-gfm/unnamed-chunk-3-1.png) 279 | 280 | ## Load CSC ADT data 281 | 282 | ``` r 283 | data.10X.datasets.adt.csc.dir <- 
sapply(data.10X.datasets.dir,function(datasetDir)dir(path=file.path(datasetDir,"cite-seq-count"), pattern="umi_count", recursive=TRUE, full.names=TRUE, include.dirs=TRUE)) 284 | 285 | data.10X.datasets.adt.csc.dense.dir <- sapply(data.10X.datasets.dir,function(datasetDir)list.files(path=file.path(datasetDir,"cite-seq-count"), pattern="dense_umis.tsv", recursive=TRUE, full.names=TRUE, include.dirs=FALSE)) 286 | 287 | data.10X.datasets.adt.csc_nc.dir <- grep("_nocorrect",data.10X.datasets.adt.csc.dir, value=TRUE) 288 | data.10X.datasets.adt.csc_nc.dense.dir <- grep("_nocorrect",data.10X.datasets.adt.csc.dense.dir, value=TRUE) 289 | names(data.10X.datasets.adt.csc_nc.dir) <- data.10X.datasets 290 | names(data.10X.datasets.adt.csc_nc.dense.dir) <- data.10X.datasets 291 | 292 | data.10X.datasets.adt.csc.dir <- setdiff(data.10X.datasets.adt.csc.dir, data.10X.datasets.adt.csc_nc.dir) 293 | data.10X.datasets.adt.csc.dense.dir <- setdiff(data.10X.datasets.adt.csc.dense.dir, data.10X.datasets.adt.csc_nc.dense.dir) 294 | names(data.10X.datasets.adt.csc.dir) <- data.10X.datasets 295 | names(data.10X.datasets.adt.csc.dense.dir) <- data.10X.datasets 296 | 297 | data.10X.datasets.adt.csc <- lapply(data.10X.datasets.adt.csc.dir,function(dir)Read10X(data.dir=dir, gene.column=1)) 298 | data.10X.datasets.adt.csc.dense <- lapply(data.10X.datasets.adt.csc.dense.dir,function(dir)read.table(file=dir)) 299 | data.10X.datasets.adt.csc <- lapply(data.10X.datasets,function(dataset)Matrix::cbind2(data.10X.datasets.adt.csc[[dataset]],Matrix::Matrix(as.matrix(data.10X.datasets.adt.csc.dense[[dataset]])))) 300 | names(data.10X.datasets.adt.csc) <- data.10X.datasets 301 | data.10X.datasets.adt.csc <- lapply(data.10X.datasets.adt.csc,function(data)data[rownames(data) != "unmapped",]) 302 | 303 | data.10X.datasets.adt.csc[grep("_v3$",data.10X.datasets)] <- lapply(data.10X.datasets.adt.csc[grep("_v3$",data.10X.datasets)],function(data){colnames(data) <- translateV3[colnames(data)]; return(data)}) 
304 | 305 | data.10X.datasets.adt.csc_nc <- lapply(data.10X.datasets.adt.csc_nc.dir,function(dir)Read10X(data.dir=dir, gene.column=1)) 306 | data.10X.datasets.adt.csc_nc.dense <- lapply(data.10X.datasets.adt.csc_nc.dense.dir,function(dir)read.table(file=dir)) 307 | data.10X.datasets.adt.csc_nc <- lapply(data.10X.datasets,function(dataset)Matrix::cbind2(data.10X.datasets.adt.csc_nc[[dataset]],Matrix::Matrix(as.matrix(data.10X.datasets.adt.csc_nc.dense[[dataset]])))) 308 | names(data.10X.datasets.adt.csc_nc) <- data.10X.datasets 309 | data.10X.datasets.adt.csc_nc <- lapply(data.10X.datasets.adt.csc_nc,function(data)data[rownames(data) != "unmapped",]) 310 | 311 | data.10X.datasets.adt.csc_nc[grep("_v3$",data.10X.datasets)] <- lapply(data.10X.datasets.adt.csc_nc[grep("_v3$",data.10X.datasets)],function(data){colnames(data) <- translateV3[colnames(data)]; return(data)}) 312 | 313 | data.10X.datasets.adt.csc.bc_rank <- lapply(data.10X.datasets.adt.csc,function(raw_mtx)DropletUtils::barcodeRanks(raw_mtx, lower = 10)) 314 | 315 | knee_plots.adt.csc <- lapply(data.10X.datasets.adt.csc.bc_rank,knee_plot) 316 | cowplot::plot_grid(plotlist=knee_plots.adt.csc, nrow=1) 317 | ``` 318 | 319 | ![](Load-unfiltered-data_files/figure-gfm/unnamed-chunk-4-1.png) 320 | 321 | ## Load CellRanger featureOnly ADT data 322 | 323 | ``` r 324 | data.10X.datasets.adt.cellranger.dir <- sapply(data.10X.datasets.dir,function(datasetDir)dir(path=file.path(datasetDir), pattern="raw_feature_bc_matrix", recursive=TRUE, full.names=TRUE, include.dirs=TRUE)[1]) 325 | 326 | data.10X.datasets.adt.cellranger <- lapply(data.10X.datasets.adt.cellranger.dir,function(dir)Read10X(dir)) 327 | lapply(data.10X.datasets.adt.cellranger,dim) 328 | ``` 329 | 330 | ## $PBMC_1k_GEXFeature_v3 331 | ## [1] 17 6794880 332 | ## 333 | ## $PBMC_10k_GEXFeature_v3 334 | ## [1] 17 6794880 335 | ## 336 | ## $PBMC_GEXFeatureVDJ_v1 337 | ## [1] 17 737280 338 | 339 | ``` r 340 | data.10X.datasets.adt.cellranger.bc_rank <- 
lapply(data.10X.datasets.adt.cellranger,function(raw_mtx)DropletUtils::barcodeRanks(raw_mtx, lower = 10)) 341 | 342 | knee_plots.adt.cellranger <- lapply(data.10X.datasets.adt.cellranger.bc_rank,knee_plot) 343 | cowplot::plot_grid(plotlist=knee_plots.adt.cellranger, nrow=1) 344 | ``` 345 | 346 | ![](Load-unfiltered-data_files/figure-gfm/unnamed-chunk-5-1.png) 347 | 348 | ``` r 349 | cowplot::plot_grid(plotlist=c(knee_plots.adt.cellranger,knee_plots.adt.csc,knee_plots.adt.kallisto), labels=data.10X.datasets, nrow=3) 350 | ``` 351 | 352 | ![](Load-unfiltered-data_files/figure-gfm/unnamed-chunk-5-2.png) 353 | 354 | ## Save data 355 | 356 | ``` r 357 | save(file="data/data.10X.datasets.Rdata", 358 | data.10X.datasets, 359 | data.10X.datasets.adt.kallisto, 360 | data.10X.datasets.adt.csc, 361 | data.10X.datasets.adt.csc_nc, 362 | data.10X.datasets.adt.cellranger, 363 | data.10X.datasets.gex.aboveInf, 364 | data.10X.datasets.knee_plots) 365 | 366 | save(file="data/data.HTO.Rdata", 367 | kallisto.HTO, 368 | cellranger.HTO, 369 | CSC.HTO, 370 | CSC.HTO.uncorrected, 371 | gex.aboveInf) 372 | 373 | save(file="data/data.ADT.Rdata", 374 | kallisto.ADT, 375 | cellranger.ADT, 376 | CSC.ADT, 377 | CSC.ADT.uncorrected, 378 | gex.aboveInf) 379 | ``` 380 | -------------------------------------------------------------------------------- /Load-unfiltered-data_files/figure-gfm/loadADT-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Load-unfiltered-data_files/figure-gfm/loadADT-1.png -------------------------------------------------------------------------------- /Load-unfiltered-data_files/figure-gfm/loadGEX-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Load-unfiltered-data_files/figure-gfm/loadGEX-1.png -------------------------------------------------------------------------------- /Load-unfiltered-data_files/figure-gfm/loadHTO-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Load-unfiltered-data_files/figure-gfm/loadHTO-1.png -------------------------------------------------------------------------------- /Load-unfiltered-data_files/figure-gfm/unnamed-chunk-3-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Load-unfiltered-data_files/figure-gfm/unnamed-chunk-3-1.png -------------------------------------------------------------------------------- /Load-unfiltered-data_files/figure-gfm/unnamed-chunk-4-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Load-unfiltered-data_files/figure-gfm/unnamed-chunk-4-1.png -------------------------------------------------------------------------------- /Load-unfiltered-data_files/figure-gfm/unnamed-chunk-5-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Load-unfiltered-data_files/figure-gfm/unnamed-chunk-5-1.png -------------------------------------------------------------------------------- /Load-unfiltered-data_files/figure-gfm/unnamed-chunk-5-2.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Load-unfiltered-data_files/figure-gfm/unnamed-chunk-5-2.png -------------------------------------------------------------------------------- /R/Utilities.R: --------------------------------------------------------------------------------
#' Knee plot for filtering empty droplets
#'
#' Plots barcode rank against total UMI count on log-log axes. The inflection
#' point (dashed, black) and the knee point (dotted, red) stored in the
#' metadata of `bc_rank` are drawn as lines and labelled, together with the
#' highest rank whose total lies above each of the two thresholds.
#'
#' @param bc_rank A `DataFrame` output from `DropletUtils::barcodeRanks`.
#' @return A ggplot2 object.
#'
knee_plot <- function(bc_rank) {
  library("ggplot2")
  bc_meta <- S4Vectors::metadata(bc_rank)
  inflection <- bc_meta[["inflection"]]
  knee <- bc_meta[["knee"]]

  ## tibble/dplyr are called through their namespaces so the function does not
  ## depend on the caller having attached the tidyverse.
  knee_plt <- dplyr::filter(
    dplyr::distinct(tibble::tibble(rank = bc_rank[["rank"]],
                                   total = bc_rank[["total"]])),
    total > 0)
  annot <- tibble::tibble(inflection = inflection,
                          rank_cutoff = max(bc_rank$rank[bc_rank$total > inflection]),
                          knee = knee,
                          knee_cutoff = max(bc_rank$rank[bc_rank$total > knee]))
  p <- ggplot(knee_plt, aes(total, rank)) +
    geom_line() +
    geom_hline(aes(yintercept = rank_cutoff), data = annot, linetype = 2) +
    geom_vline(aes(xintercept = inflection), data = annot, linetype = 2) +
    geom_hline(aes(yintercept = knee_cutoff), data = annot, linetype = "dotted", col="red") +
    geom_vline(aes(xintercept = knee), data = annot, linetype = "dotted", col="red") +
    geom_label(aes(y=0,x=knee,label=knee), data = annot, hjust=0, vjust=0, col="red") +
    geom_label(aes(y=0,x=inflection,label=inflection), data = annot, hjust=1, vjust=0) +
    geom_label(aes(y=knee_cutoff,x=Inf,label=knee_cutoff), data = annot, hjust=1, vjust=1, col="red") +
    geom_label(aes(y=rank_cutoff,x=Inf,label=rank_cutoff), data = annot, hjust=1, vjust=0) +
    scale_x_log10() +
    scale_y_log10() +
    annotation_logticks() +
    labs(y = "Rank", x = "Total UMIs")
  return(p)
}

#' Knee plot with the cell / empty-droplet areas shaded
#'
#' Same rank-vs-total curve as `knee_plot`, but the area under the curve is
#' filled by whether the rank exceeds the number of barcodes whose total is
#' above the inflection point (legend labels "Cell" / "EmptyDrop"). Only the
#' inflection cutoff is drawn and labelled.
#'
#' @param bc_rank A `DataFrame` output from `DropletUtils::barcodeRanks`.
#' @return A ggplot2 object.
#'
knee_plot_auc <- function(bc_rank) {
  library("ggplot2")
  bc_meta <- S4Vectors::metadata(bc_rank)
  inflection <- bc_meta[["inflection"]]
  knee <- bc_meta[["knee"]]

  knee_plt <- dplyr::filter(
    dplyr::distinct(tibble::tibble(rank = bc_rank[["rank"]],
                                   total = bc_rank[["total"]])),
    total > 0)
  ## Cutoffs here are barcode counts above each threshold (not maximum ranks
  ## as in knee_plot).
  annot <- tibble::tibble(inflection = inflection,
                          rank_cutoff = length(which(bc_rank$total > inflection)),
                          knee = knee,
                          knee_cutoff = length(which(bc_rank$total > knee)))
  p <- ggplot(knee_plt, aes(total, rank)) +
    geom_line() +
    geom_ribbon(aes(xmin = 0, xmax = total, fill = rank > annot$rank_cutoff), alpha=0.5) +
    geom_hline(data=annot,aes(yintercept = rank_cutoff), linetype = 2) +
    geom_label(data=annot,aes(y=rank_cutoff,x=Inf,label=rank_cutoff), hjust=1, vjust=1) +
    scale_fill_manual(values=c("black","grey"), labels=c("Cell","EmptyDrop")) +
    scale_x_log10(expand=c(0,0,0.05,0)) +
    scale_y_log10(expand=c(0,0,0.05,0)) +
    annotation_logticks() +
    labs(y = "Rank", x = "Total UMIs") +
    guides(fill=guide_legend(override.aes=list(alpha=1, color="black"))) +
    theme(legend.position=c(1,1),
          legend.justification=c(1,1),
          legend.title=element_blank(),
          legend.direction="vertical",
          legend.key.size=unit(0.3,"cm"),
          legend.background=element_blank())
  return(p)
}

#' Knee plot marking the number of highlighted barcodes
#'
#' Draws the rank-vs-total curve and a dashed red horizontal line (with label)
#' at `length(highlight)`.
#'
#' The original implementation additionally built a down-sampled table of the
#' highlighted barcodes that was never plotted. That step called
#' `sample(n, 1000)` on a subset that may contain fewer than 1000 rows, so the
#' function failed whenever fewer than 1000 highlighted barcodes ranked above
#' the hard-coded cutoff - including with the default empty `highlight`. The
#' unused computation has been removed; the returned plot is unchanged.
#'
#' @param bc_rank A `DataFrame` output from `DropletUtils::barcodeRanks`.
#' @param highlight Vector of barcodes; only its length is used.
#' @return A ggplot2 object.
#'
knee_plot_highlight <- function(bc_rank, highlight=c()) {
  library("ggplot2")
  ## The barcode column is kept so that distinct() de-duplicates on the same
  ## columns as before.
  knee_plt <- dplyr::filter(
    dplyr::distinct(tibble::tibble(rank = bc_rank[["rank"]],
                                   total = bc_rank[["total"]],
                                   barcode = rownames(bc_rank))),
    total > 0)

  ## One-row table so that exactly one label is drawn.
  annot <- tibble::tibble(n_highlight = length(highlight))

  p <- ggplot(knee_plt, aes(total, rank)) +
    geom_line(color="black") +
    geom_hline(yintercept=length(highlight), linetype="dashed", color="red", size=0.25, alpha=0.5) +
    geom_label(data=annot,aes(y=n_highlight,x=Inf,label=n_highlight), hjust=1, vjust=1) +
    scale_x_log10(expand=c(0,0,0.05,0)) +
    scale_y_log10(expand=c(0,0,0.05,0)) +
    annotation_logticks() +
    labs(y = "Rank", x = "Total UMIs") +
    theme(legend.position=c(1,.99),
          legend.justification=c(1,1),
          legend.title=element_blank(),
          legend.direction="vertical")
  return(p)
}

## nth returns the value at a given fractile, or the median when the number of
## observations above that fractile (length(value)*(1-fractile)) is at most
## "nth".
##   value:    numeric vector
##   nth:      minimum number of observations required above the fractile
##   fractile: probability passed to quantile()
nth <- function(value, nth=10, fractile=0.9){
  if(length(value)*(1-fractile) <= nth){
    newvalue <- median(value)
  } else {
    newvalue <- quantile(value, probs=c(fractile))
  }
  return(newvalue)
}

## Biexponential transformation (inspired by flowJo)
## Identity for values <= lim; above lim the value is log10-compressed so that
## each decade spans "decade.size" units. Returns a scales transformation
## object named "biexp-<lim>".
biexp_trans <- function(lim = 5, decade.size = lim){
  trans <- function(x){
    ifelse(x <= lim,
           x,
           ## log() is evaluated for every element (also those <= lim), hence
           ## the suppressed warnings for non-positive values
           lim + decade.size * (suppressWarnings(log(x, 10)) -
                                  log(lim, 10)))
  }
  inv <- function(x) {
    ifelse(x <= lim,
           x,
           10^(((x-lim)/decade.size) + log(lim,10)))
  }
  breaks <- function(x) {
    if (all(x <= lim)) {
      scales::pretty_breaks()(x)
    } else if (all(x > lim)) {
      scales::breaks_log(10)(x)
    } else {
      ## NOTE(review): presumably x is the axis range c(min, max) - confirm
      unique(c(scales::pretty_breaks()(c(x[1],lim)),
               scales::breaks_log(10)(c(lim, x[2]))))
    }
  }
  scales::trans_new(paste0("biexp-",format(lim)), trans, inv, breaks)
}
-------------------------------------------------------------------------------- /R/color.R: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/R/color.R -------------------------------------------------------------------------------- /R/feature_rankplot.R: --------------------------------------------------------------------------------
## Summary function for stat_summary_bin: returns a one-row data.frame with
## the Q-quantile of x as y/yend and, as label, that quantile mapped through
## the inverse of the reversed transformation (2 significant digits) minus
## add.n.
fractile.line <- function(x, Q=0.9, trans="log2",add.n=0.5){
  q <- quantile(x,probs=c(Q))
  transrev <- ggforce::trans_reverser(trans)$inverse
  data.frame(y=q, yend=q,label=signif(transrev(-q),2)-add.n)
}

#' Feature-Rankplot
#'
#' Draws a normalized rank plot for a given feature.
#' Can include barcode plot to annotate cells along the ranking (such as cell type).
#'
#' @param data `Double` used for ranking the cells
#' @param group `Factor` used for grouping comparators (each having independent ranking)
#' @param color `Factor` used for coloring the graphs
#' @param linetype `Factor` used for annotating line and smooth line plots
#' @param wrap `Factor` used for wrapping the plot
#' @param draw.points `Boolean` of whether to draw ranked points
#' @param draw.line `Boolean` of whether to connect ranked points with a line
#' @param draw.smooth `Boolean` of whether to draw a smoothed rank plot line
#' @param draw.fractile `Boolean` of whether to draw fractile lines and values
#' @param draw.barcode `Boolean` of whether to draw a barcode plot by clusters
#' @param trans Transformation method for visualizing data
#' @param add.n `Double` constant to add for showing "0" in log transformation
#' @param colors Named vector of colors for manual coloring
#' @param fractile.upper `Double` fractile for upper line
#' @param fractile.lower `Double` fractile for lower line
#' @param barcodeGroup `Factor` used for grouping the barcode plot
#' @param barcode.stepSize `Double` of the relative size of each barcode "column" (until a better way is determined)
#' @param barcode.downsample `Integer` number of cells to be included in random downsampling within each barcodeGroup
#' @param barcode.alpha `Double` opacity of each "bar" in the barcode plot
#' @param barcode.refGroups `Character` from which groups (from "group" input) should barcodes be shown. (If NULL all groups will be included)
#' @param barcode.colors Values passed to `scale_color_manual` for the barcode groups (if NULL the default color scale is used)
#'
#' @return A ggplot object

feature_rankplot <- function(data,
                             group=NULL,
                             color=NULL,
                             linetype=NULL,
                             wrap=NULL,
                             draw.points=TRUE,
                             draw.line=FALSE,
                             draw.smooth=FALSE,
                             draw.fractile=FALSE,
                             draw.barcode=FALSE,
                             trans="biexp",
                             add.n=0,
                             colors=NULL,
                             fractile.upper=0.8,
                             fractile.lower=0.2,
                             barcodeGroup=NULL,
                             barcode.stepSize=0.3,
                             barcode.downsample=500,
                             barcode.alpha=0.3,
                             barcode.refGroups=NULL,
                             barcode.colors=NULL
                             ){
  require("ggrepel")
  require("ggplot2")

  ## Make a combined dataframe before ordering by rank
  data.combined <- data.frame(value=data,include=1)
  if(!is.null(group)) data.combined$group <- group
  if(!is.null(color)) data.combined$color <- color
  if(!is.null(linetype)) data.combined$linetype <- linetype
  if(!is.null(wrap)) data.combined$wrap <- wrap
  if(!is.null(barcodeGroup)) data.combined$barcodeGroup <- barcodeGroup

  ## Split data into groups for which ranks should be independent
  if(!is.null(group) || !is.null(wrap)){
    if(is.null(group)){
      data.grouped <- split(data.combined,list(wrap))
    } else if(is.null(wrap)){
      data.grouped <- split(data.combined,list(group))
    } else {
      data.grouped <- split(data.combined,list(group,wrap))
    }
  } else {
    data.grouped <- list(data.combined)
  }

  ## Calculate normalized ranks for each group
  rankedList <- lapply(data.grouped,FUN=function(x){
    x <- x[order(x$value),]
    x$rank <- seq_along(x$value)/length(x$value)
    return(x)
  })

  ## Define transformation functions
  transFun <- ggforce::trans_reverser(trans)$transform
  transFun.inverse <- ggforce::trans_reverser(trans)$inverse

  ## Merge the rankedList into a single dataframe for plotting
  plotData <- do.call("rbind",rankedList)
  plotData$value <- plotData$value+add.n

  p <- ggplot(plotData,aes(x=rank,y=value))

  if(draw.smooth == TRUE) p <- p + geom_line(stat="smooth",method="auto",se=FALSE, aes(color=color, group=group, linetype=linetype), alpha=0.5)
  if(draw.points == TRUE) p <- p + geom_point(aes(color=color),alpha=1,pch=19)
  if(draw.line == TRUE) p <- p + geom_line(aes(color=color, group=group, linetype=linetype),alpha=0.5)

  ## Include "barcode plot"
  if(draw.barcode == TRUE && !is.null(barcodeGroup)){
    value.max <- max(plotData$value)

    # Would be nice to have the barcode plot as a separate plot
    # But at the same time, we would like to keep the ability to
    # make facets and keep the barcode information for each facet.
    # To achieve this, we need to define a set "step" size for each
    # barcode line. As this varies depending on the range of values
    # we are using a bit of a hack (seems to work for our data):
    ## Step denotes the "column" width of each barcode column.
    step <- barcode.stepSize*(log(max(plotData$value))/log(300))

    ## Where should the first barcode column start (x-axis value)
    step.max <- transFun(value.max)+(step/2)
    barcodeGroups <- unique(plotData$barcodeGroup)

    ## Transform steps according to transformation function
    steps <- transFun.inverse((step.max-step+seq_along(barcodeGroups)*step))
    names(steps) <- barcodeGroups

    subset <- lapply(barcodeGroups,FUN=function(x){
      ## We allow basing barcodes on specific groups as this allows us to only show
      ## cells from DF1 (as these are most likely to show expression - if detectable)
      if(!is.null(barcode.refGroups)){
        subset <- which(plotData$barcodeGroup == x & plotData$group %in% barcode.refGroups)
      } else {
        subset <- which(plotData$barcodeGroup == x)
      }

      ## Barcodes easily saturate; to avoid this, we do random downsampling within each barcodeGroup
      downsample <- min(length(subset), barcode.downsample)
      subset <- subset[sample(x=length(subset), size=downsample, replace=FALSE)]
    })
    subset <- do.call("c",subset)
    plotData.barcode <- plotData[subset,]

    ## Allow another color scale
    p <- p + ggnewscale::new_scale_color()
    ## barcode.alpha is set as a fixed parameter. It used to sit inside aes(),
    ## where it was treated as data and rescaled by the alpha scale, so the
    ## requested opacity was not the one drawn.
    ## NOTE(review): steps[barcodeGroup] indexes by integer code when
    ## barcodeGroup is a factor, i.e. by level order rather than by name -
    ## confirm this is intended.
    p <- p + geom_point(data=plotData.barcode, aes(y=steps[barcodeGroup], col=barcodeGroup), alpha=barcode.alpha, shape="-", size=2)

    ## Set manual color scheme for barcode groups
    if(!is.null(barcode.colors)){
      p <- p + scale_color_manual(values=barcode.colors)
    }
  }

  ## ADD fractile STATS
  if(draw.fractile == TRUE){
    if(fractile.upper > 0){
      ## Add line segments for upper fractile
      # a bit of a hack to get positions to align? is there a better solution?
      p <- p + stat_summary_bin(geom = "segment", binwidth=2, fun.data = fractile.line, fun.args=list(Q=fractile.upper,trans=trans,add.n=add.n), aes(x=1, xend=fractile.upper, group=group), linetype="dashed")

      ## Add text labels for upper fractile
      p <- p + stat_summary_bin(geom = "text_repel", binwidth=2, fun.data = fractile.line, fun.args=list(Q=fractile.upper,trans=trans,add.n=add.n), aes(x=fractile.upper, group=group), position=position_nudge(x=(1-fractile.upper)), col="black", direction="y",hjust=1,nudge_x=fractile.upper, fontface="bold",segment.alpha=0.25)

      ## a bit of a hack to get the lines in all facets - not sure why its needed?
      #,purpose=unique(plotData[,wrap.by])
      p <- p + geom_vline(data=data.frame(expand.grid(list(q=c(fractile.lower,fractile.upper)))),aes(xintercept=q),alpha=0.35,linetype="dotted")
    }

    if(fractile.lower > 0){
      ## Add line segments for lower fractile
      p <- p + stat_summary_bin(geom = "segment", binwidth=2, fun.data = fractile.line, fun.args=list(Q=fractile.lower,trans=trans,add.n=add.n), aes(x=1, xend=fractile.lower, group=group),linetype="dashed",alpha=0.5)


      ## Add text labels for lower fractile
      p <- p + stat_summary_bin(geom = "text", binwidth=2, fun.data = fractile.line, fun.args=list(Q=fractile.lower,trans=trans,add.n=add.n), aes(x=0.95, group=group), col="black", hjust=0, vjust=-0.5, fontface="italic")
    }
  }

  ## Scale
  if(trans == "biexp"){
    p <- p + scale_y_continuous(trans=trans, limits=c(-1,max(plotData$value)), expand=c(0.01,0.01))
  } else {
    p <- p + scale_y_continuous(trans=trans, expand=c(0.01,0.01))
  }

  p <- p + scale_x_continuous(expand=c(0.01,0.01))

  ## Facet
  if(!is.null(wrap)){
    p <- p + facet_grid(~wrap)
  }

  ## Layout ("none" replaces the deprecated guides(alpha=FALSE))
  p <- p + labs(col="Sample") + theme_bw() + ylab("Count") + xlab("Rank fraction") + guides(alpha="none")
  p <- p + theme_get() + coord_flip()

  ## Manual colors
  if(!is.null(colors))p <- p + scale_color_manual(values=colors)

  return(p)
}
-------------------------------------------------------------------------------- /R/feature_rankplot_hist.R: --------------------------------------------------------------------------------
## feature_rankplot_hist: stacks a density "histogram" of the values on top of
## a feature_rankplot (lines + barcode plot) of the same values using cowplot.
##   data             numeric values (e.g. UMI counts) per cell
##   group            grouping used for independent ranking, linetype and fill
##   color, linetype  stored in the combined data frame; not used further here
##   wrap             facetting variable (a constant is used when NULL)
##   barcodeGroup     grouping for the barcode plot
##   draw.histogram   whether to draw the density panel; when FALSE only the
##                    rank plot is returned in the grid (this case previously
##                    failed because the density plot was never created)
##   trans            transformation passed to both panels
##   add.n            accepted but not forwarded by this function
##   histogram.colors fill values for the density panel
##   title            label placed at the top-left of the grid
##   gates            optional data frame with a "gate" column drawn as red lines
##   legend           set FALSE to hide the rank plot legend
##   yaxis.text       set TRUE to show the y axis title of the rank plot
##   ...              forwarded to feature_rankplot
## Returns the cowplot grid.
feature_rankplot_hist <- function(data,
                                  group=NULL,
                                  color=NULL,
                                  linetype=NULL,
                                  wrap=NULL,
                                  barcodeGroup=NULL,
                                  draw.histogram=TRUE,
                                  trans="biexp",
                                  add.n=0,
                                  histogram.colors=NULL,
                                  title="",
                                  gates=NULL,
                                  legend=TRUE,
                                  yaxis.text=FALSE, ...){
  library("cowplot")
  ## ggplot() is called below before feature_rankplot() attaches ggplot2
  library("ggplot2")

  ## Make a combined data.matrix
  data.combined <- data.frame(value=data)
  if(!is.null(group)) data.combined$group <- group
  if(!is.null(color)) data.combined$color <- color
  if(!is.null(linetype)) data.combined$linetype <- linetype
  if(!is.null(wrap)){
    data.combined$wrap <- wrap
  } else {
    data.combined$wrap <- 1
  }
  if(!is.null(barcodeGroup)) data.combined$barcodeGroup <- barcodeGroup

  ## Calculate (UMI) sum values
  ## (dplyr verbs are namespace-qualified so the tidyverse need not be attached)
  data.combined.sum <- dplyr::arrange(
    dplyr::summarise(dplyr::group_by(data.combined, wrap=wrap, group=group),
                     sum=sum(value)),
    wrap, sum)

  ## Make "nice" labels with group name and UMI sum for each wrap
  data.combined.sum.label <- dplyr::summarise(
    dplyr::group_by(data.combined.sum, wrap),
    label=paste(paste0(group,": ",sprintf("%05s",as.character(sum))),collapse="\n"))

  ## Make histograms
  if(draw.histogram == TRUE){
    p.hist <- ggplot(data.combined, aes(x=value)) +
      scale_x_continuous(trans=trans,limits=c(-1,max(data.combined$value)), expand=c(0.01,0.01)) +
      geom_density(aes(y=..density.. ,linetype=group, fill=group), alpha=0.5, bw=0.35) +
      guides(fill=guide_legend(reverse = TRUE), linetype=guide_legend(reverse = TRUE)) +
      scale_y_continuous(expand=c(0,0)) + scale_fill_manual(values=histogram.colors) +
      theme(axis.title=element_blank(),
            axis.text.y=element_blank(),
            axis.text.x=element_blank(),
            axis.ticks=element_blank(),
            panel.border=element_blank(),
            panel.grid=element_blank(),
            plot.margin=unit(c(0,0,0,0),"cm"),
            legend.direction="vertical",
            legend.title=element_blank(),
            legend.background=element_blank(),
            legend.box.margin=unit(c(0,0,0,0),"mm"),
            legend.key.width=unit(0.15,"cm"),
            legend.key.height=unit(0.10,"cm"),
            legend.position=c(0.4,2),
            legend.justification=c(0,1))

    if(!is.null(wrap)){
      p.hist <- p.hist + geom_text(data=data.combined.sum.label, x = Inf, y = Inf, hjust=1, vjust=1.5, aes(label=label), size=1.5)
      p.hist <- p.hist + facet_wrap( ~wrap)
    } else {
      scale_label <- with(data.combined.sum[order(factor(data.combined.sum$group, levels=levels(data.combined$group))),],paste0(group,": ",sprintf("%05s",as.character(sum))))
      p.hist <- p.hist +
        scale_linetype_discrete(labels=scale_label) +
        scale_fill_manual(values=histogram.colors, labels=scale_label) +
        theme(legend.position=c(1,1.5), legend.justification=c(1,1), legend.text.align=1, plot.margin=unit(c(0.3,0,0,0),"cm"))
    }
  }

  ## Draw feature_rankplot
  p.feature_rankplot <- feature_rankplot(data=data.combined$value,
                                         group=data.combined$group,
                                         linetype=data.combined$group,
                                         wrap=data.combined$wrap,
                                         barcodeGroup=data.combined$barcodeGroup,
                                         barcode.stepSize=0.4,
                                         draw.points = FALSE,
                                         draw.barcode = TRUE,
                                         draw.line = TRUE,
                                         trans=trans, ...) +
    theme(strip.text = element_blank(),
          plot.margin = unit(c(0,0.3,0,0),"cm"),
          legend.direction = "vertical",
          legend.position = c(0.90,0.02),
          legend.key.size=unit(0.2,"cm"),
          legend.title=element_blank(),
          legend.justification=c(1,0),
          axis.title=element_blank(),
          axis.text.y=element_blank()) +
    guides(linetype="none", col=guide_legend(override.aes = list(shape = 15)), group="none") + ylab("UMI count") + xlab("Cell ranking")

  if(legend == FALSE){
    p.feature_rankplot <- p.feature_rankplot + theme(legend.position="none")
  }

  if(yaxis.text == TRUE){
    p.feature_rankplot <- p.feature_rankplot + theme(axis.title.y=element_text(size=6))
  }

  if(!is.null(gates)){
    p.feature_rankplot <- p.feature_rankplot + geom_vline(data=gates,aes(xintercept=gate), col="red", alpha=0.5, linetype="dashed")
  }

  if(draw.histogram == TRUE){
    ## NOTE(review): three relative heights are supplied for two panels; kept
    ## unchanged so the layout stays exactly as before.
    p <- plot_grid(p.hist, p.feature_rankplot, ncol=1, align="v", axis="lr", label_size=7, labels=c(title,""), hjust = 0, vjust=1.1, rel_heights=c(5,15,2))
  } else {
    p <- plot_grid(p.feature_rankplot, ncol=1, label_size=7, labels=title, hjust = 0, vjust=1.1)
  }

  return(p)
}
-------------------------------------------------------------------------------- /R/feature_rankplot_hist_custom.R: --------------------------------------------------------------------------------
## feature_rankplot_hist_custom: fetches the counts of one marker plus the
## requested metadata columns with FetchData() and draws them with
## feature_rankplot_hist().
##   data             object passed to FetchData()
##   marker           feature to plot (also used as title when title is NULL)
##   group            name of the grouping variable; "dilution" triggers the
##                    concentration relabelling below
##   barcodeGroup     name of the variable used for the barcode plot
##   wrap             name of the facetting variable (optional)
##   conc             concentration used to relabel the groups; rows whose
##                    group is "DF4" get conc/4
##   histogram.colors colors for the density panel
##   barcode.colors   colors for the barcode groups (default is the
##                    color.supercluster object defined outside this function)
##   ...              forwarded to feature_rankplot_hist
## Returns the cowplot grid (visibly, like the other plotting helpers).
feature_rankplot_hist_custom <- function(data,marker,group=NULL,barcodeGroup=NULL,wrap=NULL,conc=NULL,title=NULL,histogram.colors=c("red","blue"),barcode.colors=color.supercluster,...){

  ## Column roles in the same order as the fetched variables; NULL arguments
  ## drop out of both vectors so the names always line up.
  fetched.roles <- c("value",
                     if(!is.null(barcodeGroup)) "barcodeGroup",
                     if(!is.null(group)) "group",
                     if(!is.null(wrap)) "wrap")

  data <- FetchData(data, vars=c(marker,barcodeGroup,group,wrap), slot = "counts")
  colnames(data)[seq_along(fetched.roles)] <- fetched.roles

  color.manual <- histogram.colors

  ## Defined for every combination of wrap/group (it used to be set only when
  ## wrap was NULL or group was "dilution").
  curWrap <- if(is.null(wrap)) NULL else data$wrap

  ## identical() also copes with group=NULL
  if(identical(group, "dilution")){
    if(!is.null(conc)){
      data$conc <- conc
      data$conc[data$group == "DF4"] <- conc/4
      data$conc <- factor(data$conc, levels=rev(sort(unique(data$conc))))
      levels(data$conc) <- sprintf("%2.2fug/mL",as.double(levels(data$conc)))
    } else {
      data$conc <- data$group
    }

    data$group <- data$conc
    names(color.manual) <- levels(data$group)
  }

  p <- feature_rankplot_hist(data=data$value,
                             group=data$group,
                             wrap=curWrap,
                             barcodeGroup=data$barcodeGroup,
                             title=ifelse(!is.null(title),title,marker),
                             barcode.refGroups=levels(data$group)[1],
                             histogram.colors=color.manual,
                             barcode.colors=barcode.colors,
                             ...)

  return(p)
}
-------------------------------------------------------------------------------- /R/ggplot_settings.R: --------------------------------------------------------------------------------
## Shared ggplot2 settings: text sizes, figure export parameters, the default
## theme and default geom sizes.
require("ggplot2")

text.size <- 7
text.axis.size <- 6
panel.label_size <- 10
panel.label_vjust <- 0.98
panel.label_hjust <- 0
figure.resolution <- 600
figure.antialias <- "cleartype"
figure.width.full <- 7
figure.unit <- "in"


theme_set(theme_bw(base_size=text.size) +
          theme(
            text=element_text(size=text.size),
            axis.text.y=element_text(size=text.axis.size),
            axis.text.x=element_text(angle=45, hjust=1, size=text.axis.size),
            panel.grid.minor=element_blank(),
            strip.background=element_blank(),
            strip.text=element_text(face="bold", size=text.size),
            legend.position = "bottom",
            plot.margin = unit(c(1,1,1,1),"mm")))

update_geom_defaults("line", list(size=0.35))
update_geom_defaults("bar", list(size=0.25))
update_geom_defaults("tile", list(size=0.25))
update_geom_defaults("rect", list(size=0.25))
update_geom_defaults("density", list(size=0.25))
update_geom_defaults("vline", list(size=0.25))
update_geom_defaults("hline", list(size=0.25))
update_geom_defaults("point", list(size=1))

## ggalluvial is attached before changing the defaults of its geoms
library("ggalluvial")
update_geom_defaults("stratum", list(size=0.25))
update_geom_defaults("flow", list(size=0.25))

-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CITE-seq optimization 2 | Code and results from TotalSeqC antibody titration and pipeline benchmarking for CITE-seq experiments. 3 | 4 | Data and Seurat V3 objects from the study are deposited at FigShare under this DOI: [10.6084/m9.figshare.c.5018987](https://doi.org/10.6084/m9.figshare.c.5018987) 5 | 6 | This repository contains all the code used in the processing of the aligned data and data analysis (used for generating all figures) included in the manuscript at BioRxiv.org: 7 | 8 | **Manuscript** 9 | 10 | [Improving oligo-conjugated antibody signal in multimodal single-cell analysis](https://www.biorxiv.org/content/10.1101/2020.06.15.153080v1). 11 | Terkild Brink Buus, Alberto Herrera, Ellie Ivanova, Eleni Mimitou, Anthony Cheng, Thales Papagiannakopoulos, Peter Smibert, Niels Odum, Sergei B Koralov. bioRxiv 2020.06.15.153080; doi: https://doi.org/10.1101/2020.06.15.153080 12 | 13 | --- 14 | 15 | **Pre-processing:** 16 | * [Loading data, Demultiplexing, Preprocessing and down-sampling](Demux_Preprocess_Downsample.md) - Supplementary Figure S1 17 | * [Load unfiltered data and determine cell-containing vs. empty droplets](Load-unfiltered-data.md) - Supplementary Figure S6 18 | 19 | **Data analysis:** 20 | * [Antibody concentration titration](Antibody-titration.md) - Figure 1, 2 and Supplementary Figure S2 21 | * [Reducing staining volume](Volume-titration.md) - Figure 3 and Supplementary Figure S3 22 | * [Reducing cell number at staining](Cell-number-titration.md) - Figure 4 and Supplementary Figure S4 23 | * [Reducing cell number mitigates reduced staining volume](Volume-and-cell-number-titration.md) - Supplementary Figure S5 24 | * [ADT signal in cell-containing vs.
empty droplets](ADT-reads-in-cells-vs-empty-drops.md) - Figure 5 and Supplementary Figure S8 25 | * [10X Datasets: UMI per marker plots](10X-Datasets-UMI-per-marker.md) - Supplementary Figure S7 26 | * [Comparison of ADT counting methods](ADT-counting-methods.md) - Figure 6 and Supplementary Figure S9 27 | 28 | We also included the [Snakefiles](Snakemake/) used with Snakemake to generate the alignment and counting data from our dataset and for the 10X datasets. 29 | -------------------------------------------------------------------------------- /Sequencing satuation.R: -------------------------------------------------------------------------------- 1 | set.seed(114) 2 | require("Seurat", quietly=T) 3 | require("tidyverse", quietly=T) 4 | 5 | data.Seurat <- "data/5P-CITE-seq_Titration.rds" 6 | 7 | dataCSCADTDirReads <- file.path(data.drive,data.project.dir,"cite-seq-count/A1_S5_d1_ADT_nocorrect/read_count") 8 | dataCSCADTDir <- file.path(data.drive,data.project.dir,"cite-seq-count/A1_S5_d1_ADT_nocorrect/umi_count") 9 | 10 | CSC.ADT.uncorrected <- Read10X(data.dir=dataCSCADTDir, gene.column=1) 11 | CSC.ADT.uncorrected <- CSC.ADT.uncorrected[rownames(CSC.ADT.uncorrected) != "unmapped",] 12 | 13 | CSC.ADT.uncorrected.reads <- Read10X(data.dir=dataCSCADTDirReads, gene.column=1) 14 | CSC.ADT.uncorrected.reads <- CSC.ADT.uncorrected.reads[rownames(CSC.ADT.uncorrected.reads) != "unmapped",] 15 | 16 | object <- readRDS(file=data.Seurat) 17 | 18 | ## Show number of cells from each sample 19 | table(object$group) 20 | 21 | object <- subset(object, subset=volume == "50µl") 22 | object 23 | 24 | UMI <- Matrix::colSums(CSC.ADT.uncorrected)[colnames(object)] 25 | reads <- Matrix::colSums(CSC.ADT.uncorrected.reads)[colnames(object)] 26 | 27 | df <- data.frame(barcode=colnames(object),sample=object$group,UMI=UMI,reads=reads) 28 | 29 | df %>% group_by(sample) %>% summarize(UMI=sum(UMI), reads=sum(reads)) %>% mutate(saturation=(1-(UMI/reads))) 30 | 
-------------------------------------------------------------------------------- /Snakemake/10X_VDJ/Snakefile: -------------------------------------------------------------------------------- 1 | cells=8000 2 | cores=16 3 | memory="64G" 4 | whitelist="include/10xv2_whitelist.txt" 5 | featureRef="include/feature-ref.csv" 6 | chemistry="10xv2" 7 | HTO_num=0 8 | HTO_skip=HTO_num+2 9 | 10 | def get_trim_length(wcs): 11 | Trim = "10" 12 | if wcs.type == 'HTO': 13 | Trim = "0" 14 | return [Trim] 15 | 16 | rule concat: 17 | input: 18 | R11="data/fastq/{sample}_L001_R1_001.fastq.gz", 19 | R12="data/fastq/{sample}_L002_R1_001.fastq.gz", 20 | R21="data/fastq/{sample}_L001_R2_001.fastq.gz", 21 | R22="data/fastq/{sample}_L002_R2_001.fastq.gz" 22 | output: 23 | R1="fastq_merged/{sample}_L001_R1_001.fastq.gz", 24 | R2="fastq_merged/{sample}_L001_R2_001.fastq.gz" 25 | shell: # merge lanes L001+L002 per read; '&&' (not '&') so the R1 merge has finished and succeeded before the rule completes 26 | "cat {input.R11} {input.R12} > {output.R1} && " 27 | "cat {input.R21} {input.R22} > {output.R2}" 28 | 29 | rule citeseqcount: 30 | input: 31 | R1="fastq_merged/{sample}_L001_R1_001.fastq.gz", 32 | R2="fastq_merged/{sample}_L001_R2_001.fastq.gz", 33 | features="include/cite-seq-count/CSC_{type}-ref.csv", 34 | whitelist={whitelist} 35 | params: 36 | Trim=get_trim_length 37 | output: 38 | directory("cite-seq-count/{sample}_d{hamming}_{type}") 39 | shell: 40 | "CITE-seq-Count -T {cores} --max-error {wildcards.hamming} -R1 {input.R1} -R2 {input.R2} -t {input.features} -cbf 1 -cbl 16 -umif 17 -umil 26 -o {output} -wl {input.whitelist} -cells {cells} -u unmapped_{wildcards.sample}.csv --start-trim {params.Trim}" 41 | 42 | rule citeseqcount_nocorrect: 43 | input: 44 | R1="fastq_merged/{sample}_L001_R1_001.fastq.gz", 45 | R2="fastq_merged/{sample}_L001_R2_001.fastq.gz", 46 | features="include/cite-seq-count/CSC_{type}-ref.csv", 47 | whitelist={whitelist} 48 | params: 49 | Trim=get_trim_length 50 | output: 51 | directory("cite-seq-count/{sample}_d{hamming}_{type}_nocorrect") 52 | shell: 53 | "CITE-seq-Count -T
{cores} --no_umi_correction --max-error {wildcards.hamming} -R1 {input.R1} -R2 {input.R2} -t {input.features} -cbf 1 -cbl 16 -umif 17 -umil 26 -o {output} -wl {input.whitelist} -cells {cells} -u unmapped_{wildcards.sample}.csv --start-trim {params.Trim}" 54 | 55 | rule featureList_ADT: 56 | input: 57 | {featureRef} 58 | output: 59 | "include/ADT-ref.csv" 60 | shell: 61 | "tail -n +{HTO_skip} {input} > {output}" 62 | 63 | rule featureList_HTO: 64 | input: 65 | {featureRef} 66 | output: 67 | "include/HTO-ref.csv" 68 | shell: 69 | "tail -n +2 {input} | head -n {HTO_num} > {output}" 70 | 71 | rule featureList_citeseqcount: 72 | input: 73 | "include/{type}-ref.csv" 74 | output: 75 | "include/cite-seq-count/CSC_{type}-ref.csv" 76 | shell: 77 | "awk -F, '{{print $5\",\"$1}}' {input} > {output}" 78 | 79 | rule featureList_kallisto: 80 | input: 81 | "include/{type}-ref.csv" 82 | output: 83 | "include/kallisto/kallisto_{type}-ref.csv" 84 | shell: 85 | "awk -F, '{{print $1\",\"$5}}' {input} > {output}" 86 | 87 | rule featureList_kallisto_addBase: 88 | input: 89 | "include/{type}-ref.csv" 90 | output: 91 | "include/kallisto/kallisto_{type}_{addBase}-ref.csv" 92 | shell: 93 | "awk -F, '{{print $1\",\"$5\"{wildcards.addBase}\"}}' {input} > {output}" 94 | 95 | rule kallisto_GEX_count: 96 | input: 97 | index="include/kallisto/idx_human.idx", 98 | t2g="include/kallisto/t2g_human.txt", 99 | R1="fastq_merged/{sample}_L001_R1_001.fastq.gz", 100 | R2="fastq_merged/{sample}_L001_R2_001.fastq.gz" 101 | output: 102 | directory("kallisto/{sample}") 103 | shell: 104 | "kb count -i {input.index} -g {input.t2g} -x {chemistry} -t {cores} -m {memory} -w {whitelist} -o {output} {input.R1} {input.R2}" 105 | 106 | rule kallisto_kite: 107 | input: 108 | "include/kallisto/kallisto_{type}-ref.csv" 109 | output: 110 | t2g="include/kallisto/mismatch_t2g_{type}_{k}.txt", 111 | fa="include/kallisto/mismatch_{type}_{k}.fa" 112 | shell: 113 | "python3 ~/kite/featuremap/featuremap.py {input} --t2g 
{output.t2g} --fa {output.fa}" 114 | 115 | rule kallisto_kite_index: 116 | input: 117 | "include/kallisto/mismatch_{type}_{k}.fa" 118 | output: 119 | "include/kallisto/mismatch_{type}_{k}.idx" 120 | shell: 121 | "module load kallisto; " 122 | "kallisto index -i {output} -k {wildcards.k} {input}" 123 | 124 | rule kallisto_feature_count: 125 | input: 126 | index="include/kallisto/mismatch_{type}_{k}.idx", 127 | t2g="include/kallisto/mismatch_t2g_{type}_{k}.txt", 128 | R1="fastq_merged/{sample}_L001_R1_001.fastq.gz", 129 | R2="fastq_merged/{sample}_L001_R2_001.fastq.gz" 130 | output: 131 | directory("kallisto/features/{sample}.{type}_{k}") 132 | shell: 133 | "kb count -i {input.index} -g {input.t2g} -x {chemistry} -t {cores} -w {whitelist} -o {output} {input.R1} {input.R2}" 134 | 135 | rule cellranger_feature: 136 | input: 137 | transcriptome="/gpfs/data/sequence/cellranger-refdata/refdata-cellranger-GRCh38-3.0.0", 138 | libraries="include/cellranger/libraries_{id}.csv", 139 | featureRef="include/cellranger/feature-ref_{id}.csv" 140 | params: 141 | chemistry="SC5P-R2" 142 | output: 143 | directory("cellranger_{id}") 144 | shell: 145 | "module unload miniconda3; module load cellranger/3.1.0; " 146 | "cellranger count --id={output} --transcriptome={input.transcriptome} --expect-cells={cells} --libraries={input.libraries} --feature-ref={input.featureRef} --chemistry={params.chemistry} --nosecondary --nopreflight --disable-ui --localcores={cores}" 147 | -------------------------------------------------------------------------------- /Snakemake/10X_VDJ/include/feature-ref.csv: -------------------------------------------------------------------------------- 1 | id,name,read,pattern,sequence,feature_type 2 | CD3,CD3_UCHT1_TotalC,R2,^NNNNNNNNNN(BC)NNNNNNNNN,CTCATTGTAACTCCT,Antibody Capture 3 | CD19,CD19_HIB19_TotalC,R2,^NNNNNNNNNN(BC)NNNNNNNNN,CTGGGCAATTACTCG,Antibody Capture 4 | CD45RA,CD45RA_HI100_TotalC,R2,^NNNNNNNNNN(BC)NNNNNNNNN,TCAATCCTTCCGCTT,Antibody Capture 5 | 
CD4,CD4_RPA-T4_TotalC,R2,^NNNNNNNNNN(BC)NNNNNNNNN,TGTTCCCGCTCAACT,Antibody Capture 6 | CD8a,CD8a_RPA-T8_TotalC,R2,^NNNNNNNNNN(BC)NNNNNNNNN,GCTGCGCTTTCCATT,Antibody Capture 7 | CD14,CD14_M5E2_TotalC,R2,^NNNNNNNNNN(BC)NNNNNNNNN,TCTCAGACCTCCGTA,Antibody Capture 8 | CD16,CD16_3G8_TotalC,R2,^NNNNNNNNNN(BC)NNNNNNNNN,AAGTTCACTCTTTGC,Antibody Capture 9 | CD56,CD56_QA17A16_TotalC,R2,^NNNNNNNNNN(BC)NNNNNNNNN,TTCGCCGCATTGAGT,Antibody Capture 10 | CD25,CD25_BC96_TotalC,R2,^NNNNNNNNNN(BC)NNNNNNNNN,TTTGTCCTGTACGCC,Antibody Capture 11 | CD45RO,CD45RO_UCHL1_TotalC,R2,^NNNNNNNNNN(BC)NNNNNNNNN,CTCCGAATCATGTTG,Antibody Capture 12 | PD-1,PD-1_EH12.2H7_TotalC,R2,^NNNNNNNNNN(BC)NNNNNNNNN,ACAGCGCCGTATTTA,Antibody Capture 13 | TIGIT,TIGIT_A15153G_TotalC,R2,^NNNNNNNNNN(BC)NNNNNNNNN,TTGCTTACCGCCAGA,Antibody Capture 14 | isotype_control_IgG1,isotype_control_IgG1_MOPC-21_TotalC,R2,^NNNNNNNNNN(BC)NNNNNNNNN,GCCGGACGACATTAA,Antibody Capture 15 | isotype_control_IgG2a,isotype_control_IgG2a_MOPC-173_TotalC,R2,^NNNNNNNNNN(BC)NNNNNNNNN,CTCCTACCTAAACTG,Antibody Capture 16 | isotype_control_IgG2b,isotype_control_IgG2b_MPC-11_TotalC,R2,^NNNNNNNNNN(BC)NNNNNNNNN,ATATGTATCACGCGA,Antibody Capture 17 | CD127,CD127_A019D5_TotalC,R2,^NNNNNNNNNN(BC)NNNNNNNNN,GTGTGTTGTCCTATG,Antibody Capture 18 | CD15,CD15_W6D3_TotalC,R2,^NNNNNNNNNN(BC)NNNNNNNNN,TCACCAGTACCTAGT,Antibody Capture -------------------------------------------------------------------------------- /Snakemake/10X_VDJ/runSnakemake.sh: -------------------------------------------------------------------------------- 1 | time snakemake kallisto/features/vdj_v1_hs_pbmc3_5gex_protein_antibody_S1.ADT_15 2 | time snakemake cite-seq-count/vdj_v1_hs_pbmc3_5gex_protein_antibody_S1_d1_ADT 3 | time snakemake cellranger_vdj_v1_hs_pbmc3_5gex_protein_antibody 4 | time snakemake cite-seq-count/vdj_v1_hs_pbmc3_5gex_protein_antibody_S1_d1_ADT_nocorrect 5 | -------------------------------------------------------------------------------- /Snakemake/10X_v3/Snakefile: 
-------------------------------------------------------------------------------- 1 | cells=10000 2 | cores=16 3 | memory="64G" 4 | whitelist="include/10xv3_whitelist.txt" 5 | featureRef="include/feature-ref.csv" 6 | chemistry="10xv3" 7 | HTO_num=0 8 | HTO_skip=HTO_num+2 9 | 10 | def get_trim_length(wcs): 11 | Trim = "10" 12 | if wcs.type == 'HTO': 13 | Trim = "0" 14 | return [Trim] 15 | 16 | rule concat: 17 | input: 18 | R11="fastq/{sample}_L001_R1_001.fastq.gz", 19 | R12="fastq/{sample}_L002_R1_001.fastq.gz", 20 | R21="fastq/{sample}_L001_R2_001.fastq.gz", 21 | R22="fastq/{sample}_L002_R2_001.fastq.gz" 22 | output: 23 | R1="fastq_merged/{sample}_L001_R1_001.fastq.gz", 24 | R2="fastq_merged/{sample}_L001_R2_001.fastq.gz" 25 | shell: # merge lanes L001+L002 per read; '&&' (not '&') so the R1 merge has finished and succeeded before the rule completes 26 | "cat {input.R11} {input.R12} > {output.R1} && " 27 | "cat {input.R21} {input.R22} > {output.R2}" 28 | 29 | rule citeseqcount: 30 | input: 31 | R1="fastq_merged/{sample}_L001_R1_001.fastq.gz", 32 | R2="fastq_merged/{sample}_L001_R2_001.fastq.gz", 33 | features="include/cite-seq-count/CSC_{type}-ref.csv", 34 | whitelist={whitelist} 35 | params: 36 | Trim=get_trim_length 37 | output: 38 | directory("cite-seq-count/{sample}_d{hamming}_{type}") 39 | shell: 40 | "CITE-seq-Count -T {cores} --max-error {wildcards.hamming} -R1 {input.R1} -R2 {input.R2} -t {input.features} -cbf 1 -cbl 16 -umif 17 -umil 28 -o {output} -wl {input.whitelist} -cells {cells} -u unmapped_{wildcards.sample}.csv --start-trim {params.Trim}" 41 | 42 | rule citeseqcount_nocorrect: 43 | input: 44 | R1="fastq_merged/{sample}_L001_R1_001.fastq.gz", 45 | R2="fastq_merged/{sample}_L001_R2_001.fastq.gz", 46 | features="include/cite-seq-count/CSC_{type}-ref.csv", 47 | whitelist={whitelist} 48 | params: 49 | Trim=get_trim_length 50 | output: 51 | directory("cite-seq-count/{sample}_d{hamming}_{type}_nocorrect") 52 | shell: 53 | "CITE-seq-Count -T {cores} --no_umi_correction --max-error {wildcards.hamming} -R1 {input.R1} -R2 {input.R2} -t {input.features} -cbf 1 -cbl 16 -umif 17
-umil 28 -o {output} -wl {input.whitelist} -cells {cells} -u unmapped_{wildcards.sample}.csv --start-trim {params.Trim}" 54 | 55 | rule featureList_ADT: 56 | input: 57 | {featureRef} 58 | output: 59 | "include/ADT-ref.csv" 60 | shell: 61 | "tail -n +{HTO_skip} {input} > {output}" 62 | 63 | rule featureList_HTO: 64 | input: 65 | {featureRef} 66 | output: 67 | "include/HTO-ref.csv" 68 | shell: 69 | "tail -n +2 {input} | head -n {HTO_num} > {output}" 70 | 71 | rule featureList_citeseqcount: 72 | input: 73 | "include/{type}-ref.csv" 74 | output: 75 | "include/cite-seq-count/CSC_{type}-ref.csv" 76 | shell: 77 | "awk -F, '{{print $5\",\"$1}}' {input} > {output}" 78 | 79 | rule featureList_kallisto: 80 | input: 81 | "include/{type}-ref.csv" 82 | output: 83 | "include/kallisto/kallisto_{type}-ref.csv" 84 | shell: 85 | "awk -F, '{{print $1\",\"$5}}' {input} > {output}" 86 | 87 | rule featureList_kallisto_addBase: 88 | input: 89 | "include/{type}-ref.csv" 90 | output: 91 | "include/kallisto/kallisto_{type}_{addBase}-ref.csv" 92 | shell: 93 | "awk -F, '{{print $1\",\"$5\"{wildcards.addBase}\"}}' {input} > {output}" 94 | 95 | rule kallisto_GEX_count: 96 | input: 97 | index="include/kallisto/idx_human.idx", 98 | t2g="include/kallisto/t2g_human.txt", 99 | R1="fastq_merged/{sample}_L001_R1_001.fastq.gz", 100 | R2="fastq_merged/{sample}_L001_R2_001.fastq.gz" 101 | output: 102 | directory("kallisto/{sample}") 103 | shell: 104 | "kb count -i {input.index} -g {input.t2g} -x {chemistry} -t {cores} -m {memory} -w {whitelist} -o {output} {input.R1} {input.R2}" 105 | 106 | rule kallisto_kite: 107 | input: 108 | "include/kallisto/kallisto_{type}-ref.csv" 109 | output: 110 | t2g="include/kallisto/mismatch_t2g_{type}_{k}.txt", 111 | fa="include/kallisto/mismatch_{type}_{k}.fa" 112 | shell: 113 | "python3 ~/kite/featuremap/featuremap.py {input} --t2g {output.t2g} --fa {output.fa}" 114 | 115 | rule kallisto_kite_index: 116 | input: 117 | "include/kallisto/mismatch_{type}_{k}.fa" 118 | output: 
119 | "include/kallisto/mismatch_{type}_{k}.idx" 120 | shell: 121 | "module load kallisto; " 122 | "kallisto index -i {output} -k {wildcards.k} {input}" 123 | 124 | rule kallisto_feature_count: 125 | input: 126 | index="include/kallisto/mismatch_{type}_{k}.idx", 127 | t2g="include/kallisto/mismatch_t2g_{type}_{k}.txt", 128 | R1="fastq_merged/{sample}_L001_R1_001.fastq.gz", 129 | R2="fastq_merged/{sample}_L001_R2_001.fastq.gz" 130 | output: 131 | directory("kallisto/features/{sample}.{type}_{k}") 132 | shell: 133 | "kb count -i {input.index} -g {input.t2g} -x {chemistry} -t {cores} -w {whitelist} -o {output} {input.R1} {input.R2}" 134 | 135 | rule cellranger_feature: 136 | input: 137 | transcriptome="/gpfs/data/sequence/cellranger-refdata/refdata-cellranger-GRCh38-3.0.0", 138 | libraries="include/cellranger/libraries_{id}.csv", 139 | featureRef="include/cellranger/feature-ref_{id}.csv" 140 | params: 141 | chemistry="SC3Pv3" 142 | output: 143 | directory("cellranger_{id}") 144 | shell: 145 | "module unload miniconda3; module load cellranger/3.1.0; " 146 | "cellranger count --id={output} --transcriptome={input.transcriptome} --expect-cells={cells} --libraries={input.libraries} --feature-ref={input.featureRef} --chemistry={params.chemistry} --nosecondary --nopreflight --disable-ui --localcores={cores}" 147 | -------------------------------------------------------------------------------- /Snakemake/10X_v3/include/feature-ref.csv: -------------------------------------------------------------------------------- 1 | id,name,read,pattern,sequence,feature_type 2 | CD3,CD3_TotalSeqB,R2,5PNNNNNNNNNN(BC)NNNNNNNNN,AACAAGACCCTTGAG,Antibody Capture 3 | CD4,CD4_TotalSeqB,R2,5PNNNNNNNNNN(BC)NNNNNNNNN,TACCCGTAATAGCGT,Antibody Capture 4 | CD8a,CD8a_TotalSeqB,R2,5PNNNNNNNNNN(BC)NNNNNNNNN,ATTGGCACTCAGATG,Antibody Capture 5 | CD14,CD14_TotalSeqB,R2,5PNNNNNNNNNN(BC)NNNNNNNNN,GAAAGTCAAAGCACT,Antibody Capture 6 | CD15,CD15_TotalSeqB,R2,5PNNNNNNNNNN(BC)NNNNNNNNN,ACGAATCAATCTGTG,Antibody 
Capture 7 | CD16,CD16_TotalSeqB,R2,5PNNNNNNNNNN(BC)NNNNNNNNN,GTCTTTGTCAGTGCA,Antibody Capture 8 | CD56,CD56_TotalSeqB,R2,5PNNNNNNNNNN(BC)NNNNNNNNN,GTTGTCCGACAATAC,Antibody Capture 9 | CD19,CD19_TotalSeqB,R2,5PNNNNNNNNNN(BC)NNNNNNNNN,TCAACGCTTGGCTAG,Antibody Capture 10 | CD25,CD25_TotalSeqB,R2,5PNNNNNNNNNN(BC)NNNNNNNNN,GTGCATTCAACAGTA,Antibody Capture 11 | CD45RA,CD45RA_TotalSeqB,R2,5PNNNNNNNNNN(BC)NNNNNNNNN,GATGAGAACAGGTTT,Antibody Capture 12 | CD45RO,CD45RO_TotalSeqB,R2,5PNNNNNNNNNN(BC)NNNNNNNNN,TGCATGTCATCGGTG,Antibody Capture 13 | PD-1,PD-1_TotalSeqB,R2,5PNNNNNNNNNN(BC)NNNNNNNNN,AAGTCGTGAGGCATG,Antibody Capture 14 | TIGIT,TIGIT_TotalSeqB,R2,5PNNNNNNNNNN(BC)NNNNNNNNN,TGAAGGCTCATTTGT,Antibody Capture 15 | CD127,CD127_TotalSeqB,R2,5PNNNNNNNNNN(BC)NNNNNNNNN,ACATTGACGCAACTA,Antibody Capture 16 | IgG2a,IgG2a_control_TotalSeqB,R2,5PNNNNNNNNNN(BC)NNNNNNNNN,CTCTATTCAGACCAG,Antibody Capture 17 | IgG1,IgG1_control_TotalSeqB,R2,5PNNNNNNNNNN(BC)NNNNNNNNN,ACTCACTGGAGTCTC,Antibody Capture 18 | IgG2b,IgG2b_control_TotalSeqB,R2,5PNNNNNNNNNN(BC)NNNNNNNNN,ATCACATCGTTGCCA,Antibody Capture 19 | -------------------------------------------------------------------------------- /Snakemake/10X_v3/runSnakemake.sh: -------------------------------------------------------------------------------- 1 | time snakemake kallisto/features/pbmc_10k_protein_v3_antibody_S2.ADT_15 2 | time snakemake cite-seq-count/pbmc_10k_protein_v3_antibody_S2_d1_ADT 3 | time snakemake cellranger_pbmc_10k_protein_v3_antibody 4 | time snakemake cite-seq-count/pbmc_10k_protein_v3_antibody_S2_d1_ADT_nocorrect 5 | -------------------------------------------------------------------------------- /Snakemake/Snakefile: -------------------------------------------------------------------------------- 1 | cells=15000 2 | cores=16 3 | memory="64G" 4 | whitelist="include/10xv2_whitelist.txt" 5 | featureRef="include/feature-ref.csv" 6 | chemistry="10xv2" 7 | HTO_num=6 8 | HTO_skip=HTO_num+2 9 | 10 | def get_trim_length(wcs): 11 | 
Trim = "10" 12 | if wcs.type == 'HTO': 13 | Trim = "0" 14 | return [Trim] 15 | 16 | rule concat: 17 | input: 18 | R11="data/fastq/{sample}_L001_R1_001.fastq.gz", 19 | R12="data/fastq/{sample}_L002_R1_001.fastq.gz", 20 | R21="data/fastq/{sample}_L001_R2_001.fastq.gz", 21 | R22="data/fastq/{sample}_L002_R2_001.fastq.gz" 22 | output: 23 | R1="fastq_merged/{sample}_L001_R1_001.fastq.gz", 24 | R2="fastq_merged/{sample}_L001_R2_001.fastq.gz" 25 | shell: # merge lanes L001+L002 per read; '&&' (not '&') so the R1 merge has finished and succeeded before the rule completes 26 | "cat {input.R11} {input.R12} > {output.R1} && " 27 | "cat {input.R21} {input.R22} > {output.R2}" 28 | 29 | rule citeseqcount: 30 | input: 31 | R1="fastq_merged/{sample}_L001_R1_001.fastq.gz", 32 | R2="fastq_merged/{sample}_L001_R2_001.fastq.gz", 33 | features="include/cite-seq-count/CSC_{type}-ref.csv", 34 | whitelist={whitelist} 35 | params: 36 | Trim=get_trim_length 37 | output: 38 | directory("cite-seq-count/{sample}_d{hamming}_{type}") 39 | shell: 40 | "CITE-seq-Count -T {cores} --max-error {wildcards.hamming} -R1 {input.R1} -R2 {input.R2} -t {input.features} -cbf 1 -cbl 16 -umif 17 -umil 26 -o {output} -wl {input.whitelist} -cells {cells} -u unmapped_{wildcards.sample}.csv --start-trim {params.Trim}" 41 | 42 | rule citeseqcount_nocorrect: 43 | input: 44 | R1="fastq_merged/{sample}_L001_R1_001.fastq.gz", 45 | R2="fastq_merged/{sample}_L001_R2_001.fastq.gz", 46 | features="include/cite-seq-count/CSC_{type}-ref.csv", 47 | whitelist={whitelist} 48 | params: 49 | Trim=get_trim_length 50 | output: 51 | directory("cite-seq-count/{sample}_d{hamming}_{type}_nocorrect") 52 | shell: 53 | "CITE-seq-Count -T {cores} --no_umi_correction --max-error {wildcards.hamming} -R1 {input.R1} -R2 {input.R2} -t {input.features} -cbf 1 -cbl 16 -umif 17 -umil 26 -o {output} -wl {input.whitelist} -cells {cells} -u unmapped_{wildcards.sample}.csv --start-trim {params.Trim}" 54 | 55 | rule featureList_ADT: 56 | input: 57 | {featureRef} 58 | output: 59 | "include/ADT-ref.csv" 60 | shell: 61 | "tail -n +{HTO_skip} {input} > {output}" 62 | 63 |
rule featureList_HTO: 64 | input: 65 | {featureRef} 66 | output: 67 | "include/HTO-ref.csv" 68 | shell: 69 | "tail -n +2 {input} | head -n {HTO_num} > {output}" 70 | 71 | rule featureList_citeseqcount: 72 | input: 73 | "include/{type}-ref.csv" 74 | output: 75 | "include/cite-seq-count/CSC_{type}-ref.csv" 76 | shell: 77 | "awk -F, '{{print $5\",\"$1}}' {input} > {output}" 78 | 79 | rule featureList_kallisto: 80 | input: 81 | "include/{type}-ref.csv" 82 | output: 83 | "include/kallisto/kallisto_{type}-ref.csv" 84 | shell: 85 | "awk -F, '{{print $1\",\"$5}}' {input} > {output}" 86 | 87 | rule featureList_kallisto_addBase: 88 | input: 89 | "include/{type}-ref.csv" 90 | output: 91 | "include/kallisto/kallisto_{type}_{addBase}-ref.csv" 92 | shell: 93 | "awk -F, '{{print $1\",\"$5\"{wildcards.addBase}\"}}' {input} > {output}" 94 | 95 | rule kallisto_GEX_ref: 96 | input: 97 | gtf="/gpfs/data/koralovlab/ref/refdata-cellranger-GRCh38-3.0.0/genes/genes.gtf", 98 | fa="/gpfs/data/koralovlab/ref/refdata-cellranger-GRCh38-3.0.0/fasta/genome.fa" 99 | output: 100 | index="/gpfs/data/koralovlab/ref/refdata-cellranger-GRCh38-3.0.0/kallisto/idx_cellranger.idx", 101 | t2g="/gpfs/data/koralovlab/ref/refdata-cellranger-GRCh38-3.0.0/kallisto/t2g_cellranger.txt", 102 | fa="/gpfs/data/koralovlab/ref/refdata-cellranger-GRCh38-3.0.0/kallisto/cDNA_cellranger.fa" 103 | shell: 104 | "kb ref -i {output.index} -g {output.t2g} -f1 {output.fa} {input.fa} {input.gtf}" 105 | 106 | rule kallisto_GEX_count: 107 | input: 108 | t2g="/gpfs/data/koralovlab/ref/refdata-cellranger-GRCh38-3.0.0/kallisto/t2g_cellranger.txt", 109 | index="/gpfs/data/koralovlab/ref/refdata-cellranger-GRCh38-3.0.0/kallisto/idx_cellranger.idx", 110 | R1="fastq_merged/{sample}_L001_R1_001.fastq.gz", 111 | R2="fastq_merged/{sample}_L001_R2_001.fastq.gz" 112 | output: 113 | directory("kallisto/gex/{sample}") 114 | shell: 115 | "kb count -i {input.index} -g {input.t2g} -x {chemistry} -t {cores} -w {whitelist} -o {output} {input.R1} 
{input.R2}" 116 | 117 | rule kallisto_GEX_count_CD45: 118 | input: 119 | t2g="/gpfs/data/koralovlab/ref/refdata-cellranger-GRCh38-3.0.0/kallisto/t2g_cellranger_CD45.txt", 120 | index="/gpfs/data/koralovlab/ref/refdata-cellranger-GRCh38-3.0.0/kallisto/idx_cellranger.idx", 121 | R1="fastq_merged/{sample}_L001_R1_001.fastq.gz", 122 | R2="fastq_merged/{sample}_L001_R2_001.fastq.gz" 123 | output: 124 | directory("kallisto/gex_CD45/{sample}") 125 | shell: 126 | "kb count -i {input.index} -g {input.t2g} -x {chemistry} -t {cores} -w {whitelist} -o {output} {input.R1} {input.R2}" 127 | 128 | rule kallisto_kite: 129 | input: 130 | "include/kallisto/kallisto_{type}-ref.csv" 131 | output: 132 | t2g="include/kallisto/mismatch_t2g_{type}_{k}.txt", 133 | fa="include/kallisto/mismatch_{type}_{k}.fa" 134 | shell: 135 | "python3 ~/kite/featuremap/featuremap.py {input} --t2g {output.t2g} --fa {output.fa}" 136 | 137 | rule kallisto_kite_index: 138 | input: 139 | "include/kallisto/mismatch_{type}_{k}.fa" 140 | output: 141 | "include/kallisto/mismatch_{type}_{k}.idx" 142 | shell: 143 | "module load kallisto; " 144 | "kallisto index -i {output} -k {wildcards.k} {input}" 145 | 146 | rule kallisto_feature_count: 147 | input: 148 | index="include/kallisto/mismatch_{type}_{k}.idx", 149 | t2g="include/kallisto/mismatch_t2g_{type}_{k}.txt", 150 | R1="fastq_merged/{sample}_L001_R1_001.fastq.gz", 151 | R2="fastq_merged/{sample}_L001_R2_001.fastq.gz" 152 | output: 153 | directory("kallisto/features/{sample}.{type}_{k}") 154 | shell: 155 | "kb count -i {input.index} -g {input.t2g} -x {chemistry} -t {cores} -w {whitelist} -o {output} {input.R1} {input.R2}" 156 | 157 | rule cellranger_feature: 158 | input: 159 | transcriptome="/gpfs/data/sequence/cellranger-refdata/refdata-cellranger-GRCh38-3.0.0", 160 | libraries="include/cellranger/libraries_{id}.csv", 161 | featureRef="include/cellranger/feature-ref_{id}.csv" 162 | params: 163 | chemistry="SC5P-R2" 164 | output: 165 | directory("cellranger_{id}") 
166 | shell: 167 | "module unload miniconda3; module load cellranger/3.1.0; " 168 | "cellranger count --id={output} --transcriptome={input.transcriptome} --expect-cells={cells} --libraries={input.libraries} --feature-ref={input.featureRef} --chemistry={params.chemistry} --nosecondary --nopreflight --disable-ui --localcores={cores}" 169 | -------------------------------------------------------------------------------- /Snakemake/include/feature-ref.csv: -------------------------------------------------------------------------------- 1 | id,name,read,pattern,sequence,feature_type 2 | 1,1,R2,^(BC),AGGACCATCCAA,Antibody Capture 3 | 2,2,R2,^(BC),ACATGTTACCGT,Antibody Capture 4 | 3,3,R2,^(BC),AGCTTACTATCC,Antibody Capture 5 | 4,4,R2,^(BC),TCGATAATGCGA,Antibody Capture 6 | 5,5,R2,^(BC),GAGGCTGAGCTA,Antibody Capture 7 | 6,6,R2,^(BC),GTGTGACGTATT,Antibody Capture 8 | CD103,CD103,R2,5PNNNNNNNNNN(BC),GACCTCATTGTGAAT,Antibody Capture 9 | CD223,CD223,R2,5PNNNNNNNNNN(BC),CATTTGTCTGCCGGT,Antibody Capture 10 | CD274,CD274,R2,5PNNNNNNNNNN(BC),GTTGTCCGACAATAC,Antibody Capture 11 | CD45,CD45,R2,5PNNNNNNNNNN(BC),TGCAATTACCCGGAT,Antibody Capture 12 | CD134,CD134,R2,5PNNNNNNNNNN(BC),AACCCACCGTTGTTA,Antibody Capture 13 | CD56,CD56,R2,5PNNNNNNNNNN(BC),TCCTTTCCTGATAGG,Antibody Capture 14 | CD366,CD366,R2,5PNNNNNNNNNN(BC),TGTCCTACCCAACTT,Antibody Capture 15 | HLA-DR,HLA-DR,R2,5PNNNNNNNNNN(BC),AATAGCGAGCAAGTA,Antibody Capture 16 | TCRab,TCRab,R2,5PNNNNNNNNNN(BC),CGTAACGTAGAGCGA,Antibody Capture 17 | CD279,CD279,R2,5PNNNNNNNNNN(BC),ACAGCGCCGTATTTA,Antibody Capture 18 | CD45RO,CD45RO,R2,5PNNNNNNNNNN(BC),CTCCGAATCATGTTG,Antibody Capture 19 | CD152,CD152,R2,5PNNNNNNNNNN(BC),ATGGTTCACGTAATC,Antibody Capture 20 | CD107a,CD107a,R2,5PNNNNNNNNNN(BC),CAGCCCACTGCAATA,Antibody Capture 21 | CD194,CD194,R2,5PNNNNNNNNNN(BC),AGCTTACCTGCACGA,Antibody Capture 22 | CD11b,CD11b,R2,5PNNNNNNNNNN(BC),GACAAGTGATCTGCA,Antibody Capture 23 | CD26,CD26,R2,5PNNNNNNNNNN(BC),GGTGGCTAGATAATG,Antibody Capture 24 | 
CD14,CD14,R2,5PNNNNNNNNNN(BC),TCTCAGACCTCCGTA,Antibody Capture 25 | CD127,CD127,R2,5PNNNNNNNNNN(BC),GTGTGTTGTCCTATG,Antibody Capture 26 | CD28,CD28,R2,5PNNNNNNNNNN(BC),TGAGAACGACCCTAA,Antibody Capture 27 | CD183,CD183,R2,5PNNNNNNNNNN(BC),GCGATGGTAGATTAT,Antibody Capture 28 | CD62L,CD62L,R2,5PNNNNNNNNNN(BC),GTCCCTGCAACTTGA,Antibody Capture 29 | CD117,CD117,R2,5PNNNNNNNNNN(BC),AGACTAATAGCTGAC,Antibody Capture 30 | CD123,CD123,R2,5PNNNNNNNNNN(BC),CTTCACTCTGTCAGG,Antibody Capture 31 | TCRgd,TCRgd,R2,5PNNNNNNNNNN(BC),CTTCCGATTCATTCA,Antibody Capture 32 | CD138,CD138,R2,5PNNNNNNNNNN(BC),GTATAGACCAAAGCC,Antibody Capture 33 | CD1a,CD1a,R2,5PNNNNNNNNNN(BC),GATCGTGTTGTGTTA,Antibody Capture 34 | CD25,CD25,R2,5PNNNNNNNNNN(BC),TTTGTCCTGTACGCC,Antibody Capture 35 | CD197,CD197,R2,5PNNNNNNNNNN(BC),AGTTCAGTCAACCGA,Antibody Capture 36 | CD4,CD4,R2,5PNNNNNNNNNN(BC),TGTTCCCGCTCAACT,Antibody Capture 37 | IgG1,IgG1,R2,5PNNNNNNNNNN(BC),GCCGGACGACATTAA,Antibody Capture 38 | CD80,CD80,R2,5PNNNNNNNNNN(BC),ACGAATCAATCTGTG,Antibody Capture 39 | IgG2A,IgG2A,R2,5PNNNNNNNNNN(BC),CTCCTACCTAAACTG,Antibody Capture 40 | CD31,CD31,R2,5PNNNNNNNNNN(BC),ACCTTTATGCCACGG,Antibody Capture 41 | CD141,CD141,R2,5PNNNNNNNNNN(BC),GGATAACCGCGCTTT,Antibody Capture 42 | CD2,CD2,R2,5PNNNNNNNNNN(BC),TACGATTTGTCAGGG,Antibody Capture 43 | CD66b,CD66b,R2,5PNNNNNNNNNN(BC),AGCTGTAAGTTTCGG,Antibody Capture 44 | CD24,CD24,R2,5PNNNNNNNNNN(BC),AGATTCCTTCGTGTT,Antibody Capture 45 | CD3,CD3,R2,5PNNNNNNNNNN(BC),CTCATTGTAACTCCT,Antibody Capture 46 | CD1c,CD1c,R2,5PNNNNNNNNNN(BC),GAGCTACTTCACTCG,Antibody Capture 47 | CD86,CD86,R2,5PNNNNNNNNNN(BC),GTCTTTGTCAGTGCA,Antibody Capture 48 | CD5,CD5,R2,5PNNNNNNNNNN(BC),CATTAACGGGATGCC,Antibody Capture 49 | CD44,CD44,R2,5PNNNNNNNNNN(BC),AATCCTTCCGAATGT,Antibody Capture 50 | CD69,CD69,R2,5PNNNNNNNNNN(BC),GTCTCTTGGCTTAAA,Antibody Capture 51 | HLA-ABC,HLA-ABC,R2,5PNNNNNNNNNN(BC),TATGCGAGGCTTATC,Antibody Capture 52 | CD19,CD19,R2,5PNNNNNNNNNN(BC),CTGGGCAATTACTCG,Antibody Capture 53 | 
CD45RA,CD45RA,R2,5PNNNNNNNNNN(BC),TCAATCCTTCCGCTT,Antibody Capture 54 | CD8,CD8,R2,5PNNNNNNNNNN(BC),GCTGCGCTTTCCATT,Antibody Capture 55 | CD70,CD70,R2,5PNNNNNNNNNN(BC),CGCGAACATAAGAAG,Antibody Capture 56 | CD196,CD196,R2,5PNNNNNNNNNN(BC),GATCCCTTTGTCACT,Antibody Capture 57 | CD39,CD39,R2,5PNNNNNNNNNN(BC),TTACCTGGTATCCGT,Antibody Capture 58 | EpCAM,EpCAM,R2,5PNNNNNNNNNN(BC),TTCCGAGCAAGTATC,Antibody Capture 59 | CD30,CD30,R2,5PNNNNNNNNNN(BC),TCAGGGTGTGCTGTA,Antibody Capture 60 | -------------------------------------------------------------------------------- /Snakemake/runSnakemake.sh: -------------------------------------------------------------------------------- 1 | time snakemake kallisto/gex/c1 2 | time snakemake kallisto/features/H1_S6.HTO_A_13 3 | time snakemake kallisto/features/A1_S5.ADT_15 4 | time snakemake cite-seq-count/A1_S5_d1_ADT 5 | time snakemake cite-seq-count/H1_S6_d1_HTO 6 | time snakemake cellranger_A1 7 | time snakemake cellranger_H1 8 | time snakemake cite-seq-count/H1_S6_d1_HTO_nocorrect 9 | time snakemake cite-seq-count/A1_S5_d1_ADT_nocorrect 10 | -------------------------------------------------------------------------------- /Volume and cell number titration.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "CITE-seq optimization - Staining volume and cell number titration" 3 | author: "Terkild Brink Buus" 4 | date: "30/3/2020" 5 | output: github_document 6 | --- 7 | 8 | ```{r setup, include=FALSE} 9 | knitr::opts_chunk$set(warning=FALSE, message=FALSE) 10 | options(stringsAsFactors=FALSE) 11 | ``` 12 | 13 | ## Load utilities 14 | 15 | Including libraries, plotting and color settings and custom utility functions 16 | 17 | ```{r loadLibraries, results='hide', message=FALSE, warning=FALSE} 18 | set.seed(114) 19 | require("Seurat", quietly=T) 20 | require("tidyverse", quietly=T) 21 | library("Matrix", quietly=T) 22 | library("patchwork", quietly=T) 23 | 24 | ## Load ggplot theme and 
defaults 25 | source("R/ggplot_settings.R") 26 | 27 | ## Load helper functions 28 | source("R/Utilities.R") 29 | 30 | ## Load predefined color schemes 31 | source("R/color.R") 32 | 33 | ## Load feature_rankplot functions 34 | source("R/feature_rankplot.R") 35 | source("R/feature_rankplot_hist.R") 36 | source("R/feature_rankplot_hist_custom.R") 37 | 38 | outdir <- "figures" 39 | data.Seurat <- "data/5P-CITE-seq_Titration.rds" 40 | data.abpanel <- "data/Supplementary_Table_1.xlsx" 41 | data.markerStats <- "data/markerByClusterStats.tsv" 42 | 43 | ## Make a custom function for formatting the concentration scale 44 | scaleFUNformat <- function(x) sprintf("%.2f", x) 45 | ``` 46 | 47 | ## Load Seurat object 48 | 49 | Subset to only focus on conditions with 200k or 1 mio cells and dilution factor 4 (thus comparing 50µl to 25µl staining volume with 1 mio or 200k PBMCs at staining). 50 | 51 | ```{r loadSeurat} 52 | object <- readRDS(file=data.Seurat) 53 | 54 | ## Show number of cells from each sample 55 | table(object$group) 56 | 57 | object <- subset(object, subset=dilution == "DF4" & cellsAtStaining %in% c("200k","1000k")) 58 | object 59 | color.volnum <- c("50µl_1000k"="#0082c8","25µl_1000k"="#f58231","25µl_200k"="#911eb4") 60 | shape.volnum <- c("50µl_1000k"=21,"25µl_1000k"=22,"25µl_200k"=23) 61 | object$volnum <- factor(paste(object$volume,object$cellsAtStaining,sep="_"),levels=names(color.volnum)) 62 | ``` 63 | 64 | ## Load Ab panel annotation and concentrations 65 | 66 | Marker stats is reused in other comparisons and was calculated in the end of the preprocessing vignette. 67 | 68 | ```{r loadABPanel} 69 | abpanel <- data.frame(readxl::read_excel(data.abpanel)) 70 | rownames(abpanel) <- abpanel$Marker 71 | 72 | ## As we are only working with dilution factor 4 samples here, we want to show labels accordingly 73 | # a bit of a hack... 
74 | abpanel$conc_µg_per_mL <- abpanel$conc_µg_per_mL/4 75 | 76 | markerStats <- read.table(data.markerStats) 77 | markerStats.PBMC <- markerStats[markerStats$tissue == "PBMC",] 78 | rownames(markerStats) <- paste(markerStats$marker,markerStats$tissue,sep="_") 79 | 80 | ## Make a ordering vector ordering markers per concentration and total UMI count 81 | marker.order <- markerStats.PBMC$marker[order(markerStats.PBMC$conc_µg_per_mL, markerStats.PBMC$UMItotal, decreasing=TRUE)] 82 | 83 | head(abpanel) 84 | head(markerStats) 85 | ``` 86 | 87 | ## Cell type and tissue overview 88 | 89 | Make tSNE plots colored by cell type, cluster and tissue of origin. 90 | 91 | ```{r tsnePlots, fig.height=3, fig.width=7} 92 | p.tsne.volume <- DimPlot(object, group.by="volnum", reduction="tsne", pt.size=0.1, combine=FALSE)[[1]] + theme_get() + facet_wrap(~"Volume") + scale_color_manual(values=color.volnum) 93 | 94 | p.tsne.cluster <- DimPlot(object, group.by="supercluster", reduction="tsne", pt.size=0.1, combine=FALSE)[[1]] + theme_get() + scale_color_manual(values=color.supercluster) + facet_wrap(~"Cell types") 95 | 96 | p.tsne.finecluster <- DimPlot(object, label=TRUE, label.size=3, reduction="tsne", group.by="fineCluster", pt.size=0.1, combine=FALSE)[[1]] + theme_get() + facet_wrap( ~"Clusters") + guides(col=F) 97 | 98 | p.tsne.cluster + p.tsne.finecluster + p.tsne.volume 99 | ``` 100 | 101 | ## Overall ADT counts 102 | 103 | Extract UMI data and calculate UMI sum per marker within each condition. 

```{r calculateUMIcountsPerMarker}
## Get the data: raw ADT UMI counts (markers in rows, one column per cell)
ADT.matrix <- data.frame(GetAssayData(object, assay="ADT.kallisto", slot="counts"))
ADT.matrix$marker <- rownames(ADT.matrix)
## Look up the staining concentration of each marker in the antibody panel sheet
ADT.matrix$conc <- abpanel[ADT.matrix$marker,"conc_µg_per_mL"]
## Long format: pivot_longer() puts the former column names in `name` and the counts in `value`
ADT.matrix <- ADT.matrix %>% pivot_longer(c(-marker,-conc))

## Get cell annotations
cell.annotation <- FetchData(object, vars=c("volnum"))

## Calculate marker UMI sum within each staining condition (volnum)
## NOTE(review): the lookup cell.annotation[name, ...] assumes the column names created by
## data.frame() still equal the cell names used as row names of cell.annotation - confirm.
ADT.matrix.agg <- ADT.matrix %>% group_by(volume=cell.annotation[name,"volnum"], marker, conc) %>% summarise(sum=sum(value))

## Order markers by concentration
ADT.matrix.agg$marker.byConc <- factor(ADT.matrix.agg$marker, levels=marker.order)

## Extract marker annotation (rows of the panel sheet in plotting order)
ann.markerConc <- abpanel[marker.order,]
ann.markerConc$Marker <- factor(marker.order, levels=marker.order)

## Keep a copy of the per-condition totals; ADT.matrix.agg is reassigned in a later chunk
ADT.matrix.agg.total <- ADT.matrix.agg
```

## Plot overall ADT counts by conditions

Samples stained with diluted Ab panel have reduced ADT counts.

```{r UMIcountsPerCondition, fig.width=2.5, fig.height=2}

## Stacked bars of total ADT UMI counts (in millions) per staining condition.
## Rows are sorted by decreasing concentration, then decreasing UMI sum. The sort keys are
## taken from the same table that is plotted, so the chunk stays correct even if
## ADT.matrix.agg has been reassigned (it is, in the UMIinExpressingCells chunk).
ADT.matrix.agg.total.ordered <- ADT.matrix.agg.total[order(-ADT.matrix.agg.total$conc, -ADT.matrix.agg.total$sum),]

p.UMIcountsPerCondition <- ggplot(ADT.matrix.agg.total.ordered, aes(x=volume, y=sum/10^6, fill=conc)) +
  geom_bar(stat="identity", col=alpha(col="black",alpha=0.05)) +
  scale_fill_viridis_c(trans="log2", labels=scaleFUNformat, breaks=c(0.0375,0.15,0.625,2.5,10)) +
  scale_y_continuous(expand=c(0,0,0,0.05)) +
  labs(fill="DF4\nµg/mL", y=bquote("ADT UMI counts ("~10^6~")")) +
  guides(fill=guide_colourbar(reverse=TRUE)) +
  theme(panel.grid.major=element_blank(), axis.title.x=element_blank(), panel.border=element_blank(), axis.line = element_line(), legend.position="right")

p.UMIcountsPerCondition
```

## Compare total UMI counts per marker

Plot total UMI counts for each marker at the investigated staining conditions (staining volume and cell number at staining, all at DF4). To ease readability, we place dashed lines between each concentration.

```{r plotUMIcountsPerMarker, fig.width=4.5, fig.height=5}
## Calculate "breaks" where the concentration changes.
## split() groups the markers by concentration and sapply(length) counts them; subtracting
## the cumulative counts from the number of markers gives the axis position (+0.5, i.e.
## between two markers) of each boundary. The last value is not a boundary and is dropped.
## `lines` is reused by the UMIinExpressingCells chunk below.
lines <- length(marker.order)-cumsum(sapply(split(ann.markerConc$Marker,ann.markerConc$conc_µg_per_mL),length))+0.5
lines <- data.frame(breaks=lines[-length(lines)])

## Make a marker by concentration "heatmap"
p.markerByConc <- ggplot(ann.markerConc, aes(x=1, y=Marker, fill=conc_µg_per_mL)) +
  geom_tile(col=alpha(col="black",alpha=0.2)) +
  geom_hline(data=lines,aes(yintercept=breaks), linetype="dashed", alpha=0.5) +
  scale_fill_viridis_c(trans="log2") +
  labs(fill="µg/mL") +
  theme_get() +
  theme(axis.ticks.x=element_blank(), axis.title = element_blank(), axis.text.x=element_blank(), panel.grid=element_blank(), legend.position="right", plot.margin=unit(c(0.1,0.1,0.1,0.1),"mm")) +
  scale_x_continuous(expand=c(0,0))

## Make UMI counts per Marker plot (one point per condition, connected per marker)
p.UMIcountsPerMarker <- ggplot(ADT.matrix.agg, aes(x=marker.byConc,y=log2(sum))) +
  geom_line(aes(group=marker), size=1, color="#666666", alpha=0.5) +
  ggbeeswarm::geom_quasirandom(aes(group=volume, fill=volume, pch=volume), size=1, dodge.width=-0.75) +
  geom_vline(data=lines,aes(xintercept=breaks), linetype="dashed", alpha=0.5) +
  scale_fill_manual(values=color.volnum) +
  scale_y_continuous(breaks=c(9:17)) +
  scale_shape_manual(values=shape.volnum) +
  ylab("log2(UMI sum)") +
  guides(fill=guide_legend(override.aes=list(size=1.5), ncol=1, reverse=FALSE)) +
  theme(axis.title.y=element_blank(), axis.text.y=element_blank(), legend.position="bottom", legend.justification="left", legend.title.align=0, legend.key.width=unit(0.2,"cm"), legend.title=element_blank()) +
  coord_flip()

## Combine plot with markerByConc annotation heatmap
## ("none" replaces the deprecated guides(...=F))
plotUMIcountsPerMarker <- p.markerByConc + guides(fill="none") +
  p.UMIcountsPerMarker + guides(fill="none", shape="none") +
  plot_spacer() + guide_area() +
  plot_layout(ncol=4, widths=c(1,30,0.1), guides='collect')

plotUMIcountsPerMarker
```

## Compare change in UMI/cell within expressing cluster

Using a specific percentile may be prone to outliers in small clusters (i.e. the 90th percentile of a cluster of 30 will be the #3 highest cell, making it prone to outliers). We thus set a threshold so that the value is only the 90th percentile if the cluster contains more than 100 cells. For smaller clusters, the median is used. Expressing cluster is identified in the "preprocessing" vignette.

```{r UMIinExpressingCells, fig.width=4.5, fig.height=5}
## Get the data (long format: one row per marker and cell, see calculateUMIcountsPerMarker)
ADT.matrix <- data.frame(GetAssayData(object, assay="ADT.kallisto", slot="counts"))
ADT.matrix$marker <- rownames(ADT.matrix)
ADT.matrix$conc <- abpanel[ADT.matrix$marker,"conc_µg_per_mL"]
ADT.matrix <- ADT.matrix %>% pivot_longer(c(-marker,-conc))

## Get cell annotations
cell.annotation <- FetchData(object, vars=c("volnum", "fineCluster"))

## Calculate marker statistics for each staining condition within each cluster.
## NOTE: the column called `median` holds the plain 90th percentile (name kept as is).
## NOTE(review): nth() is called without a position, so it is presumably the project helper
## (R/Utilities.R) implementing the percentile/median rule described above - confirm.
ADT.matrix.agg <- ADT.matrix %>%
  group_by(volume=cell.annotation[name,"volnum"], fineCluster=cell.annotation[name,"fineCluster"], marker, conc) %>%
  summarise(sum=sum(value), median=quantile(value, probs=c(0.9)), nth=nth(value))

## Use data for the previously determined expressing cluster.
Cluster.max <- markerStats[markerStats$tissue == "PBMC",c("marker","fineCluster")]
Cluster.max$fineCluster <- factor(Cluster.max$fineCluster)

## Keep only the rows of each marker's expressing cluster; the join keys are spelled out
## (they are the only columns the two tables share)
ADT.matrix.aggByClusterMax <- Cluster.max %>% left_join(ADT.matrix.agg, by=c("marker","fineCluster"))
ADT.matrix.aggByClusterMax$marker.byConc <- factor(ADT.matrix.aggByClusterMax$marker, levels=marker.order)

p.UMIinExpressingCells <- ggplot(ADT.matrix.aggByClusterMax, aes(x=marker.byConc, y=log2(nth))) +
  geom_line(aes(group=marker), size=1, alpha=0.5, color="#666666") +
  ggbeeswarm::geom_quasirandom(aes(group=volume, fill=volume, pch=volume), size=1, show.legend=FALSE, dodge.width=-0.75) +
  geom_vline(data=lines,aes(xintercept=breaks), linetype="dashed", alpha=0.5) +
  ## Print the id of the expressing cluster at the far end of the value axis
  geom_text(aes(label=paste0(fineCluster," ")), y=Inf, adj=1, size=1.5) +
  scale_fill_manual(values=color.volnum) +
  scale_shape_manual(values=shape.volnum) +
  ## Values are plotted as log2 but the axis is labelled with the untransformed counts
  scale_y_continuous(breaks=c(0:11), labels=2^c(0:11), expand=c(0.05,0.5)) +
  ylab("90th percentile UMI of expressing cluster") +
  theme(axis.title.y=element_blank(), axis.text.y=element_blank(), legend.position="right", legend.justification="left", legend.title.align=0, legend.key.width=unit(0.2,"cm")) +
  coord_flip()

## Combine plot with markerByConc annotation heatmap
UMIinExpressingCells <- p.markerByConc + theme(legend.position="none") +
  p.UMIinExpressingCells + theme(legend.position="none") +
  plot_spacer() +
  plot_layout(ncol=4, widths=c(1,30,0.1), guides='collect')

UMIinExpressingCells
```

## Titration examples

Most markers are largely unaffected by reducing staining volume.
However, some antibodies used at low concentrations and targeting abundant epitopes are affected, an example of such is CD31:

```{r fig.width=1.4, fig.height=2.3}
## Make helper function for plotting titration plots
##
## Arguments:
##   marker     Marker name: row name in `abpanel`, plotted as feature "adt_<marker>".
##   gate.PBMC  Optional manual gate; when given it replaces the gate taken from markerStats.
##   gate.Lung  Not used in this vignette; kept so that existing calls remain valid.
##   y.axis     Passed on as `yaxis.text`.
##   show.gate  If TRUE a gate is passed to the plot, otherwise `gates=NULL`.
##   legend     Passed on as `legend`.
## Returns the object returned by feature_rankplot_hist_custom().
titrationPlot <- function(marker, gate.PBMC=NULL, gate.Lung=NULL, y.axis=FALSE, show.gate=TRUE, legend=FALSE){
  ## Antibody concentration for the legend. `abpanel` concentrations were already
  ## divided by 4 in the loadABPanel chunk, so this is the DF4 concentration.
  curMarker.conc <- abpanel[marker, "conc_µg_per_mL"]

  gate <- NULL
  if(isTRUE(show.gate)){
    if(!is.null(gate.PBMC)){
      ## Allow manual gating
      gate <- gate.PBMC
    } else {
      ## Load gating percentage from the manually set DSB thresholds and convert the
      ## percentage into the fraction 1 - pct/100
      pct <- markerStats[markerStats$marker == marker & markerStats$tissue == "PBMC", "pct"]
      gate <- data.frame(gate=1-(pct/100))
    }
  }

  p <- feature_rankplot_hist_custom(data=object,
                                    marker=paste0("adt_",marker),
                                    group="volnum",
                                    barcodeGroup="supercluster",
                                    conc=curMarker.conc,
                                    legend=legend,
                                    yaxis.text=y.axis,
                                    gates=gate,
                                    histogram.colors=color.volnum,
                                    title=marker)

  return(p)
}

p.CD31 <- titrationPlot("CD31", legend=TRUE)

p.CD31
```

## Final plot

```{r figure3, fig.width=7, fig.height=4.5}
## Panel A: total ADT counts per condition
A <- p.UMIcountsPerCondition + theme(legend.key.width=unit(0.3,"cm"),
                                     legend.key.height=unit(0.4,"cm"),
                                     legend.text=element_text(size=unit(5,"pt")),
                                     plot.margin=unit(c(0.3,0,0,0),"cm"))

## Panels B and C: concentration annotation, UMI sum per marker and UMI in expressing cluster
B1 <- p.markerByConc + theme(text = element_text(size=10),
                             plot.margin=unit(c(0.3,0,1,0),"cm"),
                             legend.position="none")
B2 <- p.UMIcountsPerMarker + theme(legend.position="none")
C <- p.UMIinExpressingCells + theme(legend.position="none")

## Shared condition legend for B and C, extracted so it can be placed manually
BC.legend <- cowplot::get_legend(p.UMIcountsPerMarker +
                                   theme(legend.position="bottom",
                                         legend.direction="horizontal",
                                         legend.background=element_blank(),
                                         legend.box.background=element_blank(),
                                         legend.key=element_blank(),
                                         legend.key.height=unit(2,"mm")))

## Panel D: titration example
D <- p.CD31 + theme(plot.margin=unit(c(0.5,0,0,0),"cm"))

## panel.label_* and figure.* are not defined in this vignette
## (presumably set in R/ggplot_settings.R - confirm)
AD <- cowplot::plot_grid(A,D,NULL,
                         ncol=1,
                         rel_heights = c(14,16,1.5),
                         labels=c("A","D",""),
                         label_size=panel.label_size,
                         vjust=panel.label_vjust,
                         hjust=panel.label_hjust)

BC <- cowplot::plot_grid(B1, B2, C,
                         nrow=1,
                         rel_widths=c(2,10,10),
                         align="h",
                         axis="tb",
                         labels=c("B", "", "C"),
                         label_size=panel.label_size,
                         vjust=panel.label_vjust,
                         hjust=panel.label_hjust)

## plot_grid is namespace-qualified like every other cowplot call in this chunk.
## The legend is drawn on top of the assembled figure at a fixed relative position.
p.final <- cowplot::ggdraw(cowplot::plot_grid(AD, BC, nrow=1, rel_widths=c(1.2,4), align="v", axis="l")) +
  cowplot::draw_plot(BC.legend,0.27,0.023,0.2,0.00001)

png(file=file.path(outdir,"Supplementary Figure S5.png"),
    width=figure.width.full,
    height=4.7,
    units = figure.unit,
    res=figure.resolution,
    antialias=figure.antialias)

## Explicit print() so the file is also written when the code does not run at top level
print(p.final)

dev.off()

p.final
```

## Individual titration plots

For supplementary information.

```{r suppFig1, fig.width=7, fig.height=10, eval=FALSE}
## Page layout: plots per row and maximum number of rows per page
plots.columns <- 6
rows.max <- 5

## Panel annotation for the markers in the ADT assay, sorted by category and marker name
markers <- abpanel[rownames(object[["ADT.kallisto"]]),]
markers <- markers[order(markers$Category, markers$Marker),]

plots <- list()

## Make individual plots for each marker
for(i in seq_len(nrow(markers))){
  curMarker.name <- markers$Marker[i]
  ## Only the first plot of each row gets y-axis text. Row starts are derived from
  ## plots.columns instead of a hard-coded index list.
  y.axis <- (i-1) %% plots.columns == 0
  plots[[curMarker.name]] <- titrationPlot(curMarker.name, y.axis=y.axis)
}

# a bit of a hack to make celltype legend
p.legend <- cowplot::get_legend(ggplot(data.frame(supercluster=object$supercluster),
                                       aes(color=supercluster,x=1,y=1)) +
                                  geom_point(shape=15, size=1.5) +
                                  scale_color_manual(values=color.supercluster) +
                                  theme(legend.title=element_blank(),
                                        legend.margin=margin(0,0,0,0),
                                        legend.key.size = unit(0.15,"cm"),
                                        legend.position = c(0.98,1.1),
                                        legend.justification=c(1,1),
                                        legend.direction="horizontal"))

plots.num <- length(plots)
plots.perPage <- plots.columns*rows.max
plots.pages <- ceiling(plots.num/plots.perPage)

## Make a supplementary figure split into pages
for(i in seq_len(plots.pages)){
  start <- (i-1)*plots.perPage+1
  end <- min(i*plots.perPage, plots.num)
  curPlots <- c(start:end)
  plots.rows <- ceiling(length(curPlots)/plots.columns)

  ## The first column is slightly wider because it carries the y-axis text
  curPlots <- cowplot::plot_grid(plotlist=plots[curPlots], ncol=plots.columns, rel_widths=c(1.1, rep(1, plots.columns-1)), align="h", axis="tb")
  curPlots.layout <- cowplot::plot_grid(NULL, p.legend, curPlots, vjust=-0.5, hjust=panel.label_hjust, label_size=panel.label_size, ncol=1, rel_heights= c(0.5, 1.3, 70/5*plots.rows))

  ## One file per page, named with consecutive letters; page height scales with row count
  png(file=file.path(outdir,paste0("Supplementary Figure X",LETTERS[i],".png")),
      units=figure.unit,
      res=figure.resolution,
      width=figure.width.full,
      height=(2*plots.rows),
      antialias=figure.antialias)

  print(curPlots.layout)

  dev.off()

  print(curPlots.layout)
}
```

--------------------------------------------------------------------------------
/Volume titration.Rmd:
--------------------------------------------------------------------------------
---
title: "CITE-seq optimization - Staining volume titration"
author: "Terkild Brink Buus"
date: "30/3/2020"
output: github_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(warning=FALSE, message=FALSE)
options(stringsAsFactors=FALSE)
```

## Load utilities

Including libraries, plotting and color settings and custom utility functions.

```{r loadLibraries, results='hide', message=FALSE, warning=FALSE}
set.seed(114)
## library() stops with an error if a package is missing; require() would only
## return FALSE and let the vignette fail later with a less helpful message.
library("Seurat", quietly=TRUE)
library("tidyverse", quietly=TRUE)
library("Matrix", quietly=TRUE)
library("patchwork", quietly=TRUE)

## Load ggplot theme and defaults
source("R/ggplot_settings.R")

## Load helper functions
source("R/Utilities.R")

## Load predefined color schemes
source("R/color.R")

## Load feature_rankplot functions
source("R/feature_rankplot.R")
source("R/feature_rankplot_hist.R")
source("R/feature_rankplot_hist_custom.R")

## Output directory and input files (paths are relative to the project root)
outdir <- "figures"
data.Seurat <- "data/5P-CITE-seq_Titration.rds"
data.abpanel <- "data/Supplementary_Table_1.xlsx"
data.markerStats <- "data/markerByClusterStats.tsv"

## Make a custom function for formatting the concentration scale (two decimals)
scaleFUNformat <- function(x) sprintf("%.2f", x)
```

## Load Seurat object

Subset to only focus on conditions with 1 million cells and dilution factor 4 (thus comparing 50µl to 25µl staining volume in PBMCs).

```{r loadSeurat}
## Load the Seurat object from the path set in the loadLibraries chunk
object <- readRDS(file=data.Seurat)

## Show number of cells from each sample
table(object$group)

## Keep only cells stained at dilution factor 4 with 1000k cells at staining
object <- subset(object, subset=dilution == "DF4" & cellsAtStaining == "1000k")
object
```

## Load Ab panel annotation and concentrations

Marker stats are reused in other comparisons and were calculated at the end of the preprocessing vignette.

```{r loadABPanel}
## Antibody panel sheet, indexed by marker name
abpanel <- data.frame(readxl::read_excel(data.abpanel))
rownames(abpanel) <- abpanel$Marker

## As we are only working with dilution factor 4 samples here, we want to show labels accordingly
# a bit of a hack...
abpanel$conc_µg_per_mL <- abpanel$conc_µg_per_mL/4

## Per-marker statistics (one row per marker and tissue); row names become "<marker>_<tissue>"
markerStats <- read.table(data.markerStats)
markerStats.PBMC <- markerStats[markerStats$tissue == "PBMC",]
rownames(markerStats) <- paste(markerStats$marker,markerStats$tissue,sep="_")

## Make an ordering vector: markers sorted by decreasing concentration, ties broken
## by decreasing total UMI count (decreasing=TRUE applies to both sort keys)
marker.order <- markerStats.PBMC$marker[order(markerStats.PBMC$conc_µg_per_mL, markerStats.PBMC$UMItotal, decreasing=TRUE)]

head(abpanel)
head(markerStats)
```

## Cell type and tissue overview

Make tSNE plots colored by cell type, cluster and staining volume.

```{r tsnePlots, fig.height=3, fig.width=7}
## DimPlot(..., combine=FALSE) returns a list of ggplot objects; [[1]] extracts the plot.
## facet_wrap(~"<text>") is used to put a strip title on top of each panel.
p.tsne.volume <- DimPlot(object, group.by="volume", reduction="tsne", pt.size=0.1, combine=FALSE)[[1]] +
  theme_get() +
  facet_wrap(~"Volume") +
  scale_color_manual(values=color.volume)

p.tsne.cluster <- DimPlot(object, group.by="supercluster", reduction="tsne", pt.size=0.1, combine=FALSE)[[1]] +
  theme_get() +
  scale_color_manual(values=color.supercluster) +
  facet_wrap(~"Cell types")

## Clusters are labelled in the plot, so the colour legend is dropped.
## "none" replaces the deprecated guides(col=F).
p.tsne.finecluster <- DimPlot(object, label=TRUE, label.size=3, reduction="tsne", group.by="fineCluster", pt.size=0.1, combine=FALSE)[[1]] +
  theme_get() +
  facet_wrap( ~"Clusters") +
  guides(col="none")

p.tsne.cluster + p.tsne.finecluster + p.tsne.volume
```

## Overall ADT counts

Extract UMI data and calculate UMI sum per marker within each condition.

```{r calculateUMIcountsPerMarker}
## Get the data: raw ADT UMI counts (markers in rows, one column per cell)
ADT.matrix <- data.frame(GetAssayData(object, assay="ADT.kallisto", slot="counts"))
ADT.matrix$marker <- rownames(ADT.matrix)
## Look up the staining concentration of each marker in the antibody panel sheet
ADT.matrix$conc <- abpanel[ADT.matrix$marker,"conc_µg_per_mL"]
## Long format: pivot_longer() puts the former column names in `name` and the counts in `value`
ADT.matrix <- ADT.matrix %>% pivot_longer(c(-marker,-conc))

## Get cell annotations
cell.annotation <- FetchData(object, vars=c("volume"))

## Calculate marker UMI sum within each staining volume
## NOTE(review): the lookup cell.annotation[name, ...] assumes the column names created by
## data.frame() still equal the cell names used as row names of cell.annotation - confirm.
ADT.matrix.agg <- ADT.matrix %>%
  group_by(volume=cell.annotation[name,"volume"], marker, conc) %>%
  summarise(sum=sum(value))

## Order markers by concentration
ADT.matrix.agg$marker.byConc <- factor(ADT.matrix.agg$marker, levels=marker.order)

## Extract marker annotation (rows of the panel sheet in plotting order)
ann.markerConc <- abpanel[marker.order,]
ann.markerConc$Marker <- factor(marker.order, levels=marker.order)

## Keep a copy of the per-condition totals; ADT.matrix.agg is reassigned in a later chunk
ADT.matrix.agg.total <- ADT.matrix.agg
```

## Plot overall ADT counts by conditions

Samples stained with diluted Ab panel have reduced ADT counts.

```{r UMIcountsPerCondition, fig.width=2.5, fig.height=2}

## Stacked bars of total ADT UMI counts (in millions) per staining volume.
## Rows are sorted by decreasing concentration, then decreasing UMI sum. The sort keys are
## taken from the same table that is plotted, so the chunk stays correct even if
## ADT.matrix.agg has been reassigned (it is, in the UMIinExpressingCells chunk).
ADT.matrix.agg.total.ordered <- ADT.matrix.agg.total[order(-ADT.matrix.agg.total$conc, -ADT.matrix.agg.total$sum),]

p.UMIcountsPerCondition <- ggplot(ADT.matrix.agg.total.ordered, aes(x=volume, y=sum/10^6, fill=conc)) +
  geom_bar(stat="identity", col=alpha(col="black",alpha=0.05)) +
  scale_fill_viridis_c(trans="log2", labels=scaleFUNformat, breaks=c(0.0375,0.15,0.625,2.5,10)) +
  scale_y_continuous(expand=c(0,0,0,0.05)) +
  labs(fill="DF4\nµg/mL", y=bquote("ADT UMI counts ("~10^6~")")) +
  guides(fill=guide_colourbar(reverse=TRUE)) +
  theme(panel.grid.major=element_blank(), axis.title.x=element_blank(), panel.border=element_blank(), axis.line = element_line(), legend.position="right")

p.UMIcountsPerCondition
```

## Compare total UMI counts per marker

Plot total UMI counts for each marker at the investigated staining volumes (50µl vs. 25µl, both at DF4). To ease readability, we place dashed lines between each concentration.

```{r plotUMIcountsPerMarker, fig.width=4.5, fig.height=5}
## Calculate "breaks" where the concentration changes.
## split() groups the markers by concentration and sapply(length) counts them; subtracting
## the cumulative counts from the number of markers gives the axis position (+0.5, i.e.
## between two markers) of each boundary. The last value is not a boundary and is dropped.
## `lines` is reused by the UMIinExpressingCells chunk below.
lines <- length(marker.order)-cumsum(sapply(split(ann.markerConc$Marker,ann.markerConc$conc_µg_per_mL),length))+0.5
lines <- data.frame(breaks=lines[-length(lines)])

## Make a marker by concentration "heatmap"
p.markerByConc <- ggplot(ann.markerConc, aes(x=1, y=Marker, fill=conc_µg_per_mL)) +
  geom_tile(col=alpha(col="black",alpha=0.2)) +
  geom_hline(data=lines,aes(yintercept=breaks), linetype="dashed", alpha=0.5) +
  scale_fill_viridis_c(trans="log2") +
  labs(fill="µg/mL") +
  theme_get() +
  theme(axis.ticks.x=element_blank(), axis.title = element_blank(), axis.text.x=element_blank(), panel.grid=element_blank(), legend.position="right", plot.margin=unit(c(0.1,0.1,0.1,0.1),"mm")) +
  scale_x_continuous(expand=c(0,0))

## Make UMI counts per Marker plot (one point per volume, connected per marker)
p.UMIcountsPerMarker <- ggplot(ADT.matrix.agg, aes(x=marker.byConc,y=log2(sum))) +
  geom_line(aes(group=marker), size=1.2, color="#666666") +
  geom_point(aes(group=volume, fill=volume), pch=21, size=0.7) +
  geom_vline(data=lines,aes(xintercept=breaks), linetype="dashed", alpha=0.5) +
  scale_fill_manual(values=color.volume) +
  scale_y_continuous(breaks=c(9:17)) +
  ylab("log2(UMI sum)") +
  guides(fill=guide_legend(override.aes=list(size=1.5), reverse=TRUE)) +
  theme(axis.title.y=element_blank(), axis.text.y=element_blank(), legend.position="bottom", legend.justification="left", legend.title.align=0, legend.key.width=unit(0.2,"cm"), legend.title=element_blank()) +
  coord_flip()

## Combine plot with markerByConc annotation heatmap
## ("none" replaces the deprecated guides(...=F))
plotUMIcountsPerMarker <- p.markerByConc + guides(fill="none") +
  p.UMIcountsPerMarker + guides(fill="none") +
  plot_spacer() + guide_area() +
  plot_layout(ncol=4, widths=c(1,30,0.1), guides='collect')

plotUMIcountsPerMarker
```

## Compare change in UMI/cell within expressing cluster

Using a specific percentile may be prone to outliers in small clusters (i.e. the 90th percentile of a cluster of 30 will be the #3 highest cell, making it prone to outliers). We thus set a threshold so that the value is only the 90th percentile if the cluster contains more than 100 cells. For smaller clusters, the median is used. Expressing cluster is identified in the "preprocessing" vignette.

```{r UMIinExpressingCells, fig.width=4.5, fig.height=5}
## Get the data (long format: one row per marker and cell, see calculateUMIcountsPerMarker)
ADT.matrix <- data.frame(GetAssayData(object, assay="ADT.kallisto", slot="counts"))
ADT.matrix$marker <- rownames(ADT.matrix)
ADT.matrix$conc <- abpanel[ADT.matrix$marker,"conc_µg_per_mL"]
ADT.matrix <- ADT.matrix %>% pivot_longer(c(-marker,-conc))

## Get cell annotations
cell.annotation <- FetchData(object, vars=c("volume", "fineCluster"))

## Calculate marker statistics for each staining volume within each cluster.
## NOTE: the column called `median` holds the plain 90th percentile (name kept as is).
## NOTE(review): nth() is called without a position, so it is presumably the project helper
## (R/Utilities.R) implementing the percentile/median rule described above - confirm.
ADT.matrix.agg <- ADT.matrix %>%
  group_by(volume=cell.annotation[name,"volume"], fineCluster=cell.annotation[name,"fineCluster"], marker, conc) %>%
  summarise(sum=sum(value), median=quantile(value, probs=c(0.9)), nth=nth(value))

## Use data for the previously determined expressing cluster.
Cluster.max <- markerStats[markerStats$tissue == "PBMC",c("marker","fineCluster")]
Cluster.max$fineCluster <- factor(Cluster.max$fineCluster)

## Keep only the rows of each marker's expressing cluster; the join keys are spelled out
## (they are the only columns the two tables share)
ADT.matrix.aggByClusterMax <- Cluster.max %>% left_join(ADT.matrix.agg, by=c("marker","fineCluster"))
ADT.matrix.aggByClusterMax$marker.byConc <- factor(ADT.matrix.aggByClusterMax$marker, levels=marker.order)

p.UMIinExpressingCells <- ggplot(ADT.matrix.aggByClusterMax, aes(x=marker.byConc, y=log2(nth))) +
  geom_line(aes(group=marker), size=1.2, color="#666666") +
  geom_point(aes(group=volume, fill=volume), pch=21, size=0.7) +
  geom_vline(data=lines,aes(xintercept=breaks), linetype="dashed", alpha=0.5) +
  ## Print the id of the expressing cluster at the far end of the value axis
  geom_text(aes(label=paste0(fineCluster," ")), y=Inf, adj=1, size=1.5) +
  scale_fill_manual(values=color.volume) +
  ## Values are plotted as log2 but the axis is labelled with the untransformed counts
  scale_y_continuous(breaks=c(0:11), labels=2^c(0:11), expand=c(0.05,0.5)) +
  ylab("90th percentile UMI of expressing cluster") +
  theme(axis.title.y=element_blank(), axis.text.y=element_blank(), legend.position="right", legend.justification="left", legend.title.align=0, legend.key.width=unit(0.2,"cm")) +
  coord_flip()

## Combine plot with markerByConc annotation heatmap
UMIinExpressingCells <- p.markerByConc + theme(legend.position="none") +
  p.UMIinExpressingCells + theme(legend.position="none") +
  plot_spacer() +
  plot_layout(ncol=4, widths=c(1,30,0.1), guides='collect')

UMIinExpressingCells
```

## Titration examples

Most markers are largely unaffected by reducing staining volume.
However, some antibodies used at low concentrations and targeting abundant epitopes are affected, an example of such is CD31:

```{r fig.width=1.4, fig.height=2.3}
## Make helper function for plotting titration plots
##
## Arguments:
##   marker     Marker name: row name in `abpanel`, plotted as feature "adt_<marker>".
##   gate.PBMC  Optional manual gate; when given it replaces the gate taken from markerStats.
##   gate.Lung  Not used in this vignette; kept so that existing calls remain valid.
##   y.axis     Passed on as `yaxis.text`.
##   show.gate  If TRUE a gate is passed to the plot, otherwise `gates=NULL`.
##   legend     Passed on as `legend`.
## Returns the object returned by feature_rankplot_hist_custom().
titrationPlot <- function(marker, gate.PBMC=NULL, gate.Lung=NULL, y.axis=FALSE, show.gate=TRUE, legend=FALSE){
  ## Antibody concentration for the legend. `abpanel` concentrations were already
  ## divided by 4 in the loadABPanel chunk, so this is the DF4 concentration.
  curMarker.conc <- abpanel[marker, "conc_µg_per_mL"]

  gate <- NULL
  if(isTRUE(show.gate)){
    if(!is.null(gate.PBMC)){
      ## Allow manual gating
      gate <- gate.PBMC
    } else {
      ## Load gating percentage from the manually set DSB thresholds and convert the
      ## percentage into the fraction 1 - pct/100
      pct <- markerStats[markerStats$marker == marker & markerStats$tissue == "PBMC", "pct"]
      gate <- data.frame(gate=1-(pct/100))
    }
  }

  p <- feature_rankplot_hist_custom(data=object,
                                    marker=paste0("adt_",marker),
                                    group="volume",
                                    barcodeGroup="supercluster",
                                    conc=curMarker.conc,
                                    legend=legend,
                                    yaxis.text=y.axis,
                                    gates=gate,
                                    histogram.colors=color.volume,
                                    title=marker)

  return(p)
}

p.CD31 <- titrationPlot("CD31", legend=TRUE)

p.CD31
```

## tSNE plots

Make tSNE plots with raw UMI counts. Use rainbow color scheme to show dynamic range in expression levels.

```{r, fig.height=2, fig.width=7}
## Markers shown as examples (panels E and F)
show_tsne_markers <- c("CD31","CD8")

## Apply the common look to a tSNE feature plot: rainbow colour gradient starting at 0,
## fixed axis limits (so the per-volume plots share the same frame), no axis text, and
## a horizontal legend in the lower right corner.
## x: a ggplot object. Returns the modified ggplot object.
f.tsne.format <- function(x){
  x +
    scale_color_gradientn(colours = c("#000033","#3333FF","#3377FF","#33AAFF","#33CC33","orange","red"),
                          limits=c(0,NA)) +
    scale_y_continuous(expand=c(0,0,0.05,0), limits=c(-45.52796,37.94770)) +
    xlim(c(-40.83170,49.63832)) +
    theme_get() +
    theme(plot.title=element_text(size=7, face="bold", hjust=0.5),
          plot.background=element_blank(),
          panel.background=element_blank(),
          axis.title=element_blank(),
          axis.text.x=element_blank(),
          axis.text.y=element_blank(),
          legend.key.width=unit(3,"mm"),
          legend.key.height=unit(2,"mm"),
          legend.position=c(1,-0.03),
          legend.justification=c(1,0),
          legend.background=element_blank(),
          legend.direction="horizontal")
}

## Colour scale cutoff per marker: 95th percentile of the raw counts across all cells in
## `object`, so both volumes of a marker share the same scale
maximum <- apply(FetchData(object, vars=paste0("adt_",show_tsne_markers), slot="counts"),2,quantile,probs=c(0.95))

## One plot per marker and staining volume
p.tsne.1 <- f.tsne.format(FeaturePlot(subset(object, subset=volume=="25µl"), reduction="tsne", sort=TRUE, combine=FALSE, features=paste0("adt_",show_tsne_markers[1]), slot="counts", max.cutoff=maximum[1], pt.size=0.1)[[1]])
p.tsne.2 <- f.tsne.format(FeaturePlot(subset(object, subset=volume=="50µl"), reduction="tsne", sort=TRUE, combine=FALSE, features=paste0("adt_",show_tsne_markers[1]), slot="counts", max.cutoff=maximum[1], pt.size=0.1)[[1]])
p.tsne.3 <- f.tsne.format(FeaturePlot(subset(object, subset=volume=="25µl"), reduction="tsne", sort=TRUE, combine=FALSE, features=paste0("adt_",show_tsne_markers[2]), slot="counts", max.cutoff=maximum[2], pt.size=0.1)[[1]])
p.tsne.4 <- f.tsne.format(FeaturePlot(subset(object, subset=volume=="50µl"), reduction="tsne", sort=TRUE, combine=FALSE, features=paste0("adt_",show_tsne_markers[2]), slot="counts", max.cutoff=maximum[2], pt.size=0.1)[[1]])

p.tsne <- list(p.tsne.1 + ggtitle("25µl"),
               p.tsne.2 + ggtitle("50µl"),
               p.tsne.3 + ggtitle("25µl"),
               p.tsne.4 + ggtitle("50µl"))
## Get common y-axis label
p.tsne[[1]] <- p.tsne[[1]] + theme(axis.title.y=element_text())
# a bit of a hack to get a common x-axis label
p.tsne[[2]] <- p.tsne[[2]] + theme(axis.title.x=element_text(hjust=1.2))

## The marker names are used as panel labels, centred over the second plot of each pair
p.UMI.tsne <- cowplot::plot_grid(plotlist=p.tsne,
                                 align="h",
                                 axis="tb",
                                 nrow=1,
                                 rel_widths=c(1.05,1,1,1),
                                 labels=c("E",show_tsne_markers[1],"F",show_tsne_markers[2]),
                                 label_size=panel.label_size,
                                 vjust=panel.label_vjust,
                                 hjust=c(panel.label_hjust,0.5,panel.label_hjust,0.5))

p.UMI.tsne
```

## Final plot

```{r figure3, fig.width=7, fig.height=6}
## Panel A: total ADT counts per staining volume
A <- p.UMIcountsPerCondition + theme(legend.key.width=unit(0.3,"cm"),
                                     legend.key.height=unit(0.4,"cm"),
                                     legend.text=element_text(size=unit(5,"pt")),
                                     plot.margin=unit(c(0.3,0,0.5,0),"cm"))

## Panels B and C: concentration annotation, UMI sum per marker and UMI in expressing cluster
B1 <- p.markerByConc + theme(text = element_text(size=10),
                             plot.margin=unit(c(0.3,0,0,0),"cm"),
                             legend.position="none")
B2 <- p.UMIcountsPerMarker + theme(legend.position="none")
C <- p.UMIinExpressingCells + theme(legend.position="none")

## Shared volume legend for B and C, extracted so it can be placed manually
BC.legend <- cowplot::get_legend(p.UMIcountsPerMarker +
                                   theme(legend.position="bottom",
                                         legend.direction="horizontal",
                                         legend.background=element_blank(),
                                         legend.box.background=element_blank(),
                                         legend.key=element_blank()))

## Panel D: titration example
D <- p.CD31 + theme(plot.margin=unit(c(0.5,0,0,0),"cm"))

## panel.label_* and figure.* are not defined in this vignette
## (presumably set in R/ggplot_settings.R - confirm)
AD <- cowplot::plot_grid(A,D,NULL,
                         ncol=1,
                         rel_heights = c(13,17,1.5),
                         labels=c("A","D",""),
                         label_size=panel.label_size,
                         vjust=panel.label_vjust,
                         hjust=panel.label_hjust)

BC <- cowplot::plot_grid(B1, B2, C,
                         nrow=1,
                         rel_widths=c(2,10,10),
                         align="h",
                         axis="tb",
                         labels=c("B", "", "C"),
                         label_size=panel.label_size,
                         vjust=panel.label_vjust,
                         hjust=panel.label_hjust)

## Upper part (panels A-D) with the B/C legend drawn on top at a fixed relative position.
## plot_grid is namespace-qualified like every other cowplot call in this chunk.
p.figure.top <- cowplot::ggdraw(cowplot::plot_grid(AD, BC,
                                                   nrow=1,
                                                   rel_widths=c(1,4),
                                                   align="v",
                                                   axis="l")) +
  cowplot::draw_plot(BC.legend,0.27,0.020,0.2,0.00001)

## Full figure: panels A-D above the tSNE row (E, F)
p.figure <- cowplot::plot_grid(p.figure.top, p.UMI.tsne, rel_heights=c(3,1.35), align="v", axis="lr", ncol=1)


png(file=file.path(outdir,"Figure 3.png"),
    width=figure.width.full,
    height=6,
    units = figure.unit,
    res=figure.resolution,
    antialias=figure.antialias)

## Explicit print() so the file is also written when the code does not run at top level
print(p.figure)

dev.off()

p.figure
```

## Individual titration plots

For supplementary information.

```{r suppFig1, fig.width=7, fig.height=10}
## Page layout: plots per row and maximum number of rows per page
plots.columns <- 6
rows.max <- 5

## Panel annotation for the markers in the ADT assay, sorted by category and marker name
markers <- abpanel[rownames(object[["ADT.kallisto"]]),]
markers <- markers[order(markers$Category, markers$Marker),]

plots <- list()

## Make individual plots for each marker
for(i in seq_len(nrow(markers))){
  curMarker.name <- markers$Marker[i]
  ## Only the first plot of each row gets y-axis text. Row starts are derived from
  ## plots.columns instead of a hard-coded index list.
  y.axis <- (i-1) %% plots.columns == 0
  plots[[curMarker.name]] <- titrationPlot(curMarker.name, y.axis=y.axis)
}

# a bit of a hack to make celltype legend
p.legend <- cowplot::get_legend(ggplot(data.frame(supercluster=object$supercluster),
                                       aes(color=supercluster,x=1,y=1)) +
                                  geom_point(shape=15, size=1.5) +
                                  scale_color_manual(values=color.supercluster) +
                                  theme(legend.title=element_blank(),
                                        legend.margin=margin(0,0,0,0),
                                        legend.key.size = unit(0.15,"cm"),
                                        legend.position = c(0.98,1.1),
                                        legend.justification=c(1,1),
                                        legend.direction="horizontal"))

plots.num <- length(plots)
plots.perPage <- plots.columns*rows.max
plots.pages <- ceiling(plots.num/plots.perPage)

## Make a supplementary figure split into pages
for(i in seq_len(plots.pages)){
  start <- (i-1)*plots.perPage+1
  end <- min(i*plots.perPage, plots.num)
  curPlots <- c(start:end)
  plots.rows <- ceiling(length(curPlots)/plots.columns)

  ## The first column is slightly wider because it carries the y-axis text
  curPlots <- cowplot::plot_grid(plotlist=plots[curPlots], ncol=plots.columns, rel_widths=c(1.1, rep(1, plots.columns-1)), align="h", axis="tb")
  curPlots.layout <- cowplot::plot_grid(NULL, p.legend, curPlots, vjust=-0.5, hjust=panel.label_hjust, label_size=panel.label_size, ncol=1, rel_heights= c(0.5, 1.3, 70/5*plots.rows))

  ## One file per page, named with consecutive letters; page height scales with row count
  png(file=file.path(outdir,paste0("Supplementary Figure 3",LETTERS[i],".png")),
      units=figure.unit,
      res=figure.resolution,
      width=figure.width.full,
      height=(2*plots.rows),
      antialias=figure.antialias)

  print(curPlots.layout)

  dev.off()

  print(curPlots.layout)
}
```

--------------------------------------------------------------------------------
/Volume-and-cell-number-titration_files/figure-gfm/UMIcountsPerCondition-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Volume-and-cell-number-titration_files/figure-gfm/UMIcountsPerCondition-1.png
--------------------------------------------------------------------------------
/Volume-and-cell-number-titration_files/figure-gfm/UMIinExpressingCells-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Volume-and-cell-number-titration_files/figure-gfm/UMIinExpressingCells-1.png
--------------------------------------------------------------------------------
/Volume-and-cell-number-titration_files/figure-gfm/figure3-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Volume-and-cell-number-titration_files/figure-gfm/figure3-1.png -------------------------------------------------------------------------------- /Volume-and-cell-number-titration_files/figure-gfm/plotUMIcountsPerMarker-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Volume-and-cell-number-titration_files/figure-gfm/plotUMIcountsPerMarker-1.png -------------------------------------------------------------------------------- /Volume-and-cell-number-titration_files/figure-gfm/tsnePlots-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Volume-and-cell-number-titration_files/figure-gfm/tsnePlots-1.png -------------------------------------------------------------------------------- /Volume-and-cell-number-titration_files/figure-gfm/unnamed-chunk-1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Volume-and-cell-number-titration_files/figure-gfm/unnamed-chunk-1-1.png -------------------------------------------------------------------------------- /Volume-titration_files/figure-gfm/UMIcountsPerCondition-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Volume-titration_files/figure-gfm/UMIcountsPerCondition-1.png -------------------------------------------------------------------------------- /Volume-titration_files/figure-gfm/UMIinExpressingCells-1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Volume-titration_files/figure-gfm/UMIinExpressingCells-1.png -------------------------------------------------------------------------------- /Volume-titration_files/figure-gfm/figure3-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Volume-titration_files/figure-gfm/figure3-1.png -------------------------------------------------------------------------------- /Volume-titration_files/figure-gfm/plotUMIcountsPerMarker-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Volume-titration_files/figure-gfm/plotUMIcountsPerMarker-1.png -------------------------------------------------------------------------------- /Volume-titration_files/figure-gfm/suppFig1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Volume-titration_files/figure-gfm/suppFig1-1.png -------------------------------------------------------------------------------- /Volume-titration_files/figure-gfm/suppFig1-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Volume-titration_files/figure-gfm/suppFig1-2.png -------------------------------------------------------------------------------- /Volume-titration_files/figure-gfm/tsnePlots-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Volume-titration_files/figure-gfm/tsnePlots-1.png -------------------------------------------------------------------------------- /Volume-titration_files/figure-gfm/unnamed-chunk-1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Volume-titration_files/figure-gfm/unnamed-chunk-1-1.png -------------------------------------------------------------------------------- /Volume-titration_files/figure-gfm/unnamed-chunk-2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/Volume-titration_files/figure-gfm/unnamed-chunk-2-1.png -------------------------------------------------------------------------------- /data/5P-CITE-seq_Titration.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/data/5P-CITE-seq_Titration.rds -------------------------------------------------------------------------------- /data/5P-CITE-seq_Titration_full.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/data/5P-CITE-seq_Titration_full.rds -------------------------------------------------------------------------------- /data/Supplementary_Table_1.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/data/Supplementary_Table_1.xlsx 
-------------------------------------------------------------------------------- /data/markerByClusterStats.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/data/markerByClusterStats.tsv -------------------------------------------------------------------------------- /figures/Figure 1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/figures/Figure 1.png -------------------------------------------------------------------------------- /figures/Figure 2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/figures/Figure 2.png -------------------------------------------------------------------------------- /figures/Figure 3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/figures/Figure 3.png -------------------------------------------------------------------------------- /figures/Figure 4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/figures/Figure 4.png -------------------------------------------------------------------------------- /figures/Figure 5 wMule.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/figures/Figure 5 wMule.png -------------------------------------------------------------------------------- /figures/Figure 5.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/figures/Figure 5.png -------------------------------------------------------------------------------- /figures/Figure 6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/figures/Figure 6.png -------------------------------------------------------------------------------- /figures/Supplementary Fig S5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/figures/Supplementary Fig S5.png -------------------------------------------------------------------------------- /figures/Supplementary Figure 2A.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/figures/Supplementary Figure 2A.png -------------------------------------------------------------------------------- /figures/Supplementary Figure 2B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/figures/Supplementary Figure 2B.png -------------------------------------------------------------------------------- /figures/Supplementary Figure 2C.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/figures/Supplementary Figure 2C.png -------------------------------------------------------------------------------- /figures/Supplementary Figure 2D.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/figures/Supplementary Figure 2D.png -------------------------------------------------------------------------------- /figures/Supplementary Figure 2E.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/figures/Supplementary Figure 2E.png -------------------------------------------------------------------------------- /figures/Supplementary Figure 3A.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/figures/Supplementary Figure 3A.png -------------------------------------------------------------------------------- /figures/Supplementary Figure 3B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/figures/Supplementary Figure 3B.png -------------------------------------------------------------------------------- /figures/Supplementary Figure 4A.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/figures/Supplementary Figure 4A.png -------------------------------------------------------------------------------- /figures/Supplementary Figure 4B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/figures/Supplementary Figure 4B.png 
-------------------------------------------------------------------------------- /figures/Supplementary Figure S1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/figures/Supplementary Figure S1.png -------------------------------------------------------------------------------- /figures/Supplementary Figure S5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/figures/Supplementary Figure S5.png -------------------------------------------------------------------------------- /figures/Supplementary Figure S6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/figures/Supplementary Figure S6.png -------------------------------------------------------------------------------- /figures/Supplementary Figure S7A.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/figures/Supplementary Figure S7A.png -------------------------------------------------------------------------------- /figures/Supplementary Figure S7B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/figures/Supplementary Figure S7B.png -------------------------------------------------------------------------------- /figures/Supplementary Figure S8.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/figures/Supplementary Figure S8.png -------------------------------------------------------------------------------- /figures/review_CD8_protein_rna_correlation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/figures/review_CD8_protein_rna_correlation.png -------------------------------------------------------------------------------- /figures/review_protein_rna_correlations.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/figures/review_protein_rna_correlations.png -------------------------------------------------------------------------------- /figures/review_washing_test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Terkild/CITE-seq_optimization/1c7fcabb18a1971dc4d6e29bc3ed4f6f36b2361f/figures/review_washing_test.png --------------------------------------------------------------------------------