├── .gitattributes ├── .gitignore ├── Blood cell deconvolution.Rproj ├── LICENSE ├── scripts ├── Clustering tests │ └── PBMC clustering.Rmd ├── annotation_file.R ├── cluster annotation.Rmd ├── cluster-annotation.pdf ├── functions_classification.R ├── functions_graphics.R ├── functions_normalization.R ├── functions_utility.R ├── heatmap generator.rar ├── heatmap generator │ ├── Cell type markers.tsv │ ├── Cluster annotation.tsv │ ├── cell_type_palette.tsv │ ├── cluster_exp.tab │ ├── geninfo_92.tsv │ └── heatmap_generator.R ├── main.Rmd ├── main_HPA21.Rmd └── theme.R └── singlecell_consensus_hierarchy.tsv /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | data 2 | results 3 | doc 4 | /data 5 | /results 6 | /doc 7 | data/ 8 | doc/ 9 | results/ 10 | .RData 11 | .Rhistory 12 | .Rproj.user 13 | -------------------------------------------------------------------------------- /Blood cell deconvolution.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 maxkarlsson 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, 
including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /scripts/Clustering tests/PBMC clustering.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Untitled" 3 | author: "Max J. 
Karlsson" 4 | date: "2020 M02 26" 5 | output: pdf_document 6 | editor_options: 7 | chunk_output_type: console 8 | --- 9 | 10 | ```{r setup, include=FALSE, echo = FALSE, message = FALSE, warning = FALSE} 11 | 12 | library(tidyverse) 13 | library(magrittr) 14 | library(umap) 15 | library(pcaMethods) 16 | library(pheatmap) 17 | library(MOFA) 18 | library(MultiAssayExperiment) 19 | library(MOFAdata) 20 | 21 | blood_cell_category <- 22 | read_delim("data/bloodcells_hpa_category_92.tsv", delim = "\t") 23 | 24 | 25 | 26 | data_barcodes <- read_delim("data/sc/PBMC/GSM3087619_DTM-X_PBMC_live_barcodes.tsv", delim = "\t", 27 | col_names = "barcode") 28 | data_ensg_id <- read_delim("data/sc/PBMC/GSM3087619_DTM-X_PBMC_live_genes.tsv", delim = "\t", 29 | col_names = c("ensg_id", "gene_name")) 30 | 31 | pbmc_data <- Matrix::readMM("data/sc/PBMC/GSM3087619_DTM-X_PBMC_live_matrix.mtx") 32 | 33 | enriched_genes <- 34 | blood_cell_category %>% 35 | # filter(specificity_category %in% c("Tissue enriched")) %>% 36 | filter(specificity_category %in% c("Tissue enriched", "Group enriched", "Tissue enhanced")) %>% 37 | filter(ensg_id %in% data_ensg_id$ensg_id) 38 | # Bar chart of the number of selected genes per enhanced_tissues value, stacked by specificity category; genes listing several comma-separated enhanced_tissues are counted once per value. NOTE(review): gene_category_pal and stripped_theme_facet are not defined in this document - presumably loaded elsewhere, confirm. 39 | enriched_genes %>% 40 | separate_rows(enhanced_tissues, sep = ",") %>% 41 | group_by(enhanced_tissues, specificity_category) %>% 42 | summarise(n = n()) %>% 43 | ggplot(aes(enhanced_tissues, n, fill = specificity_category)) + 44 | geom_col() + 45 | geom_text(aes(label = n), 46 | position = "stack", 47 | hjust = 0) + 48 | coord_flip() + 49 | scale_fill_manual(values = gene_category_pal) + 50 | stripped_theme_facet 51 | 52 | # Cells x genes table: the count matrix is converted to a dense matrix, transposed so that columns are genes, restricted to the selected genes and labelled with the cell barcode as cell_id. 53 | 54 | pbmc_data_filtered <- 55 | pbmc_data %>% 56 | as.matrix() %>% 57 | t() %>% 58 | as_tibble() %>% 59 | set_colnames(data_ensg_id$ensg_id) %>% 60 | select(enriched_genes$ensg_id) %>% 61 | mutate(cell_id = data_barcodes$barcode) %>% 62 | select(cell_id, everything()) 63 | # Long format with one row per cell_id / ensg_id pair. 64 | pbmc_data_long <- 65 | pbmc_data_filtered %>% 66 | gather(ensg_id, count, -1) 67 | 68 | # Per-cell summary (total count, fraction of zero counts, maximum count); the same summary is computed per gene right after, and both feed the filtered_cells / filtered_genes thresholds further down. 69 | 70 | cell_stats <- 71 | 
pbmc_data_long %>% 72 | group_by(cell_id) %>% 73 | summarise(n = sum(count), 74 | fract_0 = length(which(count == 0)) / length(count), 75 | max_count = max(count)) 76 | 77 | gene_stats <- 78 | pbmc_data_long %>% 79 | group_by(ensg_id) %>% 80 | summarise(n = sum(count), 81 | fract_0 = length(which(count == 0)) / length(count), 82 | max_count = max(count)) 83 | 84 | cell_stats %>% 85 | ggplot(aes(n)) + 86 | geom_histogram() + 87 | scale_x_log10() 88 | 89 | cell_stats %>% 90 | ggplot(aes(fract_0)) + 91 | geom_histogram() + 92 | scale_x_log10() 93 | 94 | gene_stats %>% 95 | ggplot(aes(n)) + 96 | geom_histogram() + 97 | scale_x_log10() 98 | 99 | gene_stats %>% 100 | ggplot(aes(fract_0)) + 101 | geom_histogram() + 102 | scale_x_log10() 103 | 104 | filtered_cells <- 105 | cell_stats %>% 106 | filter(n >= 100 & fract_0 < 0.99) 107 | 108 | filtered_genes <- 109 | gene_stats %>% 110 | filter(fract_0 < 0.99 & max_count > 1, n >= 10) 111 | 112 | top_enriched_genes <- 113 | enriched_genes %>% 114 | filter(specificity_category == "Tissue enriched") %>% 115 | left_join(gene_stats) %T>% 116 | {g <- ggplot(., aes(enhanced_tissues, max_count)) + 117 | geom_violin() + 118 | coord_flip() + 119 | scale_y_log10();print(g)} 120 | 121 | # pbmc_data_filtered %>% 122 | # # filter(cell_id %in% filtered_cells$cell_id) %>% 123 | # column_to_rownames("cell_id") %>% 124 | # {log10(. + 1)} %>% 125 | # head(100) %>% 126 | # pheatmap(color = heatmap_palette, 127 | # clustering) 128 | ``` 129 | 130 | 131 | ```{r} 132 | # PCA on log10(count + 1) of the genes kept in filtered_genes, with cell_id as row names. NOTE(review): pca_calc() is not defined in this document - confirm where it comes from. 133 | cell_pca <- 134 | pbmc_data_filtered %>% 135 | select(1, filtered_genes$ensg_id) %>% 136 | # filter(cell_id %in% filtered_cells$cell_id) %>% 137 | column_to_rownames("cell_id") %>% 138 | {log10(. 
+ 1)} %>% 139 | pca_calc(npcs = 1000) 140 | # Density of cells in the plane of the first two principal components. 141 | cell_pca$scores %>% 142 | as_tibble(rownames = "cell_id") %>% 143 | ggplot(aes(PC1, PC2)) + 144 | geom_hex(aes(fill = stat(log10(count))), 145 | bins = 100) + 146 | scale_fill_viridis_c() 147 | # Per cell and enhanced_tissues group: maximum and mean z-score of log10(count + 1) over the "Tissue enriched" genes of that group; z-scores are computed per gene across cells and genes with zero standard deviation are dropped. 148 | cell_expression_summary <- 149 | pbmc_data_long %>% 150 | filter(ensg_id %in% top_enriched_genes$ensg_id) %>% 151 | inner_join(top_enriched_genes %>% 152 | select(ensg_id, enhanced_tissues), by = "ensg_id") %>% 153 | mutate(log_count = log10(count + 1)) %>% 154 | # head(100000) %>% 155 | group_by(ensg_id) %>% 156 | mutate(mean_count = mean(log_count), 157 | sd_count = sd(log_count), 158 | z_score = (log_count-mean_count)/sd_count) %>% 159 | ungroup() %>% 160 | filter(sd_count > 0) %>% 161 | group_by(enhanced_tissues, cell_id) %>% 162 | summarise(max_score = max(z_score, na.rm = TRUE), 163 | mean_score = mean(z_score, na.rm = TRUE)) %>% 164 | ungroup() 165 | 166 | # PCA coordinates joined with the per-cell scores (one row per cell and enhanced_tissues group). 167 | 168 | plot_data <- 169 | cell_pca$scores %>% 170 | as_tibble(rownames = "cell_id") %>% 171 | left_join(cell_expression_summary, by = "cell_id") 172 | # z must be a column of cell_expression_summary (max_score / mean_score); max_count does not exist in plot_data. 173 | plot_data %>% 174 | # filter(enhanced_tissues %in% c("neutrophil", "T-reg")) %>% 175 | ggplot(aes(PC1, PC2, z = max_score)) + 176 | stat_summary_hex(fun = "max") + 177 | facet_wrap(~enhanced_tissues) + 178 | scale_fill_viridis_c() 179 | ##### 180 | # UMAP computed on the first 100 principal components. NOTE(review): umap_calc() is not defined in this document - confirm where it comes from. 181 | cell_umap <- 182 | cell_pca$scores[, 1:100] %>% 183 | umap_calc(npcs = 2) 184 | 185 | 186 | cell_umap$layout %>% 187 | as_tibble(rownames = "cell_id") %>% 188 | ggplot(aes(V1, V2)) + 189 | geom_hex(aes(fill = stat(log10(count))), 190 | bins = 100) + 191 | scale_fill_viridis_c() 192 | 193 | plot_data <- 194 | cell_umap$layout %>% 195 | as_tibble(rownames = "cell_id") %>% 196 | left_join(cell_expression_summary, by = "cell_id") 197 | 198 | 199 | plot_data %>% 200 | # filter(enhanced_tissues %in% c("neutrophil", "T-reg")) %>% 201 | ggplot(aes(V1, V2, z = max_score)) + 202 | stat_summary_hex(fun = "max") + 203 | facet_wrap(~enhanced_tissues) + 204 | scale_fill_viridis_c() 205 | ``` 206 | 207 | # MOFA test 208 
| 209 | ```{r} 210 | # The map/colDat objects below are a toy sample-map example (Jack/Jill/Barbara/Bob) and are not derived from the PBMC data. 211 | data("scMT_data") 212 | exprmap <- data.frame( 213 | primary = c("Jack", "Jill", "Barbara", "Bob"), 214 | colname = c("array1", "array2", "array3", "array4"), 215 | stringsAsFactors = FALSE) 216 | 217 | methylmap <- data.frame( 218 | primary = c("Jack", "Jack", "Jill", "Barbara", "Bob"), 219 | colname = c("methyl1", "methyl2", "methyl3", "methyl4", "methyl5"), 220 | stringsAsFactors = FALSE) 221 | 222 | rnamap <- data.frame( 223 | primary = c("Jack", "Jill", "Bob", "Barbara"), 224 | colname = c("samparray1", "samparray2", "samparray3", "samparray4"), 225 | stringsAsFactors = FALSE) 226 | 227 | gistmap <- data.frame( 228 | primary = c("Jack", "Bob", "Jill"), 229 | colname = c("samp0", "samp1", "samp2"), 230 | stringsAsFactors = FALSE) 231 | 232 | ## Combine as a named list and convert to a DataFrame 233 | maplist <- list(Affy = exprmap, Methyl450k = methylmap, 234 | RNASeqGene = rnamap, GISTIC = gistmap) 235 | 236 | ## Create a sampleMap 237 | sampMap <- listToMap(maplist) 238 | ## Create an example phenotype data 239 | colDat <- data.frame(sex = c("M", "F", "M", "F"), age = 38:41, 240 | row.names = c("Jack", "Jill", "Bob", "Barbara")) 241 | ## NOTE(review): ExpList is not defined anywhere in this document; the call below fails unless ExpList already exists in the session - confirm or remove. 242 | ## Create a MultiAssayExperiment instance 243 | mae <- MultiAssayExperiment(experiments = ExpList, colData = colDat, 244 | sampleMap = sampMap) 245 | 246 | 247 | MultiAssayExperiment(colData = pbmc_data_filtered %>% 248 | column_to_rownames("cell_id"), 249 | sampleMap = pbmc_data_filtered %>% 250 | select(1) %>% column_to_rownames("cell_id") 251 | ) 252 | ## Minimal per-cell metadata: one constant column, cell ids as row names. 253 | cell.data <- 254 | pbmc_data_filtered %>% 255 | select(1) %>% 256 | mutate(mock = 1) %>% 257 | column_to_rownames("cell_id") 258 | 259 | 260 | ## Single assay (genes x cells after t()); colData is an argument of MultiAssayExperiment(), not an element of the experiments list. 261 | 262 | PBMC_MAE <- 263 | MultiAssayExperiment(experiments = list("TMT_proteomics" = pbmc_data_filtered %>% 264 | column_to_rownames("cell_id") %>% 265 | 266 | t()), 267 | colData = cell.data) 268 | 269 | PBMC_MOFA <- createMOFAobject(PBMC_MAE) 270 | 271 | plotDataOverview(PBMC_MOFA) 272 | 273 | 
DataOptions <- getDefaultDataOptions() 274 | ModelOptions <- getDefaultModelOptions(PBMC_MOFA) 275 | TrainOptions <- getDefaultTrainOptions() 276 | TrainOptions$seed <- 2018 277 | 278 | PBMC_MOFA <- 279 | prepareMOFA(PBMC_MOFA, 280 | DataOptions = DataOptions, 281 | ModelOptions = ModelOptions, 282 | TrainOptions = TrainOptions) 283 | 284 | PBMC_MOFA <- runMOFA(PBMC_MOFA) 285 | 286 | 287 | ``` 288 | 289 | -------------------------------------------------------------------------------- /scripts/annotation_file.R: -------------------------------------------------------------------------------- 1 | 2 | cluster_annotation <- 3 | c("Axillary lymph nodes_Cluster-0" = "Endothelium", 4 | "Axillary lymph nodes_Cluster-1" = "Endothelium", 5 | "Axillary lymph nodes_Cluster-2" = "Endothelium", 6 | "Axillary lymph nodes_Cluster-3" = "Endothelium", 7 | "Axillary lymph nodes_Cluster-4" = "Endothelium", 8 | "Axillary lymph nodes_Cluster-5" = "Endothelium", 9 | "Axillary lymph nodes_Cluster-6" = "Endothelium", 10 | "Axillary lymph nodes_Cluster-7" = "Endothelium", 11 | "Axillary lymph nodes_Cluster-8" = "Endothelium", 12 | "Axillary lymph nodes_Cluster-9" = "Endothelium", 13 | "Axillary lymph nodes_Cluster-10" = "Endothelium", 14 | "Axillary lymph nodes_Cluster-11" = "Endothelium", 15 | "Axillary lymph nodes_Cluster-12" = "B-cell", 16 | "Axillary lymph nodes_Cluster-13" = "Endothelium", 17 | 18 | "Breast_Cluster-0" = "Epithelium", 19 | "Breast_Cluster-1" = "Epithelium", 20 | "Breast_Cluster-2" = "Epithelium", 21 | "Breast_Cluster-3" = "Epithelium", 22 | "Breast_Cluster-4" = "Epithelium", 23 | "Breast_Cluster-5" = "Epithelium", 24 | "Breast_Cluster-6" = "Epithelium", 25 | "Breast_Cluster-7" = "Epithelium", 26 | "Breast_Cluster-8" = "Epithelium", 27 | "Breast_Cluster-9" = "Epithelium", 28 | "Breast_Cluster-10" = "Epithelium", 29 | "Breast_Cluster-11" = "Epithelium", 30 | "Breast_Cluster-12" = "Epithelium", 31 | "Breast_Cluster-13" = "Epithelium", 32 | "Breast_Cluster-14" = 
"Epithelium", 33 | "Breast_Cluster-15" = "Epithelium", 34 | "Breast_Cluster-16" = "Unknown", 35 | "Breast_Cluster-17" = "Unknown", 36 | 37 | "Colon_Cluster-0" = "Epithelium", 38 | "Colon_Cluster-1" = "Epithelium", 39 | "Colon_Cluster-2" = "Epithelium", 40 | "Colon_Cluster-3" = "Epithelium", 41 | "Colon_Cluster-4" = "Epithelium", 42 | "Colon_Cluster-5" = "Epithelium", 43 | "Colon_Cluster-6" = "Epithelium", 44 | "Colon_Cluster-7" = "Epithelium", 45 | "Colon_Cluster-8" = "Epithelium", 46 | "Colon_Cluster-9" = "Epithelium", 47 | "Colon_Cluster-10" = "Epithelium", 48 | 49 | "Colon 2_Cluster-0" = "Epithelium", 50 | "Colon 2_Cluster-1" = "Epithelium", 51 | "Colon 2_Cluster-2" = "Epithelium", 52 | "Colon 2_Cluster-3" = "Epithelium", 53 | "Colon 2_Cluster-4" = "T/NK-cell", 54 | "Colon 2_Cluster-5" = "Epithelium", 55 | "Colon 2_Cluster-6" = "Epithelium", 56 | "Colon 2_Cluster-7" = "Epithelium", 57 | "Colon 2_Cluster-8" = "Epithelium", 58 | "Colon 2_Cluster-9" = "Epithelium", 59 | "Colon 2_Cluster-10" = "Epithelium", 60 | "Colon 2_Cluster-11" = "Epithelium", 61 | "Colon 2_Cluster-12" = "B-cell", 62 | "Colon 2_Cluster-13" = "Epithelium", 63 | "Colon 2_Cluster-14" = "Epithelium", 64 | "Colon 2_Cluster-15" = "Basophil", 65 | 66 | "Eyes_Cluster-0" = "Unknown", 67 | "Eyes_Cluster-1" = "Unknown", 68 | "Eyes_Cluster-2" = "Unknown", 69 | "Eyes_Cluster-3" = "Unknown", 70 | "Eyes_Cluster-4" = "Unknown", 71 | "Eyes_Cluster-5" = "Unknown", 72 | "Eyes_Cluster-6" = "Unknown", 73 | "Eyes_Cluster-7" = "Unknown", 74 | "Eyes_Cluster-8" = "Unknown", 75 | "Eyes_Cluster-9" = "Unknown", 76 | "Eyes_Cluster-10" = "Unknown", 77 | "Eyes_Cluster-11" = "Endothelium", 78 | 79 | "Eyes macula_Cluster-0" = "T-cell", 80 | "Eyes macula_Cluster-1" = "Unknown", 81 | "Eyes macula_Cluster-2" = "Unknown", 82 | "Eyes macula_Cluster-3" = "Unknown", 83 | "Eyes macula_Cluster-4" = "Unknown", 84 | "Eyes macula_Cluster-5" = "Unknown", 85 | "Eyes macula_Cluster-6" = "Unknown", 86 | "Eyes macula_Cluster-7" = "Unknown", 87 
| "Eyes macula_Cluster-8" = "Monocyte", 88 | "Eyes macula_Cluster-9" = "Unknown", 89 | "Eyes macula_Cluster-10" = "B-cell", 90 | "Eyes macula_Cluster-11" = "Unknown", 91 | "Eyes macula_Cluster-12" = "Endothelium", 92 | 93 | "Eyes peripheral_Cluster-0" = "Unknown", 94 | "Eyes peripheral_Cluster-1" = "Unknown", 95 | "Eyes peripheral_Cluster-2" = "Unknown", 96 | "Eyes peripheral_Cluster-3" = "Unknown", 97 | "Eyes peripheral_Cluster-4" = "T/NK-cell", 98 | "Eyes peripheral_Cluster-5" = "Unknown", 99 | "Eyes peripheral_Cluster-6" = "Unknown", 100 | "Eyes peripheral_Cluster-7" = "Endothelium", 101 | "Eyes peripheral_Cluster-8" = "B/DC", 102 | "Eyes peripheral_Cluster-9" = "Unknown", 103 | "Eyes peripheral_Cluster-10" = "Basophil", 104 | "Eyes peripheral_Cluster-11" = "Unknown", 105 | 106 | "Head and neck lymph nodes_Cluster-0" = "Endothelium", 107 | "Head and neck lymph nodes_Cluster-1" = "Endothelium", 108 | "Head and neck lymph nodes_Cluster-2" = "Endothelium", 109 | "Head and neck lymph nodes_Cluster-3" = "Endothelium", 110 | "Head and neck lymph nodes_Cluster-4" = "Endothelium", 111 | "Head and neck lymph nodes_Cluster-5" = "Endothelium", 112 | "Head and neck lymph nodes_Cluster-6" = "Endothelium", 113 | "Head and neck lymph nodes_Cluster-7" = "Endothelium", 114 | "Head and neck lymph nodes_Cluster-8" = "Endothelium", 115 | "Head and neck lymph nodes_Cluster-9" = "Endothelium", 116 | "Head and neck lymph nodes_Cluster-10" = "Endothelium", 117 | "Head and neck lymph nodes_Cluster-11" = "Endothelium", 118 | 119 | "Heart_Cluster-0" = "Unknown", 120 | "Heart_Cluster-1" = "Unknown", 121 | "Heart_Cluster-2" = "Unknown", 122 | "Heart_Cluster-3" = "Endothelium", 123 | "Heart_Cluster-4" = "Endothelium/DC", 124 | "Heart_Cluster-5" = "Endothelium", 125 | "Heart_Cluster-6" = "Endothelium", 126 | "Heart_Cluster-7" = "Unknown", 127 | "Heart_Cluster-8" = "Unknown", 128 | "Heart_Cluster-9" = "Unknown", 129 | 130 | "Ileum_Cluster-0" = "Epithelium", 131 | "Ileum_Cluster-1" = 
"Epithelium", 132 | "Ileum_Cluster-2" = "Epithelium", 133 | "Ileum_Cluster-3" = "Epithelium", 134 | "Ileum_Cluster-4" = "Epithelium", 135 | "Ileum_Cluster-5" = "Epithelium", 136 | "Ileum_Cluster-6" = "Epithelium", 137 | "Ileum_Cluster-7" = "Epithelium", 138 | "Ileum_Cluster-8" = "Epithelium", 139 | "Ileum_Cluster-9" = "Epithelium", 140 | "Ileum_Cluster-10" = "Epithelium", 141 | "Ileum_Cluster-11" = "Epithelium", 142 | 143 | "Kidney_Cluster-0" = "Unknown", 144 | "Kidney_Cluster-1" = "Unknown", 145 | "Kidney_Cluster-2" = "Unknown", 146 | "Kidney_Cluster-3" = "Unknown", 147 | "Kidney_Cluster-4" = "Unknown", 148 | "Kidney_Cluster-5" = "Unknown", 149 | "Kidney_Cluster-6" = "Unknown", 150 | "Kidney_Cluster-7" = "T/NK-cell", 151 | "Kidney_Cluster-8" = "Monocyte", 152 | "Kidney_Cluster-9" = "Unknown", 153 | "Kidney_Cluster-10" = "Epithelium", 154 | "Kidney_Cluster-11" = "B-cell", 155 | 156 | "Liver_Cluster-0" = "T-cell", 157 | "Liver_Cluster-1" = "Unknown", 158 | "Liver_Cluster-2" = "Monocyte", 159 | "Liver_Cluster-3" = "Endothelium", 160 | "Liver_Cluster-4" = "Unknown", 161 | "Liver_Cluster-5" = "Unknown", 162 | "Liver_Cluster-6" = "Unknown", 163 | "Liver_Cluster-7" = "B-cell", 164 | "Liver_Cluster-8" = "NK-cell", 165 | "Liver_Cluster-9" = "Unknown", 166 | "Liver_Cluster-10" = "Monocyte", 167 | "Liver_Cluster-11" = "NK-cell", 168 | "Liver_Cluster-12" = "Unknown", 169 | "Liver_Cluster-13" = "Endothelium", 170 | "Liver_Cluster-14" = "B-cell", 171 | "Liver_Cluster-15" = "Epithelium", 172 | "Liver_Cluster-16" = "T-cell", 173 | "Liver_Cluster-17" = "Unknown", 174 | 175 | "Liver hep- CD45-_Cluster-0" = "Unknown", 176 | "Liver hep- CD45-_Cluster-1" = "T-cell", 177 | "Liver hep- CD45-_Cluster-2" = "Monocyte", 178 | "Liver hep- CD45-_Cluster-3" = "T-cell", 179 | "Liver hep- CD45-_Cluster-4" = "Monocyte", 180 | "Liver hep- CD45-_Cluster-5" = "Epithelium", 181 | "Liver hep- CD45-_Cluster-6" = "Endothelium", 182 | "Liver hep- CD45-_Cluster-7" = "NK-cell", 183 | "Liver hep- 
CD45-_Cluster-8" = "NK-cell", 184 | "Liver hep- CD45-_Cluster-9" = "Endothelium", 185 | "Liver hep- CD45-_Cluster-10" = "Endothelium", 186 | "Liver hep- CD45-_Cluster-11" = "Unknown", 187 | "Liver hep- CD45-_Cluster-12" = "B-cell", 188 | "Liver hep- CD45-_Cluster-13" = "Endothelium", 189 | "Liver hep- CD45-_Cluster-14" = "B-cell", 190 | "Liver hep- CD45-_Cluster-15" = "Unknown", 191 | 192 | "Liver hep- CD45+_Cluster-0" = "NK-cell", 193 | "Liver hep- CD45+_Cluster-1" = "T-cell", 194 | "Liver hep- CD45+_Cluster-2" = "T-cell", 195 | "Liver hep- CD45+_Cluster-3" = "Monocyte", 196 | "Liver hep- CD45+_Cluster-4" = "T-cell", 197 | "Liver hep- CD45+_Cluster-5" = "NK-cell", 198 | "Liver hep- CD45+_Cluster-6" = "Monocyte", 199 | "Liver hep- CD45+_Cluster-7" = "T-cell", 200 | "Liver hep- CD45+_Cluster-8" = "Monocyte", 201 | "Liver hep- CD45+_Cluster-9" = "B-cell", 202 | "Liver hep- CD45+_Cluster-10" = "T-cell", 203 | "Liver hep- CD45+_Cluster-11" = "Endothelium", 204 | "Liver hep- CD45+_Cluster-12" = "T/NK-cell", 205 | "Liver hep- CD45+_Cluster-13" = "Dendritic cell", 206 | "Liver hep- CD45+_Cluster-14" = "B/T-cell", 207 | 208 | "Lung_Cluster-0" = "Monocyte", 209 | "Lung_Cluster-1" = "Endothelium", 210 | "Lung_Cluster-2" = "Monocyte", 211 | "Lung_Cluster-3" = "B/T-cell", 212 | "Lung_Cluster-4" = "Granulocytes", 213 | "Lung_Cluster-5" = "Epithelium", 214 | "Lung_Cluster-6" = "Granulocytes", 215 | "Lung_Cluster-7" = "Epithelium", 216 | "Lung_Cluster-8" = "Epithelium", 217 | "Lung_Cluster-9" = "Endothelium", 218 | "Lung_Cluster-10" = "Epithelium", 219 | 220 | "Muscle_Cluster-0" = "Endothelium", 221 | "Muscle_Cluster-1" = "Unknown", 222 | "Muscle_Cluster-2" = "Unknown", 223 | "Muscle_Cluster-3" = "B/T/DC", 224 | "Muscle_Cluster-4" = "Unknown", 225 | "Muscle_Cluster-5" = "Endothelium", 226 | "Muscle_Cluster-6" = "Unknown", 227 | "Muscle_Cluster-7" = "Unknown", 228 | 229 | "NK cells blood_Cluster-0" = "NK-cell", 230 | "NK cells blood_Cluster-1" = "NK-cell", 231 | "NK cells 
blood_Cluster-2" = "NK-cell", 232 | "NK cells blood_Cluster-3" = "NK-cell", 233 | "NK cells blood_Cluster-4" = "NK-cell", 234 | "NK cells blood_Cluster-5" = "NK-cell", 235 | 236 | "NK cells bone marrow_Cluster-0" = "NK-cell", 237 | "NK cells bone marrow_Cluster-1" = "NK-cell", 238 | "NK cells bone marrow_Cluster-2" = "NK-cell", 239 | "NK cells bone marrow_Cluster-3" = "NK-cell", 240 | "NK cells bone marrow_Cluster-4" = "NK-cell", 241 | "NK cells bone marrow_Cluster-5" = "NK-cell", 242 | "NK cells bone marrow_Cluster-6" = "NK-cell", 243 | "NK cells bone marrow_Cluster-7" = "NK-cell", 244 | "NK cells bone marrow_Cluster-8" = "Dendritic cell", 245 | 246 | "PBMCs_Cluster-0" = "Monocyte", 247 | "PBMCs_Cluster-1" = "T-cell", 248 | "PBMCs_Cluster-2" = "T-cell", 249 | "PBMCs_Cluster-3" = "T-cell", 250 | "PBMCs_Cluster-4" = "T-cell", 251 | "PBMCs_Cluster-5" = "Unknown", 252 | "PBMCs_Cluster-6" = "T-cell", 253 | "PBMCs_Cluster-7" = "Monocyte", 254 | "PBMCs_Cluster-8" = "B-cell", 255 | "PBMCs_Cluster-9" = "Monocyte", 256 | "PBMCs_Cluster-10" = "Dendritic cell", 257 | "PBMCs_Cluster-11" = "Unknown", 258 | 259 | "Placenta_Cluster-0" = "Unknown", 260 | "Placenta_Cluster-1" = "Unknown", 261 | "Placenta_Cluster-2" = "Unknown", 262 | "Placenta_Cluster-3" = "Unknown", 263 | "Placenta_Cluster-4" = "Unknown", 264 | "Placenta_Cluster-5" = "Unknown", 265 | "Placenta_Cluster-6" = "Trophoblast", 266 | "Placenta_Cluster-7" = "Unknown", 267 | "Placenta_Cluster-8" = "Unknown", 268 | "Placenta_Cluster-9" = "Trophoblast", 269 | "Placenta_Cluster-10" = "Unknown", 270 | "Placenta_Cluster-11" = "Epithelium", 271 | "Placenta_Cluster-12" = "Unknown", 272 | "Placenta_Cluster-13" = "Unknown", 273 | "Placenta_Cluster-14" = "Unknown", 274 | "Placenta_Cluster-15" = "Unknown", 275 | "Placenta_Cluster-16" = "Unknown", 276 | "Placenta_Cluster-17" = "Unknown", 277 | "Placenta_Cluster-18" = "Unknown", 278 | "Placenta_Cluster-19" = "Endothelium", 279 | "Placenta_Cluster-20" = "Epithelium", 280 | 281 | 
"Placenta blood_Cluster-0" = "T-cell", 282 | "Placenta blood_Cluster-1" = "T-cell", 283 | "Placenta blood_Cluster-2" = "T/NK-cell", 284 | "Placenta blood_Cluster-3" = "T-cell", 285 | "Placenta blood_Cluster-4" = "T-cell", 286 | "Placenta blood_Cluster-5" = "Monocyte", 287 | "Placenta blood_Cluster-6" = "T-cell", 288 | "Placenta blood_Cluster-7" = "Monocyte", 289 | "Placenta blood_Cluster-8" = "T/NK-cell", 290 | "Placenta blood_Cluster-9" = "T-cell/Granulocyte", 291 | "Placenta blood_Cluster-10" = "Dendritic cell", 292 | "Placenta blood_Cluster-11" = "Monocyte", 293 | 294 | "Prostate_Cluster-0" = "Epithelium", 295 | "Prostate_Cluster-1" = "Epithelium", 296 | "Prostate_Cluster-2" = "Epithelium", 297 | "Prostate_Cluster-3" = "Unknown", 298 | "Prostate_Cluster-4" = "Epithelium", 299 | "Prostate_Cluster-5" = "Endothelium", 300 | "Prostate_Cluster-6" = "Unknown", 301 | "Prostate_Cluster-7" = "Epithelium", 302 | "Prostate_Cluster-8" = "Epithelium", 303 | "Prostate_Cluster-9" = "Epithelium", 304 | "Prostate_Cluster-10" = "Epithelium", 305 | "Prostate_Cluster-11" = "Unknown", 306 | "Prostate_Cluster-12" = "Epithelium", 307 | "Prostate_Cluster-13" = "Endothelium", 308 | "Prostate_Cluster-14" = "Unknown", 309 | "Prostate_Cluster-15" = "Epithelium", 310 | 311 | "Prostate 2_Cluster-0" = "Epithelium", 312 | "Prostate 2_Cluster-1" = "Endothelium", 313 | "Prostate 2_Cluster-2" = "Epithelium", 314 | "Prostate 2_Cluster-3" = "Unknown", 315 | "Prostate 2_Cluster-4" = "Epithelium", 316 | "Prostate 2_Cluster-5" = "Unknown", 317 | "Prostate 2_Cluster-6" = "Epithelium", 318 | "Prostate 2_Cluster-7" = "Epithelium", 319 | "Prostate 2_Cluster-8" = "Epithelium", 320 | "Prostate 2_Cluster-9" = "Endothelium", 321 | "Prostate 2_Cluster-10" = "Epithelium", 322 | "Prostate 2_Cluster-11" = "Endothelium", 323 | "Prostate 2_Cluster-12" = "Epithelium", 324 | 325 | "Prostate 3_Cluster-0" = "Epithelium", 326 | "Prostate 3_Cluster-1" = "Epithelium", 327 | "Prostate 3_Cluster-2" = "Epithelium", 328 | 
"Prostate 3_Cluster-3" = "Epithelium", 329 | "Prostate 3_Cluster-4" = "Epithelium", 330 | "Prostate 3_Cluster-5" = "Epithelium", 331 | "Prostate 3_Cluster-6" = "Epithelium", 332 | "Prostate 3_Cluster-7" = "Epithelium", 333 | "Prostate 3_Cluster-8" = "Epithelium", 334 | "Prostate 3_Cluster-9" = "Endothelium", 335 | "Prostate 3_Cluster-10" = "Epithelium", 336 | "Prostate 3_Cluster-11" = "Unknown", 337 | "Prostate 3_Cluster-12" = "Unknown", 338 | "Prostate 3_Cluster-13" = "Epithelium", 339 | "Prostate 3_Cluster-14" = "Epithelium", 340 | "Prostate 3_Cluster-15" = "Monocyte", 341 | "Prostate 3_Cluster-16" = "T-cell/Granulocyte", 342 | 343 | "Rectum_Cluster-0" = "Epithelium", 344 | "Rectum_Cluster-1" = "Epithelium", 345 | "Rectum_Cluster-2" = "Epithelium", 346 | "Rectum_Cluster-3" = "Epithelium", 347 | "Rectum_Cluster-4" = "Epithelium", 348 | "Rectum_Cluster-5" = "Epithelium", 349 | "Rectum_Cluster-6" = "Epithelium", 350 | "Rectum_Cluster-7" = "Epithelium", 351 | "Rectum_Cluster-8" = "Epithelium", 352 | "Rectum_Cluster-9" = "Epithelium", 353 | "Rectum_Cluster-10" = "Epithelium", 354 | "Rectum_Cluster-11" = "Epithelium", 355 | 356 | "Testis_Cluster-0" = "Unknown", 357 | "Testis_Cluster-1" = "Unknown", 358 | "Testis_Cluster-2" = "Unknown", 359 | "Testis_Cluster-3" = "Epithelium", 360 | "Testis_Cluster-4" = "Epithelium", 361 | "Testis_Cluster-5" = "Unknown", 362 | "Testis_Cluster-6" = "Unknown", 363 | "Testis_Cluster-7" = "Epithelium", 364 | "Testis_Cluster-8" = "Endothelium", 365 | "Testis_Cluster-9" = "Endothelium", 366 | "Testis_Cluster-10" = "Unknown", 367 | "Testis_Cluster-11" = "Epithelium", 368 | "Testis_Cluster-12" = "Epithelium", 369 | "Testis_Cluster-13" = "Unknown", 370 | "Testis_Cluster-14" = "Unknown", 371 | "Testis_Cluster-15" = "Monocyte/DC", 372 | "Testis_Cluster-16" = "Endothelium", 373 | "Testis_Cluster-17" = "Unknown", 374 | "Testis_Cluster-18" = "Unknown", 375 | "Testis_Cluster-19" = "Unknown", 376 | "Testis_Cluster-20" = "Unknown", 377 | 378 | "Testis 
2_Cluster-0" = "Unknown", 379 | "Testis 2_Cluster-1" = "Endothelium", 380 | "Testis 2_Cluster-2" = "Unknown", 381 | "Testis 2_Cluster-3" = "Unknown", 382 | "Testis 2_Cluster-4" = "Unknown", 383 | "Testis 2_Cluster-5" = "Unknown", 384 | "Testis 2_Cluster-6" = "Epithelium", 385 | "Testis 2_Cluster-7" = "Unknown", 386 | "Testis 2_Cluster-8" = "Unknown", 387 | "Testis 2_Cluster-9" = "Monocyte", 388 | "Testis 2_Cluster-10" = "Endothelium", 389 | "Testis 2_Cluster-11" = "Unknown", 390 | "Testis 2_Cluster-12" = "Unknown", 391 | "Testis 2_Cluster-13" = "Unknown", 392 | "Testis 2_Cluster-14" = "Unknown", 393 | "Testis 2_Cluster-15" = "Unknown", 394 | "Testis 2_Cluster-16" = "Unknown", 395 | 396 | "Testis 3_Cluster-0" = "Unknown", 397 | "Testis 3_Cluster-1" = "Unknown", 398 | "Testis 3_Cluster-2" = "Unknown", 399 | "Testis 3_Cluster-3" = "Endothelium", 400 | "Testis 3_Cluster-4" = "Monocyte", 401 | "Testis 3_Cluster-5" = "Unknown", 402 | "Testis 3_Cluster-6" = "Unknown", 403 | "Testis 3_Cluster-7" = "Epithelium", 404 | "Testis 3_Cluster-8" = "Unknown", 405 | "Testis 3_Cluster-9" = "Unknown", 406 | "Testis 3_Cluster-10" = "Unknown", 407 | "Testis 3_Cluster-11" = "Unknown", 408 | "Testis 3_Cluster-12" = "Endothelium", 409 | "Testis 3_Cluster-13" = "T-cell") %>% 410 | enframe("cluster", "cluster_annotation") %>% 411 | separate(cluster, into = c("dataset", "cluster_id"), sep = "_", remove = F) %>% 412 | mutate(celltype = case_when(cluster_annotation %in% c("Too few cells", "Unknown") ~ "Unknown", 413 | T ~ cluster_annotation)) %>% 414 | group_by(celltype) %>% 415 | mutate(unique_cluster_id = paste(celltype, row_number())) %>% 416 | ungroup() %>% 417 | left_join(c("Axillary lymph nodes_Cluster-0" = "", 418 | "Axillary lymph nodes_Cluster-1" = "", 419 | "Axillary lymph nodes_Cluster-2" = "", 420 | "Axillary lymph nodes_Cluster-3" = "", 421 | "Axillary lymph nodes_Cluster-4" = "", 422 | "Axillary lymph nodes_Cluster-5" = "", 423 | "Axillary lymph nodes_Cluster-6" = "", 424 | 
"Axillary lymph nodes_Cluster-7" = "", 425 | "Axillary lymph nodes_Cluster-8" = "", 426 | "Axillary lymph nodes_Cluster-9" = "", 427 | "Axillary lymph nodes_Cluster-10" = "", 428 | "Axillary lymph nodes_Cluster-11" = "", 429 | "Axillary lymph nodes_Cluster-12" = "", 430 | "Axillary lymph nodes_Cluster-13" = "", 431 | 432 | "Breast_Cluster-0" = "", 433 | "Breast_Cluster-1" = "", 434 | "Breast_Cluster-2" = "", 435 | "Breast_Cluster-3" = "", 436 | "Breast_Cluster-4" = "", 437 | "Breast_Cluster-5" = "", 438 | "Breast_Cluster-6" = "", 439 | "Breast_Cluster-7" = "", 440 | "Breast_Cluster-8" = "", 441 | "Breast_Cluster-9" = "", 442 | "Breast_Cluster-10" = "", 443 | "Breast_Cluster-11" = "", 444 | "Breast_Cluster-12" = "", 445 | "Breast_Cluster-13" = "", 446 | "Breast_Cluster-14" = "", 447 | "Breast_Cluster-15" = "Maybe immune cell mix", 448 | "Breast_Cluster-16" = "", 449 | "Breast_Cluster-17" = "", 450 | 451 | "Colon_Cluster-0" = "", 452 | "Colon_Cluster-1" = "", 453 | "Colon_Cluster-2" = "", 454 | "Colon_Cluster-3" = "", 455 | "Colon_Cluster-4" = "", 456 | "Colon_Cluster-5" = "", 457 | "Colon_Cluster-6" = "", 458 | "Colon_Cluster-7" = "", 459 | "Colon_Cluster-8" = "", 460 | "Colon_Cluster-9" = "", 461 | "Colon_Cluster-10" = "", 462 | 463 | "Colon 2_Cluster-0" = "", 464 | "Colon 2_Cluster-1" = "", 465 | "Colon 2_Cluster-2" = "", 466 | "Colon 2_Cluster-3" = "", 467 | "Colon 2_Cluster-4" = "", 468 | "Colon 2_Cluster-5" = "", 469 | "Colon 2_Cluster-6" = "", 470 | "Colon 2_Cluster-7" = "", 471 | "Colon 2_Cluster-8" = "", 472 | "Colon 2_Cluster-9" = "Possibly monocytes (CD14)", 473 | "Colon 2_Cluster-10" = "Possibly monocytes (CD14)", 474 | "Colon 2_Cluster-11" = "", 475 | "Colon 2_Cluster-12" = "Maybe also some monocytes", 476 | "Colon 2_Cluster-13" = "", 477 | "Colon 2_Cluster-14" = "", 478 | "Colon 2_Cluster-15" = "", 479 | 480 | "Eyes_Cluster-0" = "", 481 | "Eyes_Cluster-1" = "", 482 | "Eyes_Cluster-2" = "", 483 | "Eyes_Cluster-3" = "", 484 | "Eyes_Cluster-4" = "", 485 | 
"Eyes_Cluster-5" = "", 486 | "Eyes_Cluster-6" = "", 487 | "Eyes_Cluster-7" = "", 488 | "Eyes_Cluster-8" = "", 489 | "Eyes_Cluster-9" = "", 490 | "Eyes_Cluster-10" = "", 491 | "Eyes_Cluster-11" = "", 492 | 493 | "Eyes macula_Cluster-0" = "", 494 | "Eyes macula_Cluster-1" = "", 495 | "Eyes macula_Cluster-2" = "", 496 | "Eyes macula_Cluster-3" = "", 497 | "Eyes macula_Cluster-4" = "", 498 | "Eyes macula_Cluster-5" = "", 499 | "Eyes macula_Cluster-6" = "", 500 | "Eyes macula_Cluster-7" = "", 501 | "Eyes macula_Cluster-8" = "Or monocyte", 502 | "Eyes macula_Cluster-9" = "", 503 | "Eyes macula_Cluster-10" = "", 504 | "Eyes macula_Cluster-11" = "", 505 | "Eyes macula_Cluster-12" = "", 506 | 507 | "Eyes peripheral_Cluster-0" = "", 508 | "Eyes peripheral_Cluster-1" = "", 509 | "Eyes peripheral_Cluster-2" = "", 510 | "Eyes peripheral_Cluster-3" = "", 511 | "Eyes peripheral_Cluster-4" = "", 512 | "Eyes peripheral_Cluster-5" = "", 513 | "Eyes peripheral_Cluster-6" = "", 514 | "Eyes peripheral_Cluster-7" = "", 515 | "Eyes peripheral_Cluster-8" = "B-cell and monocyte mix?", 516 | "Eyes peripheral_Cluster-9" = "", 517 | "Eyes peripheral_Cluster-10" = "", 518 | "Eyes peripheral_Cluster-11" = "", 519 | 520 | "Head and neck lymph nodes_Cluster-0" = "", 521 | "Head and neck lymph nodes_Cluster-1" = "", 522 | "Head and neck lymph nodes_Cluster-2" = "", 523 | "Head and neck lymph nodes_Cluster-3" = "", 524 | "Head and neck lymph nodes_Cluster-4" = "Not sure at all", 525 | "Head and neck lymph nodes_Cluster-5" = "", 526 | "Head and neck lymph nodes_Cluster-6" = "", 527 | "Head and neck lymph nodes_Cluster-7" = "", 528 | "Head and neck lymph nodes_Cluster-8" = "", 529 | "Head and neck lymph nodes_Cluster-9" = "", 530 | "Head and neck lymph nodes_Cluster-10" = "Not sure at all", 531 | "Head and neck lymph nodes_Cluster-11" = "Not sure at all", 532 | 533 | "Heart_Cluster-0" = "", 534 | "Heart_Cluster-1" = "", 535 | "Heart_Cluster-2" = "", 536 | "Heart_Cluster-3" = "", 537 | 
"Heart_Cluster-4" = "Maybe mixed with immune cells", 538 | "Heart_Cluster-5" = "", 539 | "Heart_Cluster-6" = "", 540 | "Heart_Cluster-7" = "", 541 | "Heart_Cluster-8" = "", 542 | "Heart_Cluster-9" = "", 543 | 544 | "Ileum_Cluster-0" = "", 545 | "Ileum_Cluster-1" = "", 546 | "Ileum_Cluster-2" = "", 547 | "Ileum_Cluster-3" = "", 548 | "Ileum_Cluster-4" = "", 549 | "Ileum_Cluster-5" = "", 550 | "Ileum_Cluster-6" = "", 551 | "Ileum_Cluster-7" = "", 552 | "Ileum_Cluster-8" = "", 553 | "Ileum_Cluster-9" = "", 554 | "Ileum_Cluster-10" = "", 555 | "Ileum_Cluster-11" = "", 556 | 557 | "Kidney_Cluster-0" = "", 558 | "Kidney_Cluster-1" = "", 559 | "Kidney_Cluster-2" = "", 560 | "Kidney_Cluster-3" = "", 561 | "Kidney_Cluster-4" = "", 562 | "Kidney_Cluster-5" = "", 563 | "Kidney_Cluster-6" = "", 564 | "Kidney_Cluster-7" = "", 565 | "Kidney_Cluster-8" = "", 566 | "Kidney_Cluster-9" = "", 567 | "Kidney_Cluster-10" = "", 568 | "Kidney_Cluster-11" = "", 569 | 570 | "Liver_Cluster-0" = "", 571 | "Liver_Cluster-1" = "", 572 | "Liver_Cluster-2" = "", 573 | "Liver_Cluster-3" = "", 574 | "Liver_Cluster-4" = "", 575 | "Liver_Cluster-5" = "", 576 | "Liver_Cluster-6" = "", 577 | "Liver_Cluster-7" = "", 578 | "Liver_Cluster-8" = "", 579 | "Liver_Cluster-9" = "", 580 | "Liver_Cluster-10" = "", 581 | "Liver_Cluster-11" = "", 582 | "Liver_Cluster-12" = "", 583 | "Liver_Cluster-13" = "", 584 | "Liver_Cluster-14" = "", 585 | "Liver_Cluster-15" = "", 586 | "Liver_Cluster-16" = "", 587 | "Liver_Cluster-17" = "", 588 | 589 | "Liver hep- CD45-_Cluster-0" = "", 590 | "Liver hep- CD45-_Cluster-1" = "", 591 | "Liver hep- CD45-_Cluster-2" = "", 592 | "Liver hep- CD45-_Cluster-3" = "", 593 | "Liver hep- CD45-_Cluster-4" = "", 594 | "Liver hep- CD45-_Cluster-5" = "", 595 | "Liver hep- CD45-_Cluster-6" = "", 596 | "Liver hep- CD45-_Cluster-7" = "", 597 | "Liver hep- CD45-_Cluster-8" = "", 598 | "Liver hep- CD45-_Cluster-9" = "", 599 | "Liver hep- CD45-_Cluster-10" = "", 600 | "Liver hep- CD45-_Cluster-11" 
= "", 601 | "Liver hep- CD45-_Cluster-12" = "", 602 | "Liver hep- CD45-_Cluster-13" = "", 603 | "Liver hep- CD45-_Cluster-14" = "", 604 | "Liver hep- CD45-_Cluster-15" = "", 605 | 606 | "Liver hep- CD45+_Cluster-0" = "", 607 | "Liver hep- CD45+_Cluster-1" = "", 608 | "Liver hep- CD45+_Cluster-2" = "", 609 | "Liver hep- CD45+_Cluster-3" = "", 610 | "Liver hep- CD45+_Cluster-4" = "", 611 | "Liver hep- CD45+_Cluster-5" = "", 612 | "Liver hep- CD45+_Cluster-6" = "", 613 | "Liver hep- CD45+_Cluster-7" = "", 614 | "Liver hep- CD45+_Cluster-8" = "", 615 | "Liver hep- CD45+_Cluster-9" = "", 616 | "Liver hep- CD45+_Cluster-10" = "", 617 | "Liver hep- CD45+_Cluster-11" = "", 618 | "Liver hep- CD45+_Cluster-12" = "", 619 | "Liver hep- CD45+_Cluster-13" = "", 620 | "Liver hep- CD45+_Cluster-14" = "", 621 | 622 | "Lung_Cluster-0" = "", 623 | "Lung_Cluster-1" = "", 624 | "Lung_Cluster-2" = "", 625 | "Lung_Cluster-3" = "Could contain granulocytes too", 626 | "Lung_Cluster-4" = "", 627 | "Lung_Cluster-5" = "", 628 | "Lung_Cluster-6" = "", 629 | "Lung_Cluster-7" = "", 630 | "Lung_Cluster-8" = "", 631 | "Lung_Cluster-9" = "", 632 | "Lung_Cluster-10" = "", 633 | 634 | "Muscle_Cluster-0" = "", 635 | "Muscle_Cluster-1" = "", 636 | "Muscle_Cluster-2" = "", 637 | "Muscle_Cluster-3" = "", 638 | "Muscle_Cluster-4" = "", 639 | "Muscle_Cluster-5" = "", 640 | "Muscle_Cluster-6" = "", 641 | "Muscle_Cluster-7" = "", 642 | 643 | "NK cells blood_Cluster-0" = "", 644 | "NK cells blood_Cluster-1" = "", 645 | "NK cells blood_Cluster-2" = "", 646 | "NK cells blood_Cluster-3" = "", 647 | "NK cells blood_Cluster-4" = "", 648 | "NK cells blood_Cluster-5" = "", 649 | 650 | "NK cells bone marrow_Cluster-0" = "", 651 | "NK cells bone marrow_Cluster-1" = "", 652 | "NK cells bone marrow_Cluster-2" = "", 653 | "NK cells bone marrow_Cluster-3" = "", 654 | "NK cells bone marrow_Cluster-4" = "", 655 | "NK cells bone marrow_Cluster-5" = "", 656 | "NK cells bone marrow_Cluster-6" = "", 657 | "NK cells bone 
marrow_Cluster-7" = "", 658 | "NK cells bone marrow_Cluster-8" = "", 659 | 660 | "PBMCs_Cluster-0" = "Monocyte", 661 | "PBMCs_Cluster-1" = "", 662 | "PBMCs_Cluster-2" = "Possibly MAIT T-cells - high KLRB1", 663 | "PBMCs_Cluster-3" = "", 664 | "PBMCs_Cluster-4" = "", 665 | "PBMCs_Cluster-5" = "T-cell and possibly NK-cell mix. Maybe also DC", 666 | "PBMCs_Cluster-6" = "T-cell", 667 | "PBMCs_Cluster-7" = "", 668 | "PBMCs_Cluster-8" = "", 669 | "PBMCs_Cluster-9" = "Remove - Too few cells", 670 | "PBMCs_Cluster-10" = "Probably dendritic cells based on classification. Likely plasmacytoid DC - has low CD11c (ITGAX)", 671 | "PBMCs_Cluster-11" = "Remove - Too few cells", 672 | 673 | "Placenta_Cluster-0" = "", 674 | "Placenta_Cluster-1" = "", 675 | "Placenta_Cluster-2" = "", 676 | "Placenta_Cluster-3" = "", 677 | "Placenta_Cluster-4" = "", 678 | "Placenta_Cluster-5" = "", 679 | "Placenta_Cluster-6" = "", 680 | "Placenta_Cluster-7" = "", 681 | "Placenta_Cluster-8" = "", 682 | "Placenta_Cluster-9" = "", 683 | "Placenta_Cluster-10" = "", 684 | "Placenta_Cluster-11" = "", 685 | "Placenta_Cluster-12" = "", 686 | "Placenta_Cluster-13" = "", 687 | "Placenta_Cluster-14" = "", 688 | "Placenta_Cluster-15" = "", 689 | "Placenta_Cluster-16" = "", 690 | "Placenta_Cluster-17" = "", 691 | "Placenta_Cluster-18" = "", 692 | "Placenta_Cluster-19" = "", 693 | "Placenta_Cluster-20" = "", 694 | 695 | "Placenta blood_Cluster-0" = "", 696 | "Placenta blood_Cluster-1" = "", 697 | "Placenta blood_Cluster-2" = "T-cell and possibly NK-cell mix. Maybe also DC", 698 | "Placenta blood_Cluster-3" = "", 699 | "Placenta blood_Cluster-4" = "Possibly MAIT T-cells - high KLRB1", 700 | "Placenta blood_Cluster-5" = "", 701 | "Placenta blood_Cluster-6" = "", 702 | "Placenta blood_Cluster-7" = "Monocytes and/or dendritic based on classification. Possibly Non-classical monocytes based on low CD14", 703 | "Placenta blood_Cluster-8" = "T-cell and possibly NK-cell mix. 
Maybe also DC", 704 | "Placenta blood_Cluster-9" = "Likely gdT-cell - has low CD8 and CD4", 705 | "Placenta blood_Cluster-10" = "Possibly dendritic cells based on classification. Likely plasmacytoid DC - has low CD11c (ITGAX)", 706 | "Placenta blood_Cluster-11" = "possibly granulocyte contamination based on classification", 707 | 708 | "Prostate_Cluster-0" = "", 709 | "Prostate_Cluster-1" = "", 710 | "Prostate_Cluster-2" = "", 711 | "Prostate_Cluster-3" = "", 712 | "Prostate_Cluster-4" = "", 713 | "Prostate_Cluster-5" = "", 714 | "Prostate_Cluster-6" = "", 715 | "Prostate_Cluster-7" = "", 716 | "Prostate_Cluster-8" = "", 717 | "Prostate_Cluster-9" = "", 718 | "Prostate_Cluster-10" = "", 719 | "Prostate_Cluster-11" = "", 720 | "Prostate_Cluster-12" = "", 721 | "Prostate_Cluster-13" = "", 722 | "Prostate_Cluster-14" = "", 723 | "Prostate_Cluster-15" = "", 724 | 725 | "Prostate 2_Cluster-0" = "", 726 | "Prostate 2_Cluster-1" = "", 727 | "Prostate 2_Cluster-2" = "", 728 | "Prostate 2_Cluster-3" = "", 729 | "Prostate 2_Cluster-4" = "", 730 | "Prostate 2_Cluster-5" = "", 731 | "Prostate 2_Cluster-6" = "", 732 | "Prostate 2_Cluster-7" = "", 733 | "Prostate 2_Cluster-8" = "", 734 | "Prostate 2_Cluster-9" = "", 735 | "Prostate 2_Cluster-10" = "", 736 | "Prostate 2_Cluster-11" = "", 737 | "Prostate 2_Cluster-12" = "", 738 | 739 | "Prostate 3_Cluster-0" = "", 740 | "Prostate 3_Cluster-1" = "", 741 | "Prostate 3_Cluster-2" = "", 742 | "Prostate 3_Cluster-3" = "", 743 | "Prostate 3_Cluster-4" = "", 744 | "Prostate 3_Cluster-5" = "", 745 | "Prostate 3_Cluster-6" = "", 746 | "Prostate 3_Cluster-7" = "", 747 | "Prostate 3_Cluster-8" = "", 748 | "Prostate 3_Cluster-9" = "", 749 | "Prostate 3_Cluster-10" = "", 750 | "Prostate 3_Cluster-11" = "", 751 | "Prostate 3_Cluster-12" = "", 752 | "Prostate 3_Cluster-13" = "", 753 | "Prostate 3_Cluster-14" = "", 754 | "Prostate 3_Cluster-15" = "", 755 | "Prostate 3_Cluster-16" = "", 756 | 757 | "Rectum_Cluster-0" = "", 758 | "Rectum_Cluster-1" 
= "", 759 | "Rectum_Cluster-2" = "", 760 | "Rectum_Cluster-3" = "", 761 | "Rectum_Cluster-4" = "", 762 | "Rectum_Cluster-5" = "", 763 | "Rectum_Cluster-6" = "", 764 | "Rectum_Cluster-7" = "", 765 | "Rectum_Cluster-8" = "", 766 | "Rectum_Cluster-9" = "", 767 | "Rectum_Cluster-10" = "", 768 | "Rectum_Cluster-11" = "", 769 | 770 | "Testis_Cluster-0" = "", 771 | "Testis_Cluster-1" = "", 772 | "Testis_Cluster-2" = "", 773 | "Testis_Cluster-3" = "", 774 | "Testis_Cluster-4" = "", 775 | "Testis_Cluster-5" = "", 776 | "Testis_Cluster-6" = "", 777 | "Testis_Cluster-7" = "", 778 | "Testis_Cluster-8" = "", 779 | "Testis_Cluster-9" = "", 780 | "Testis_Cluster-10" = "", 781 | "Testis_Cluster-11" = "", 782 | "Testis_Cluster-12" = "", 783 | "Testis_Cluster-13" = "", 784 | "Testis_Cluster-14" = "", 785 | "Testis_Cluster-15" = "", 786 | "Testis_Cluster-16" = "", 787 | "Testis_Cluster-17" = "", 788 | "Testis_Cluster-18" = "", 789 | "Testis_Cluster-19" = "", 790 | "Testis_Cluster-20" = "", 791 | 792 | "Testis 2_Cluster-0" = "", 793 | "Testis 2_Cluster-1" = "", 794 | "Testis 2_Cluster-2" = "", 795 | "Testis 2_Cluster-3" = "", 796 | "Testis 2_Cluster-4" = "", 797 | "Testis 2_Cluster-5" = "", 798 | "Testis 2_Cluster-6" = "", 799 | "Testis 2_Cluster-7" = "", 800 | "Testis 2_Cluster-8" = "", 801 | "Testis 2_Cluster-9" = "", 802 | "Testis 2_Cluster-10" = "", 803 | "Testis 2_Cluster-11" = "", 804 | "Testis 2_Cluster-12" = "", 805 | "Testis 2_Cluster-13" = "", 806 | "Testis 2_Cluster-14" = "", 807 | "Testis 2_Cluster-15" = "", 808 | "Testis 2_Cluster-16" = "", 809 | 810 | "Testis 3_Cluster-0" = "", 811 | "Testis 3_Cluster-1" = "", 812 | "Testis 3_Cluster-2" = "", 813 | "Testis 3_Cluster-3" = "", 814 | "Testis 3_Cluster-4" = "", 815 | "Testis 3_Cluster-5" = "", 816 | "Testis 3_Cluster-6" = "", 817 | "Testis 3_Cluster-7" = "", 818 | "Testis 3_Cluster-8" = "", 819 | "Testis 3_Cluster-9" = "", 820 | "Testis 3_Cluster-10" = "", 821 | "Testis 3_Cluster-11" = "", 822 | "Testis 3_Cluster-12" = "", 
823 | "Testis 3_Cluster-13" = "") %>% 824 | enframe("cluster", "comment")) %>% 825 | mutate(dataset_i = unclass(as_factor(dataset))) 826 | 827 | 828 | cluster_annotation %>% 829 | mutate(celltype = ifelse(celltype == "Unknown", "", celltype)) %>% 830 | select(`dataset number` = dataset_i, 831 | dataset, 832 | cluster = cluster_id, 833 | `annotation 1 (Max)` = celltype, 834 | `comment 1 (Max)` = comment) %>% 835 | write_csv(savepath("20200305 Cluster annotation Max.csv")) 836 | 837 | -------------------------------------------------------------------------------- /scripts/cluster annotation.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Classification" 3 | author: "Max J. Karlsson" 4 | date: "2020 M03 3" 5 | output: pdf_document 6 | editor_options: 7 | chunk_output_type: console 8 | --- 9 | 10 | #Setup 11 | 12 | ```{r setup, include=FALSE} 13 | 14 | library(tidyverse) 15 | library(magrittr) 16 | library(umap) 17 | library(pcaMethods) 18 | library(pheatmap) 19 | library(ggalluvial) 20 | library(dendextend) 21 | library(ggdendro) 22 | library(patchwork) 23 | library(ggraph) 24 | library(igraph) 25 | 26 | setwd("C:/Users/max.karlsson/Documents/Scilifelab/Projects/Blood cell deconvolution") 27 | 28 | source("scripts/theme.R") 29 | source("scripts/functions_classification.R") 30 | source("scripts/functions_graphics.R") 31 | source("scripts/functions_utility.R") 32 | 33 | 34 | blood_cell_category <- 35 | read_delim("data/bloodcells_hpa_regional_category_92.tsv", delim = "\t") 36 | 37 | blood_cell_category_fine <- 38 | read_delim("data/bloodcells_hpa_category_92.tsv", delim = "\t") 39 | 40 | blood_cell_hierarchy <- 41 | read_delim("data/meta/blood_atlas_hierarchy.txt", delim = "\t") 42 | 43 | gene_info92 <- 44 | read_delim("data/geninfo_92.tsv", delim = "\t") 45 | 46 | CD_marker_list <- 47 | read_delim("data/20180628 CD markers ProteinAtlas.tsv", delim = "\t") 48 | 49 | 50 | 51 | datafile_list <- 52 | 
# Index every entry under the results folder and tag it with its dataset
# (first path component), its file name and a file type derived from the name.
list.files("data/EXPresults_used_for_demo/",
           include.dirs = TRUE, recursive = TRUE) %>%
  enframe("file", "filepath") %>%
  # One group per path, so `dataset` is a single string when used as a pattern.
  group_by(filepath) %>%
  mutate(dataset = gsub("/.*", "", filepath),
         # fixed = TRUE: the dataset name is literal text, not a regex. Names
         # containing metacharacters (e.g. a trailing "+") otherwise fail to
         # match and the folder prefix stays in `filename`.
         filename = gsub(paste0(dataset, "/"), "", filepath, fixed = TRUE),

         # Entries matching none of the patterns get filetype NA.
         filetype = case_when(filename == dataset ~ "folder",
                              grepl("cluster2cellNames.txt$", filename) ~ "cluster names",
                              grepl("Top100DiffExpGenesPerGroup.txt$", filename) ~ "top genes",
                              grepl("corrMat_HPA_BLOOD", filename) ~ "blood correlation",
                              grepl("corrMat_HPA_TISSUE", filename) ~ "tissue correlation",
                              grepl("count_grouped.txt$", filename) ~ "counts",
                              grepl("scEXP_cell_types_standard.txt$", filename) ~ "normalized counts")) %>%
  ungroup() %>%
  filter(dataset != "discard")

# Overview: number of indexed entries per file type.
datafile_list %>%
  group_by(filetype) %>%
  summarise(n = n())

all_datasets <-
  datafile_list %>%
  pull(dataset) %>%
  unique()

# Datasets to load; swap in the commented vector to work on a subset.
load_datasets <-
  # c("PBMCs", "Placenta blood")
  all_datasets

# Cluster id -> name table, one headerless file per dataset.
# NOTE(review): `cluster_levels` is not defined in the visible part of this
# file; presumably it comes from one of the sourced scripts - confirm.
cluster_names <-
  datafile_list %>%
  filter(dataset %in% load_datasets) %>%
  filter(filetype == "cluster names") %>%
  group_by(dataset) %>%
  do({
    read_delim(paste0("data/EXPresults_used_for_demo/", .$filepath), delim = "\t",
               col_names = c("cluster_id", "name"))
  }) %>%
  ungroup() %>%
  mutate(cluster_id = factor(cluster_id, levels = cluster_levels),
         dataset_i = unclass(as_factor(dataset)))

# Top differentially expressed genes: one column per cluster in the file,
# reshaped to long format with the row position kept as `order`.
cluster_top_genes <-
  datafile_list %>%
  filter(dataset %in% load_datasets) %>%
  filter(filetype == "top genes") %>%
  group_by(dataset) %>%
  do({

    read_delim(paste0("data/EXPresults_used_for_demo/", .$filepath), delim = "\t") %>%
      mutate(order = row_number()) %>%
      gather(cluster_id, gene, -order)
  }) %>%
  ungroup() %>%
  mutate(cluster_id = factor(cluster_id, levels = cluster_levels))


cluster_blood_cor <-
  datafile_list %>%
filter(dataset %in% load_datasets) %>%
  filter(filetype == "blood correlation") %>%
  group_by(dataset) %>%
  do({

    # First column holds the blood cell type; every other column is a cluster.
    read_delim(paste0("data/EXPresults_used_for_demo/", .$filepath), delim = "\t") %>%
      gather(cluster, correlation, -1)
  }) %>%
  ungroup() %>%
  rename(cell_type = `Blood cell types`) %>%
  # Column headers carry a parenthesised suffix: strip it to get the cluster id
  # and keep its content as `n_cells` (still a character column here).
  mutate(cluster_id = gsub("\\(.*\\)", "", cluster),
         n_cells = gsub("\\(|\\)", "", str_extract(cluster, "\\(.*\\)$"))) %>%
  select(1:3, cluster_id, n_cells, correlation) %>%
  mutate(cluster_id = factor(cluster_id, levels = cluster_levels))

# Normalised expression per gene and cluster, long format.
cluster_norm_count <-
  datafile_list %>%
  filter(dataset %in% load_datasets) %>%
  filter(filetype == "normalized counts") %>%
  group_by(dataset) %>%
  do({

    read_delim(paste0("data/EXPresults_used_for_demo/", .$filepath), delim = "\t") %>%
      gather(cluster_id, norm_count, -1)
  }) %>%
  ungroup() %>%
  # Drop the "Cell No." row and the "TOTAL" column: neither is a gene/cluster.
  filter(Features != "Cell No.") %>%
  rename(ensg_id = Features) %>%
  filter(cluster_id != "TOTAL") %>%
  mutate(cluster_id = factor(cluster_id, levels = cluster_levels))

# Cells per cluster for each dataset; bars are coloured by whether the
# cluster holds more than 200 cells.
cluster_blood_cor %>%
  select(dataset, cluster_id, n_cells) %>%
  unique() %>%
  filter(cluster_id != "TOTAL") %>%
  ggplot(aes(cluster_id, as.numeric(n_cells), fill = as.numeric(n_cells) > 200)) +
  geom_col() +
  facet_wrap(~dataset) +
  coord_flip()

# Builds `cluster_annotation` (manual cell type annotation per cluster).
source("scripts/annotation_file.R")
cluster_annotation

```

# Basic plots

```{r}


# Number of top genes per cluster, split by the blood cell type(s) each gene
# is enriched in.
cluster_top_genes %>%
  left_join(cluster_annotation) %>%
  left_join(gene_info92 %>%
              select(1, 2),
            by = c("gene" = "gene_name")) %>%
  left_join(blood_cell_category) %>%
  # One row per enhanced tissue; genes without one fall back to their category.
  separate_rows(enhanced_tissues, sep = ",") %>%
  mutate(enhanced_tissues = ifelse(is.na(enhanced_tissues),
                                   specificity_category,
                                   enhanced_tissues)) %>%
filter(specificity_category %in% c("Tissue enriched", "Group enriched")) %>%
  group_by(dataset, cluster, cluster_id, unique_cluster_id, enhanced_tissues) %>%
  summarise(n = n()) %>%
  ungroup() %>%
  mutate(enhanced_tissues = str_to_sentence(enhanced_tissues),
         cluster_id = factor(cluster_id,
                             levels = paste0("Cluster-", 0:100))) %>%
  ggplot(aes(unique_cluster_id, n, fill = enhanced_tissues)) +
  geom_col() +
  coord_flip() +
  scale_fill_manual(values = c(tissue_colors, gene_category_pal)) +
  facet_wrap(~dataset, scales = "free")


# Heatmap of cluster-vs-blood-cell-type correlations. Rows are named
# "<dataset>_<cluster_id>" so they line up with `cluster_annotation$cluster`.
cluster_blood_cor %>%
  mutate(dataset_cluster = paste(dataset, cluster_id, sep = "_")) %>%
  select(dataset_cluster, cell_type, correlation) %>%
  spread(cell_type, correlation) %>%
  column_to_rownames("dataset_cluster") %>%
  pheatmap(clustering_method = "ward.D2",
           cutree_cols = 6,
           cutree_rows = 4,
           annotation_row = cluster_annotation %>%
             select(cluster, celltype_annotation = celltype, dataset) %>%
             column_to_rownames("cluster"),
           annotation_col = blood_cell_hierarchy %>%
             select(content, celltype = content_l1) %>%
             column_to_rownames("content"),
           annotation_colors = list(celltype = tissue_colors,
                                    celltype_annotation = tissue_colors),
           annotation_legend = FALSE)

```

## CD marker PCA

```{r}

# Clusters x genes matrix of log10(norm_count + 1) over all genes.
pca_data <-
  cluster_norm_count %>%
  left_join(cluster_annotation,
            by = c("dataset", "cluster_id")) %>%
  select(unique_cluster_id, ensg_id, norm_count) %>%
  spread(unique_cluster_id, norm_count) %>%
  column_to_rownames("ensg_id") %>%
  {log10(.
+ 1)} %>%
  t()

# UMAP on all genes.
# NOTE(review): `umap_calc()` is not defined in the visible part of this file;
# the code below only relies on its `$layout` having columns V1 and V2.
cluster_umap1 <-
  pca_data %>%
  umap_calc(npcs = 2, n_neighbors = 200)

cluster_umap1$layout %>%
  as_tibble(rownames = "unique_cluster_id") %>%
  left_join(cluster_annotation) %>%


  ggplot(aes(V1, V2, color = celltype)) +

  # Density shading per annotated cell type; cell types with a single cluster
  # and "Unknown" are left out. `. %>% ...` is a function applied to the plot
  # data by the layer.
  stat_density_2d(data = . %>%
                    group_by(celltype) %>%
                    mutate(n = n()) %>%
                    ungroup() %>%
                    filter(n > 1) %>%
                    filter(celltype != "Unknown"),
                  geom = "polygon",
                  aes(V1, V2,
                      fill = celltype,
                      alpha = as.factor(..level..)),
                  inherit.aes = FALSE,
                  bins = 20,
                  show.legend = FALSE, h = c(1, 1)) +
  geom_point(size = 5,
             alpha = 0.7) +
  # Label: dataset number followed by the numeric part of the cluster id.
  geom_text(aes(label = paste(dataset_i, str_extract(cluster_id, "\\d*$"))),
            color = "black") +
  scale_color_manual(values = tissue_colors) +
  scale_fill_manual(values = tissue_colors) +
  scale_alpha_discrete(range = c(0, 0.1)) +

  stripped_theme

ggsave(savepath("total UMAP.pdf"), width = 10, height = 10)

####
# Same matrix as above, restricted to genes listed in `CD_marker_list$Ensembl`.
CD_pca_data <-
  cluster_norm_count %>%
  filter(ensg_id %in% CD_marker_list$Ensembl) %>%
  left_join(cluster_annotation,
            by = c("dataset", "cluster_id")) %>%
  select(unique_cluster_id, ensg_id, norm_count) %>%
  spread(unique_cluster_id, norm_count) %>%
  column_to_rownames("ensg_id") %>%
  {log10(. + 1)} %>%
  t()


# Spearman correlation between clusters on log10(norm_count + 1), all genes.
cluster_norm_spearman <-
  cluster_norm_count %>%
  left_join(cluster_annotation,
            by = c("dataset", "cluster_id")) %>%
  select(cluster, ensg_id, norm_count) %>%
  spread(cluster, norm_count) %>%
  column_to_rownames("ensg_id") %>%
  {log10(.
+ 1)} %>%
  cor(method = "spearman")

cluster_norm_spearman %>%
  pheatmap(annotation_row = cluster_annotation %>%
             select(cluster, celltype) %>%
             column_to_rownames("cluster"),
           annotation_colors = list(celltype = tissue_colors))

# PCA on the CD marker genes.
# NOTE(review): `pca_calc()` is not defined in the visible part of this file;
# the code below only relies on its `$scores` and `$loadings` having PC columns.
cluster_CD_pca <-
  CD_pca_data %>%
  pca_calc(npcs = 10)

# Score ranges, used to rescale the loadings onto the score axes.
PC1_lims <-
  cluster_CD_pca$scores[,"PC1"] %>%
  {c(min(.), max(.))}

PC2_lims <-
  cluster_CD_pca$scores[,"PC2"] %>%
  {c(min(.), max(.))}

cluster_CD_pca$scores %>%
  as_tibble(rownames = "unique_cluster_id") %>%
  left_join(cluster_annotation) %>%
  ggplot(aes(PC1, PC2, color = celltype)) +

  # Density shading per annotated cell type; cell types with a single cluster
  # and "Unknown" are left out.
  stat_density_2d(data = . %>%
                    group_by(celltype) %>%
                    mutate(n = n()) %>%
                    ungroup() %>%
                    filter(n > 1) %>%
                    filter(celltype != "Unknown"),
                  geom = "polygon",
                  aes(PC1, PC2,
                      fill = celltype,
                      alpha = as.factor(..level..)),
                  inherit.aes = FALSE,
                  bins = 50,
                  show.legend = FALSE, h = c(4, 4)) +

  # scale_x_continuous(expand = expand_scale(0.5))+
  # scale_y_continuous(expand = expand_scale(0.5))+

  # Gene loadings drawn as labels, rescaled to the score ranges; the 2000
  # genes with the largest rescaled length are kept.
  geom_text(data = cluster_CD_pca$loadings %>%
              as_tibble(rownames = "ensg_id") %>%
              left_join(gene_info92) %>%
              select(1:3, gene_name) %>%
              mutate(PC1 = scales::rescale(PC1, PC1_lims),
                     PC2 = scales::rescale(PC2, PC2_lims),
                     len = sqrt(PC1^2 + PC2^2)) %>%
              arrange(-len) %>%
              head(2000),
            aes(PC1, PC2, label = gene_name ),
            inherit.aes = FALSE,
            alpha = 0.4) +
  geom_point(size = 5,
             alpha = 0.7) +
  geom_text(aes(label = paste(dataset_i, str_extract(cluster_id, "\\d*$"))),
            color = "black") +
  scale_color_manual(values = tissue_colors) +
  scale_fill_manual(values = tissue_colors) +
  scale_alpha_discrete(range = c(0, 0.1)) +


  stripped_theme
ggsave(savepath("All CD marker PCA.pdf"), width = 10,
height = 10)


# UMAP on the CD marker genes.
cluster_CD_umap1 <-
  CD_pca_data %>%
  umap_calc(npcs = 2, n_neighbors = 200)

cluster_CD_umap1$layout %>%
  as_tibble(rownames = "unique_cluster_id") %>%
  left_join(cluster_annotation) %>%
  ggplot(aes(V1, V2, color = celltype)) +
  # Density shading per annotated cell type; cell types with a single cluster
  # and "Unknown" are left out.
  stat_density_2d(data = . %>%
                    group_by(celltype) %>%
                    mutate(n = n()) %>%
                    ungroup() %>%
                    filter(n > 1) %>%
                    filter(celltype != "Unknown"),
                  geom = "polygon",
                  aes(V1, V2,
                      fill = celltype,
                      alpha = as.factor(..level..)),
                  inherit.aes = FALSE,
                  bins = 50,
                  show.legend = FALSE, h = c(2, 2)) +
  geom_point(size = 5,
             alpha = 0.7) +
  geom_text(aes(label = paste(dataset_i, str_extract(cluster_id, "\\d*$"))),
            color = "black") +
  scale_color_manual(values = tissue_colors) +
  scale_fill_manual(values = tissue_colors) +
  scale_alpha_discrete(range = c(0, 0.1)) +
  stripped_theme

ggsave(savepath("All CD marker UMAP.pdf"), width = 10, height = 10)

# Hand-picked marker genes: name = gene symbol, value = free-text note on what
# the gene marks. The notes are matched by regex further down the file, so
# their exact spelling matters.
# NOTE(review): "CD15" may not be a gene symbol present in gene_info92 (the
# gene is commonly named FUT4); if so its ensg_id is NA after the join - confirm.
search_genes <-
  c("CCR3" = "Granulocytes",
    "CD3D" = "T-cell",
    "CD3E" = "T-cell",
    "CD4" = "T, DC, mono",
    "CD8A" = "T-cell",
    "CD8B" = "T-cell",
    "CD14" = "Mono, DC",
    "CD15" = "All",
    "CD19" = "B-cell",
    "CD27" = "B-cell, T-cell",
    "CD33" = "DC myeloid",
    "CD34" = "Endothelium",
    "CD38" = "Hematopoetic cells",
    "CDH5" = "Endothelium, immune = 0",
    "CD48" = "Hematopoetic cells, endo",
    "CEACAM8" = "Granu (+mono?)",
    "CLEC4G" = "Endothelium",
    "CR2" = "Epithelium, B-cell",
    "ECSCR" = "Endothelium",
    "EPCAM" = "Epithelium",
    "ERG" = "Endothelium",
    "ESAM" = "Endothelium",
    "FCGR2B" = "Endothelium",
    "FCN3" = "Endothelium",
    "GYPA" = "Erythrocyte",
    "GZMB" = "DC plasmacytoid",
    "HLA-DRA" = "B, DC, mono",

    "HLA-DRB1" = "B, DC, mono",
    "HLA-DRB5"
= "B, DC, mono",
    "IL3RA" = "DC, Baso",
    "ITGAX" = "Mono, DC, NK",
    "JCHAIN" = "B, pDC",
    "KRT6A" = "Epithelium",
    "KRT6B" = "Epithelium",
    "KRT13" = "Epithelium",
    "KRT16" = "Epithelium",
    "KRT19" = "Epithelium",
    "KRT20" = "Epithelium",
    "KRT75" = "Epithelium",
    "KRT82" = "Epithelium",

    "KLRB1" = "NK, MAIT",
    "MCAM" = "Endothelial",
    "MME" = "Endoth, Neutro",
    "MS4A1" = "B-cell",
    "MZB1" = "B-cell",
    "NCAM1" = "NK-cell",
    "NRP2" = "Endothelium",
    "PECAM1" = "Endothelium",
    "POU2AF1" = "B-cell",
    "PTPRC" = "Hematopoetic cells",
    "ROBO4" = "Endothelium",
    "SELE" = "Endothelium (activated)",
    "TRBC2" = "T-cell",
    "VCAM1" = "Endothelium") %>%
  # Named vector -> two-column tibble, then attach the Ensembl id per symbol.
  # Symbols missing from gene_info92 end up with ensg_id = NA.
  enframe("gene_name", "marker_for") %>%
  left_join(gene_info92 %>% select(gene_name, ensg_id))

# Disabled exploratory plot: expression of each selected marker per cluster.
# cluster_norm_count %>%
#   # filter(ensg_id %in% CD_marker_list$Ensembl) %>%
#   left_join(cluster_annotation) %>%
#   left_join(gene_info92) %>%
#   inner_join(search_genes) %>%
#   # group_by(ensg_id) %>%
#   # mutate(IQR = IQR(log10(norm_count + 1))) %T>%
#   # {g <- ggplot(., aes(IQR)) +
#   #   geom_density(); print(g)} %>%
#   # ungroup() %>%
#   # filter(IQR > 1) %>%
#   # filter(gene_name == sample(gene_name, 1)) %>%
#   ggplot(aes(cluster, norm_count, fill = celltype)) +
#   geom_col() +
#   coord_flip() +
#   scale_fill_manual(values = tissue_colors) +
#   facet_wrap(~gene_name + marker_for, scales = "free_x")


# Clusters x genes matrix for the selected markers only.
CD_pca_data2 <-
  cluster_norm_count %>%
  filter(ensg_id %in% search_genes$ensg_id) %>%
  left_join(cluster_annotation,
            by = c("dataset", "cluster_id")) %>%
  select(unique_cluster_id, ensg_id, norm_count) %>%
  group_by(ensg_id) %>%
  # Per gene: log-transform, then divide by sqrt(sum(x^2) / (n - 1)), i.e.
  # scale without centering. A gene that is 0 in every cluster gives 0 / 0 = NaN.
  mutate(norm_count = log10(norm_count + 1),
         norm_count = norm_count / sqrt(sum(norm_count^2)/(length(norm_count)-1)))
%>%
  spread(unique_cluster_id, norm_count) %>%
  column_to_rownames("ensg_id") %>%
  t()

# PCA on the selected marker genes.
cluster_CD_pca2 <-
  CD_pca_data2 %>%
  pca_calc(npcs = 10)

# Score ranges, used to rescale the loadings onto the score axes.
# These overwrite the PC1_lims / PC2_lims computed for the previous PCA.
PC1_lims <-
  cluster_CD_pca2$scores[,"PC1"] %>%
  {c(min(.), max(.))}

PC2_lims <-
  cluster_CD_pca2$scores[,"PC2"] %>%
  {c(min(.), max(.))}

cluster_CD_pca2$scores %>%
  as_tibble(rownames = "unique_cluster_id") %>%
  left_join(cluster_annotation) %>%
  ggplot(aes(PC1, PC2, color = celltype)) +
  # Density shading per annotated cell type; cell types with a single cluster
  # and "Unknown" are left out.
  stat_density_2d(data = . %>%
                    group_by(celltype) %>%
                    mutate(n = n()) %>%
                    ungroup() %>%
                    filter(n > 1) %>%
                    filter(celltype != "Unknown"),
                  geom = "polygon",
                  aes(PC1, PC2,
                      fill = celltype,
                      alpha = as.factor(..level..)),
                  inherit.aes = FALSE,
                  bins = 20,
                  show.legend = FALSE, h = c(4, 4)) +
  geom_point(size = 5,
             alpha = 0.7) +
  geom_text(aes(label = paste(dataset_i, str_extract(cluster_id, "\\d*$"))),
            color = "black") +
  scale_color_manual(values = tissue_colors) +
  scale_fill_manual(values = tissue_colors) +
  scale_alpha_discrete(range = c(0, 0.1)) +

  # Gene loadings drawn as labels, rescaled to the score ranges; at most 150
  # genes, ordered by rescaled length.
  geom_text(data = cluster_CD_pca2$loadings %>%
              as_tibble(rownames = "ensg_id") %>%
              left_join(gene_info92) %>%
              select(1:3, gene_name) %>%
              mutate(PC1 = scales::rescale(PC1, PC1_lims),
                     PC2 = scales::rescale(PC2, PC2_lims),
                     len = sqrt(PC1^2 + PC2^2)) %>%
              arrange(-len) %>%
              head(150),
            aes(PC1, PC2, label = gene_name ),
            inherit.aes = FALSE,
            alpha = 0.5) +
  stripped_theme
ggsave(savepath("Selected CD marker PCA.pdf"), width = 10, height = 10)



# UMAP on the selected marker genes.
cluster_CD_umap <-
  CD_pca_data2 %>%
  umap_calc(npcs = 2)

g1 <-
  cluster_CD_umap$layout %>%
  as_tibble(rownames = "unique_cluster_id") %>%
  left_join(cluster_annotation) %>%
  ggplot(aes(V1, V2, color =
celltype)) +
  # Density shading per annotated cell type; cell types with a single cluster
  # and "Unknown" are left out.
  stat_density_2d(data = . %>%
                    group_by(celltype) %>%
                    mutate(n = n()) %>%
                    ungroup() %>%
                    filter(n > 1) %>%
                    filter(celltype != "Unknown"),
                  geom = "polygon",
                  aes(V1, V2,
                      fill = celltype,
                      alpha = as.factor(..level..)),
                  inherit.aes = FALSE,
                  bins = 20,
                  show.legend = FALSE, h = c(4, 4)) +
  geom_point(size = 5,
             alpha = 0.7) +
  geom_text(aes(label = paste(dataset_i, str_extract(cluster_id, "\\d*$"))),
            color = "black") +
  scale_color_manual(values = tissue_colors) +
  scale_fill_manual(values = tissue_colors) +
  scale_alpha_discrete(range = c(0, 0.1)) +
  stripped_theme

ggsave(savepath("Selected CD marker UMAP.pdf"), plot = g1, width = 10, height = 10)


# Flag clusters falling in one corner of the UMAP as suspected immune cells.
# NOTE(review): the cut-offs are fixed coordinates of this particular layout;
# presumably they must be revisited whenever the UMAP is recomputed - confirm.
CD_umap_immune_assignment <-
  cluster_CD_umap$layout %>%
  as_tibble(rownames = "unique_cluster_id") %>%
  mutate(suspected_immune = V1 < 0 & V2 < -1)

g2 <-
  CD_umap_immune_assignment %>%
  left_join(cluster_annotation) %>%
  ggplot(aes(V1, V2, color = suspected_immune)) +

  geom_point(size = 5,
             alpha = 0.7) +
  # scale_color_manual(values = tissue_colors) +
  # scale_fill_manual(values = tissue_colors) +

  stripped_theme

# Side-by-side panels (patchwork).
g1 + g2


# Scatter of PTPRC against CD48 expression per cluster.
cluster_norm_count %>%
  left_join(gene_info92 %>%
              select(ensg_id, gene_name)) %>%
  filter(gene_name %in% c("PTPRC", "PTPRCAP", "CDH5", "CD48")) %>%
  left_join(cluster_annotation,
            by = c("dataset", "cluster_id")) %>%
  # With the scaling step commented out, group_by()/ungroup() is a no-op pair;
  # it is kept so the scaling can be switched back on in place.
  group_by(ensg_id) %>%
  # mutate(norm_count = log10(norm_count + 1),
  #        norm_count = norm_count / sqrt(sum(norm_count^2)/(length(norm_count)-1))) %>%
  ungroup() %>%
  select(-ensg_id) %>%

  # One column per gene so two genes can be mapped to x and y.
  spread(gene_name, norm_count) %>%
  ggplot(aes(PTPRC, CD48, color = celltype)) +
  stat_density_2d(data = .
%>%
                    group_by(celltype) %>%
                    mutate(n = n()) %>%
                    ungroup() %>%
                    filter(n > 1) %>%
                    filter(celltype != "Unknown"),
                  geom = "polygon",
                  aes(PTPRC, CD48,
                      fill = celltype,
                      alpha = as.factor(..level..)),
                  inherit.aes = FALSE,
                  bins = 20,
                  show.legend = FALSE, h = c(4, 4)) +
  geom_point(size = 5,
             alpha = 0.7) +
  geom_text(aes(label = paste(dataset_i, str_extract(cluster_id, "\\d*$"))),
            color = "black") +
  scale_color_manual(values = tissue_colors) +
  scale_fill_manual(values = tissue_colors) +
  scale_alpha_discrete(range = c(0, 0.1)) +
  scale_x_log10() +
  scale_y_log10() +
  stripped_theme +
  ggtitle("CD48 and PTPRC (CD45)", "Only expressed in immune cells")



```

## PCA per cell type

```{r}

#' PCA biplot of clusters on a chosen set of marker genes
#'
#' Restricts `indata` to the genes in `gene_list`, log-transforms and scales
#' each gene, runs a two-component PCA over clusters and draws the cluster
#' scores coloured by annotated cell type, with gene loadings as text labels.
#'
#' Relies on objects from the calling environment: `cluster_annotation`,
#' `gene_info92`, `tissue_colors`, `stripped_theme` and `pca_calc()`.
#'
#' @param indata Long table with columns `dataset`, `cluster_id`, `ensg_id`
#'   and `norm_count`.
#' @param gene_list Table with an `ensg_id` column naming the genes to use.
#' @return A ggplot object.
pca_plot <- function(indata, gene_list) {
  a_pca <-
    indata %>%
    filter(ensg_id %in% gene_list$ensg_id) %>%
    left_join(cluster_annotation,
              by = c("dataset", "cluster_id")) %>%
    select(unique_cluster_id, ensg_id, norm_count) %>%
    group_by(ensg_id) %>%
    # Per gene: log-transform, then divide by sqrt(sum(x^2) / (n - 1)), i.e.
    # scale without centering.
    mutate(norm_count = log10(norm_count + 1),
           norm_count = norm_count / sqrt(sum(norm_count^2)/(length(norm_count)-1))) %>%
    # Grouping is only needed for the scaling above.
    ungroup() %>%
    spread(unique_cluster_id, norm_count) %>%
    column_to_rownames("ensg_id") %>%
    t() %>%
    pca_calc(npcs = 2)

  # Score ranges, used to rescale the loadings onto the score axes.
  PC1_lims <- range(a_pca$scores[, "PC1"])
  PC2_lims <- range(a_pca$scores[, "PC2"])

  a_pca$scores %>%
    as_tibble(rownames = "unique_cluster_id") %>%
    left_join(cluster_annotation) %>%
    ggplot(aes(PC1, PC2, color = celltype)) +
    # Density shading per annotated cell type; cell types with a single
    # cluster and "Unknown" are left out.
    stat_density_2d(data = . %>%
                      group_by(celltype) %>%
                      mutate(n = n()) %>%
                      ungroup() %>%
                      filter(n > 1) %>%
                      filter(celltype != "Unknown"),
                    geom = "polygon",
                    aes(PC1, PC2,
                        fill = celltype,
                        alpha = as.factor(..level..)),
                    inherit.aes = FALSE,
                    bins = 20,
                    show.legend = FALSE, h = c(2, 2)) +
    geom_point(size = 5,
               alpha = 0.7) +
    # Label: dataset number followed by the numeric part of the cluster id.
    geom_text(aes(label = paste(dataset_i, str_extract(cluster_id, "\\d*$"))),
              color = "black") +
    scale_color_manual(values = tissue_colors) +
    scale_fill_manual(values = tissue_colors) +
    scale_alpha_discrete(range = c(0, 0.1)) +
    # Gene loadings as labels; at most 150 genes, ordered by rescaled length.
    geom_text(data = a_pca$loadings %>%
                as_tibble(rownames = "ensg_id") %>%
                left_join(gene_info92) %>%
                select(1:3, gene_name) %>%
                mutate(PC1 = scales::rescale(PC1, PC1_lims),
                       PC2 = scales::rescale(PC2, PC2_lims),
                       len = sqrt(PC1^2 + PC2^2)) %>%
                arrange(-len) %>%
                head(150),
              aes(PC1, PC2, label = gene_name ),
              inherit.aes = FALSE,
              alpha = 0.5) +
    stripped_theme
}

# Marker subsets, selected by regex on the free-text `marker_for` note.
# "ematop" matches the "Hematopoetic cells" notes, which are included in every
# immune subset.
search_genes_B <-
  search_genes %>%
  filter(grepl("B(,|-)|ematop", marker_for))

search_genes_T <-
  search_genes %>%
  filter(grepl("T(,|-)|MAIT|ematop", marker_for))

search_genes_NK <-
  search_genes %>%
  filter(grepl("NK|ematop", marker_for))

search_genes_DC <-
  search_genes %>%
  filter(grepl("DC|ematop", marker_for))

search_genes_mono <-
  search_genes %>%
  filter(grepl("ono|ematop", marker_for))

search_genes_endoth <-
  search_genes %>%
  filter(grepl("ndo", marker_for))

search_genes_epith <-
  search_genes %>%
  filter(grepl("pith", marker_for))

search_genes_hemato <-
  search_genes %>%
  filter(grepl("ematop", marker_for))

search_genes_granulocytes <-
  blood_cell_category_fine %>%
  filter(enhanced_tissues %in% c("basophil",
"eosinophil", "neutrophil")) %>% 725 | arrange(-ts_score) %>% 726 | group_by(enhanced_tissues) %>% 727 | top_n(10, ts_score) %>% 728 | left_join(gene_info92) 729 | 730 | pca_plot(cluster_norm_count, search_genes_B) + ggtitle("B-cell") 731 | ggsave(savepath("B-cell marker PCA.pdf"), width = 10, height = 10) 732 | pca_plot(cluster_norm_count, search_genes_T) + ggtitle("T-cell") 733 | ggsave(savepath("T-cell marker PCA.pdf"), width = 10, height = 10) 734 | pca_plot(cluster_norm_count, search_genes_NK) + ggtitle("NK-cell") 735 | ggsave(savepath("NK-cell marker PCA.pdf"), width = 10, height = 10) 736 | pca_plot(cluster_norm_count, search_genes_DC) + ggtitle("DC-cell") 737 | ggsave(savepath("DC marker PCA.pdf"), width = 10, height = 10) 738 | pca_plot(cluster_norm_count, search_genes_mono) + ggtitle("Monocyte") 739 | ggsave(savepath("Monocyte marker PCA.pdf"), width = 10, height = 10) 740 | pca_plot(cluster_norm_count, search_genes_endoth) + ggtitle("Endothelium") 741 | ggsave(savepath("Endothelium marker PCA.pdf"), width = 10, height = 10) 742 | pca_plot(cluster_norm_count, search_genes_epith) + ggtitle("Epithelium") 743 | ggsave(savepath("Epithelium marker PCA.pdf"), width = 10, height = 10) 744 | 745 | 746 | 747 | pca_plot(cluster_norm_count, search_genes_granulocytes) + ggtitle("Granulocytes") 748 | ggsave(savepath("Granulocytes marker PCA.pdf"), width = 10, height = 10) 749 | 750 | a_pca <- 751 | cluster_norm_count %>% 752 | filter(ensg_id %in% search_genes_granulocytes$ensg_id) %>% 753 | left_join(cluster_annotation, 754 | by = c("dataset", "cluster_id")) %>% 755 | select(unique_cluster_id, ensg_id, norm_count) %>% 756 | group_by(ensg_id) %>% 757 | mutate(norm_count = log10(norm_count + 1), 758 | norm_count = norm_count / sqrt(sum(norm_count^2)/(length(norm_count)-1))) %>% 759 | spread(unique_cluster_id, norm_count) %>% 760 | column_to_rownames("ensg_id") %>% 761 | t() %>% 762 | pca_calc(npcs = 2) 763 | 764 | PC1_lims <- 765 | a_pca$scores[,"PC1"] %>% 766 | 
{c(min(.), max(.))} 767 | 768 | PC2_lims <- 769 | a_pca$scores[,"PC2"] %>% 770 | {c(min(.), max(.))} 771 | 772 | a_pca$scores %>% 773 | as_tibble(rownames = "unique_cluster_id") %>% 774 | left_join(cluster_annotation) %>% 775 | ggplot(aes(PC1, PC2, color = celltype)) + 776 | stat_density_2d(data = . %>% 777 | group_by(celltype) %>% 778 | mutate(n = n()) %>% 779 | ungroup() %>% 780 | filter(n > 1) %>% 781 | filter(celltype != "Unknown"), 782 | geom = "polygon", 783 | aes(PC1, PC2, 784 | fill = celltype, 785 | alpha = as.factor(..level..)), 786 | inherit.aes = F, 787 | bins = 20, 788 | show.legend = F, h = c(2, 2)) + 789 | geom_point(size = 5, 790 | alpha = 0.7) + 791 | geom_text(aes(label = paste(dataset_i, str_extract(cluster_id, "\\d*$"))), 792 | color = "black") + 793 | scale_color_manual(values = c(tissue_colors, "EO" = "red", "NE" = "blue", "BA" = "purple")) + 794 | scale_fill_manual(values = tissue_colors) + 795 | scale_alpha_discrete(range = c(0, 0.1)) + 796 | geom_text(data = a_pca$loadings %>% 797 | as_tibble(rownames = "ensg_id") %>% 798 | left_join(gene_info92) %>% 799 | left_join(search_genes_granulocytes) %>% 800 | # select(1:3, gene_name) %>% 801 | mutate(type = case_when(enhanced_tissues == "eosinophil" ~ "EO", 802 | enhanced_tissues == "neutrophil" ~ "NE", 803 | enhanced_tissues == "basophil" ~ "BA"), 804 | PC1 = scales::rescale(PC1, PC1_lims), 805 | PC2 = scales::rescale(PC2, PC2_lims), 806 | len = sqrt(PC1^2 + PC2^2)) , 807 | aes(PC1, PC2, label = gene_name, color = type), 808 | inherit.aes = F, 809 | alpha = 0.5) + 810 | stripped_theme 811 | 812 | 813 | 814 | 815 | gran_data <- 816 | cluster_norm_count %>% 817 | filter(ensg_id %in% c(search_genes_granulocytes$ensg_id, search_genes_hemato$ensg_id)) %>% 818 | left_join(cluster_annotation) %>% 819 | left_join(gene_info92) %>% 820 | left_join(search_genes) %>% 821 | left_join(search_genes_granulocytes) %>% 822 | mutate(marker_for = ifelse(is.na(marker_for), 823 | enhanced_tissues, 824 | marker_for)) 
%>% 825 | select(1, 2, 3, 4, 5, 6, 7, 8, gene_name, marker_for) %>% 826 | group_by(unique_cluster_id) %>% 827 | mutate(blood_cell = norm_count[which(gene_name == "PTPRC")] > 150) %>% 828 | filter(blood_cell) %>% 829 | ungroup() 830 | 831 | 832 | gran_data %>% 833 | ggplot(aes(cluster, norm_count, fill = celltype)) + 834 | geom_col() + 835 | coord_flip() + 836 | scale_fill_manual(values = tissue_colors) + 837 | facet_wrap(~gene_name + marker_for, scales = "free_x") 838 | 839 | 840 | 841 | a_pca <- 842 | gran_data %>% 843 | select(unique_cluster_id, ensg_id, norm_count) %>% 844 | group_by(ensg_id) %>% 845 | mutate(norm_count = log10(norm_count + 1), 846 | norm_count = norm_count / sqrt(sum(norm_count^2)/(length(norm_count)-1))) %>% 847 | ungroup() %>% 848 | spread(unique_cluster_id, norm_count) %>% 849 | filter(complete.cases(.)) %>% 850 | column_to_rownames("ensg_id") %>% 851 | 852 | t() %>% 853 | pca_calc(npcs = 2) 854 | 855 | PC1_lims <- 856 | a_pca$scores[,"PC1"] %>% 857 | {c(min(.), max(.))} 858 | 859 | PC2_lims <- 860 | a_pca$scores[,"PC2"] %>% 861 | {c(min(.), max(.))} 862 | 863 | a_pca$scores %>% 864 | as_tibble(rownames = "unique_cluster_id") %>% 865 | left_join(cluster_annotation) %>% 866 | ggplot(aes(PC1, PC2, color = celltype)) + 867 | stat_density_2d(data = . 
%>% 868 | group_by(celltype) %>% 869 | mutate(n = n()) %>% 870 | ungroup() %>% 871 | filter(n > 1) %>% 872 | filter(celltype != "Unknown"), 873 | geom = "polygon", 874 | aes(PC1, PC2, 875 | fill = celltype, 876 | alpha = as.factor(..level..)), 877 | inherit.aes = F, 878 | bins = 20, 879 | show.legend = F, h = c(2, 2)) + 880 | geom_point(size = 5, 881 | alpha = 0.7) + 882 | geom_text(aes(label = paste(dataset_i, str_extract(cluster_id, "\\d*$"))), 883 | color = "black") + 884 | scale_color_manual(values = c(tissue_colors, "EO" = "red", "NE" = "blue", "BA" = "purple")) + 885 | scale_fill_manual(values = tissue_colors) + 886 | scale_alpha_discrete(range = c(0, 0.1)) + 887 | geom_text(data = a_pca$loadings %>% 888 | as_tibble(rownames = "ensg_id") %>% 889 | left_join(gene_info92) %>% 890 | left_join(search_genes_granulocytes) %>% 891 | # select(1:3, gene_name) %>% 892 | mutate(type = case_when(enhanced_tissues == "eosinophil" ~ "EO", 893 | enhanced_tissues == "neutrophil" ~ "NE", 894 | enhanced_tissues == "basophil" ~ "BA"), 895 | PC1 = scales::rescale(PC1, PC1_lims), 896 | PC2 = scales::rescale(PC2, PC2_lims), 897 | len = sqrt(PC1^2 + PC2^2)) , 898 | aes(PC1, PC2, label = gene_name, color = type), 899 | inherit.aes = F, 900 | alpha = 0.5) + 901 | stripped_theme 902 | 903 | 904 | possible_gran_clusters <- 905 | a_pca$scores %>% 906 | as_tibble(rownames = "unique_cluster_id") %>% 907 | left_join(cluster_annotation) %>% 908 | mutate(gran = PC1 < 0 | PC2 < -1) 909 | 910 | possible_gran_clusters %>% 911 | ggplot(aes(PC1, PC2, color = gran)) + 912 | stat_density_2d(data = . 
%>% 913 | group_by(celltype) %>% 914 | mutate(n = n()) %>% 915 | ungroup() %>% 916 | filter(n > 1) %>% 917 | filter(celltype != "Unknown"), 918 | geom = "polygon", 919 | aes(PC1, PC2, 920 | fill = celltype, 921 | alpha = as.factor(..level..)), 922 | inherit.aes = F, 923 | bins = 20, 924 | show.legend = F, h = c(2, 2)) + 925 | geom_point(size = 5, 926 | alpha = 0.7) + 927 | geom_text(aes(label = paste(dataset_i, str_extract(cluster_id, "\\d*$"))), 928 | color = "black") + 929 | # scale_color_manual(values = c(tissue_colors, "EO" = "red", "NE" = "blue", "BA" = "purple")) + 930 | # scale_fill_manual(values = tissue_colors) + 931 | scale_alpha_discrete(range = c(0, 0.1)) + 932 | geom_text(data = a_pca$loadings %>% 933 | as_tibble(rownames = "ensg_id") %>% 934 | left_join(gene_info92) %>% 935 | left_join(search_genes_granulocytes) %>% 936 | # select(1:3, gene_name) %>% 937 | mutate(type = case_when(enhanced_tissues == "eosinophil" ~ "EO", 938 | enhanced_tissues == "neutrophil" ~ "NE", 939 | enhanced_tissues == "basophil" ~ "BA"), 940 | PC1 = scales::rescale(PC1, PC1_lims), 941 | PC2 = scales::rescale(PC2, PC2_lims), 942 | len = sqrt(PC1^2 + PC2^2)) , 943 | aes(PC1, PC2, label = gene_name, color = type), 944 | inherit.aes = F, 945 | alpha = 0.5) + 946 | stripped_theme 947 | 948 | ``` 949 | 950 | ##Immunecell markers 951 | 952 | ```{r} 953 | 954 | cluster_norm_count %>% 955 | left_join(gene_info92 %>% 956 | select(ensg_id, gene_name)) %>% 957 | filter(gene_name %in% c("PTPRC", "PTPRCAP", "CDH5", "CD48")) %>% 958 | left_join(cluster_annotation, 959 | by = c("dataset", "cluster_id")) %>% 960 | ggplot(aes(gene_name, norm_count, fill = celltype)) + 961 | geom_violin(alpha = 0.5, scale = "width") + 962 | geom_hline(data = tibble(gene_name = c("CD48", "CDH5", "PTPRC", "PTPRCAP"), 963 | norm_count = c(30, 0, 30, 0)), 964 | aes(yintercept = norm_count)) + 965 | # geom_density(alpha = 0.5) + 966 | facet_wrap(~gene_name, scales = "free_x", nrow = 1) + 967 | # scale_x_log10() + 968 
| scale_y_log10() + 969 | scale_fill_manual(values = tissue_colors) + 970 | stripped_theme_facet + 971 | theme(panel.spacing = unit(0, "mm")) 972 | 973 | 974 | 975 | 976 | 977 | 978 | immune_pca <- 979 | cluster_norm_count %>% 980 | left_join(gene_info92 %>% 981 | select(ensg_id, gene_name)) %>% 982 | filter(gene_name %in% c("CD48", "CDH5", "PTPRC", "PTPRCAP")) %>% 983 | left_join(cluster_annotation, 984 | by = c("dataset", "cluster_id")) %>% 985 | select(unique_cluster_id, ensg_id, norm_count) %>% 986 | group_by(ensg_id) %>% 987 | mutate(norm_count = log10(norm_count + 1), 988 | norm_count = norm_count / sqrt(sum(norm_count^2)/(length(norm_count)-1))) %>% 989 | spread(unique_cluster_id, norm_count) %>% 990 | column_to_rownames("ensg_id") %>% 991 | t() %>% 992 | pca_calc(npcs = 2) 993 | 994 | PC1_lims <- 995 | immune_pca$scores[,"PC1"] %>% 996 | {c(min(.), max(.))} 997 | 998 | PC2_lims <- 999 | immune_pca$scores[,"PC2"] %>% 1000 | {c(min(.), max(.))} 1001 | 1002 | g1 <- 1003 | immune_pca$scores %>% 1004 | as_tibble(rownames = "unique_cluster_id") %>% 1005 | left_join(cluster_annotation) %>% 1006 | ggplot(aes(PC1, PC2, color = celltype)) + 1007 | stat_density_2d(data = . 
%>% 1008 | group_by(celltype) %>% 1009 | mutate(n = n()) %>% 1010 | ungroup() %>% 1011 | filter(n > 1) %>% 1012 | filter(celltype != "Unknown"), 1013 | geom = "polygon", 1014 | aes(PC1, PC2, 1015 | fill = celltype, 1016 | alpha = as.factor(..level..)), 1017 | inherit.aes = F, 1018 | bins = 50, 1019 | show.legend = F, h = c(1, 1)) + 1020 | geom_point(size = 5, 1021 | alpha = 0.7) + 1022 | geom_text(aes(label = paste(dataset_i, str_extract(cluster_id, "\\d*$"))), 1023 | color = "black") + 1024 | scale_color_manual(values = tissue_colors) + 1025 | scale_fill_manual(values = tissue_colors) + 1026 | scale_alpha_discrete(range = c(0, 0.05)) + 1027 | 1028 | geom_text(data = immune_pca$loadings %>% 1029 | as_tibble(rownames = "ensg_id") %>% 1030 | left_join(gene_info92) %>% 1031 | select(1:3, gene_name) %>% 1032 | mutate(PC1 = scales::rescale(PC1, PC1_lims), 1033 | PC2 = scales::rescale(PC2, PC2_lims), 1034 | len = sqrt(PC1^2 + PC2^2)) %>% 1035 | arrange(-len) %>% 1036 | head(150), 1037 | aes(PC1, PC2, label = gene_name ), 1038 | inherit.aes = F, 1039 | alpha = 0.5) + 1040 | stripped_theme 1041 | 1042 | g2 <- 1043 | immune_pca$scores %>% 1044 | as_tibble(rownames = "unique_cluster_id") %>% 1045 | left_join(cluster_annotation) %>% 1046 | left_join(CD_umap_immune_assignment) %>% 1047 | ggplot(aes(PC1, PC2, color = suspected_immune)) + 1048 | 1049 | geom_point(size = 5, 1050 | alpha = 0.7) + 1051 | 1052 | 1053 | stripped_theme 1054 | 1055 | g1 + g2 1056 | ggsave(savepath("Immune PCA.pdf"), width = 16, height = 10) 1057 | 1058 | 1059 | ``` 1060 | 1061 | 1062 | ##Barplot of CD markers 1063 | 1064 | ```{r} 1065 | 1066 | plot_data <- 1067 | cluster_norm_count %>% 1068 | # filter(ensg_id %in% CD_marker_list$Ensembl) %>% 1069 | left_join(cluster_annotation) %>% 1070 | left_join(gene_info92) %>% 1071 | inner_join(search_genes) 1072 | 1073 | 1074 | plots <- 1075 | lapply(cluster_annotation$cluster, 1076 | function(chosen_cluster) { 1077 | plot_data %>% 1078 | 1079 | mutate(chosen = 
cluster == chosen_cluster) %>% 1080 | group_by(chosen, ensg_id, gene_name, cluster_annotation, marker_for, celltype) %>% 1081 | summarise(norm_count = mean(norm_count)) %>% 1082 | filter(cluster_annotation != "Unknown" | chosen) %>% 1083 | ggplot(aes(chosen, norm_count, fill = celltype)) + 1084 | geom_col(position = "dodge") + 1085 | coord_flip() + 1086 | scale_fill_manual(values = tissue_colors) + 1087 | facet_wrap(~marker_for + gene_name, scales = "free_x") + 1088 | ggtitle(chosen_cluster) + 1089 | stripped_theme_facet 1090 | }) 1091 | pdf(savepath("CD marker facet plots per cluster.pdf"), width = 13, height = 13) 1092 | plots 1093 | dev.off() 1094 | 1095 | ##### 1096 | 1097 | plot_data <- 1098 | cluster_norm_count %>% 1099 | # filter(ensg_id %in% CD_marker_list$Ensembl) %>% 1100 | left_join(cluster_annotation) %>% 1101 | left_join(CD_umap_immune_assignment) %>% 1102 | left_join(gene_info92) %>% 1103 | inner_join(search_genes) 1104 | 1105 | mis_class_clusters <- 1106 | CD_umap_immune_assignment %>% 1107 | left_join(cluster_annotation) %>% 1108 | filter((suspected_immune & celltype %in% c("Unknown", "Endothelial")) | 1109 | (!suspected_immune & !celltype %in% c("Unknown", "Endothelial"))) 1110 | 1111 | plots <- 1112 | lapply(sort(mis_class_clusters$cluster), 1113 | function(chosen_cluster) { 1114 | plot_data %>% 1115 | 1116 | mutate(chosen = cluster == chosen_cluster) %>% 1117 | group_by(chosen, ensg_id, gene_name, cluster_annotation, marker_for, celltype) %>% 1118 | summarise(norm_count = mean(norm_count)) %>% 1119 | filter(cluster_annotation != "Unknown" | chosen) %>% 1120 | ggplot(aes(chosen, norm_count, fill = celltype)) + 1121 | geom_col(position = "dodge") + 1122 | coord_flip() + 1123 | scale_fill_manual(values = tissue_colors) + 1124 | facet_wrap(~marker_for + gene_name, scales = "free_x") + 1125 | ggtitle(chosen_cluster) + 1126 | stripped_theme_facet 1127 | }) 1128 | pdf(savepath("CD marker facet plots per cluster - potentially misclass.pdf"), width = 13, 
height = 13) 1129 | plots 1130 | dev.off() 1131 | 1132 | ##### 1133 | 1134 | plot_data <- 1135 | cluster_norm_count %>% 1136 | # filter(ensg_id %in% CD_marker_list$Ensembl) %>% 1137 | left_join(cluster_annotation) %>% 1138 | left_join(CD_umap_immune_assignment) %>% 1139 | left_join(gene_info92) %>% 1140 | inner_join(search_genes) 1141 | 1142 | no_class_clusters <- 1143 | cluster_annotation %>% 1144 | filter(celltype == "Unknown") 1145 | 1146 | plots <- 1147 | lapply(sort(no_class_clusters$cluster), 1148 | function(chosen_cluster) { 1149 | plot_data %>% 1150 | 1151 | mutate(chosen = cluster == chosen_cluster) %>% 1152 | group_by(chosen, ensg_id, gene_name, cluster_annotation, marker_for, celltype) %>% 1153 | summarise(norm_count = mean(norm_count)) %>% 1154 | filter(cluster_annotation != "Unknown" | chosen) %>% 1155 | ggplot(aes(chosen, norm_count, fill = celltype)) + 1156 | geom_col(position = "dodge") + 1157 | coord_flip() + 1158 | scale_fill_manual(values = tissue_colors) + 1159 | facet_wrap(~marker_for + gene_name, scales = "free_x") + 1160 | ggtitle(chosen_cluster) + 1161 | stripped_theme_facet 1162 | }) 1163 | pdf(savepath("CD marker facet plots per cluster - no class.pdf"), width = 13, height = 13) 1164 | plots 1165 | dev.off() 1166 | 1167 | 1168 | ##### 1169 | 1170 | plot_data <- 1171 | cluster_norm_count %>% 1172 | # filter(ensg_id %in% CD_marker_list$Ensembl) %>% 1173 | left_join(cluster_annotation) %>% 1174 | left_join(CD_umap_immune_assignment) %>% 1175 | left_join(gene_info92) %>% 1176 | filter(ensg_id %in% gran_data$ensg_id) %>% 1177 | left_join(gran_data %>% 1178 | select(gene_name, marker_for)) 1179 | 1180 | gran_clusters <- 1181 | gran_data %>% 1182 | pull(cluster) %>% 1183 | unique() 1184 | 1185 | plots <- 1186 | lapply(sort(gran_clusters), 1187 | function(chosen_cluster) { 1188 | plot_data %>% 1189 | 1190 | mutate(chosen = cluster == chosen_cluster) %>% 1191 | group_by(chosen, ensg_id, gene_name, cluster_annotation, marker_for, celltype) %>% 1192 
| summarise(norm_count = mean(norm_count)) %>% 1193 | filter(cluster_annotation != "Unknown" | chosen) %>% 1194 | ggplot(aes(chosen, norm_count, fill = celltype)) + 1195 | geom_col(position = "dodge") + 1196 | coord_flip() + 1197 | scale_fill_manual(values = tissue_colors) + 1198 | facet_wrap(~marker_for + gene_name, scales = "free_x") + 1199 | ggtitle(chosen_cluster) + 1200 | stripped_theme_facet 1201 | }) 1202 | pdf(savepath("CD marker facet plots per cluster - grans.pdf"), width = 13, height = 13) 1203 | plots 1204 | dev.off() 1205 | 1206 | ``` 1207 | 1208 | 1209 | 1210 | # Classification 1211 | 1212 | ```{r} 1213 | 1214 | celltype_max_norm_count <- 1215 | cluster_norm_count %>% 1216 | left_join(cluster_annotation) %>% 1217 | group_by(celltype, ensg_id) %>% 1218 | summarise(norm_count = max(norm_count)) %>% 1219 | ungroup() 1220 | 1221 | classification_max_norm_count <- 1222 | celltype_max_norm_count %>% 1223 | filter(celltype != "Unknown") %>% 1224 | hpa_gene_classification(expression_col = "norm_count", 1225 | tissue_col = "celltype", 1226 | gene_col = "ensg_id", 1227 | 1228 | enr_fold = 4, 1229 | max_group_n = 2, 1230 | det_lim = 1) 1231 | 1232 | classification_sep_norm_count <- 1233 | cluster_norm_count %>% 1234 | left_join(cluster_annotation) %>% 1235 | # filter(celltype != "Unknown") %>% 1236 | hpa_gene_classification_multi_sample(expression_col = "norm_count", 1237 | tissue_col = "celltype", 1238 | gene_col = "ensg_id", 1239 | sample_col = "unique_cluster_id", 1240 | enr_fold = 4, 1241 | max_group_n = 2, 1242 | det_lim = 1) 1243 | ``` 1244 | 1245 | ##Classification plots 1246 | 1247 | ```{r} 1248 | classification_max_norm_count %>% 1249 | mutate(spec_category = str_to_sentence(spec_category), 1250 | dist_category = str_to_sentence(dist_category)) %>% 1251 | 1252 | multi_alluvial_plot(vars = c("Specificity" = "spec_category", 1253 | "Distribution" = "dist_category"), 1254 | chunk_levels = c('Tissue enriched', 'Group enriched', 1255 | 'Tissue enhanced', 
'Low tissue specificity', 1256 | 'Detected in single', 1257 | 'Detected in some', 1258 | 'Detected in many', 1259 | 'Detected in all', 1260 | 'Not detected'), 1261 | pal = c(gene_category_pal, elevation_identity_pal), 1262 | color_by = c(1, 1)) 1263 | ggsave(savepath("single class comb n_genes alluvial.pdf"), width = 4, height = 6, useDingbats = F) 1264 | 1265 | 1266 | class_table_temp <- 1267 | classification_max_norm_count %>% 1268 | select(gene, spec_category, enriched_tissues) %>% 1269 | separate_rows(enriched_tissues, sep = ";") %>% 1270 | mutate(spec_category = factor(spec_category, levels = rev(spec_category_levels)), 1271 | enriched_tissues = str_to_sentence(enriched_tissues)) 1272 | 1273 | plot_dendro <- 1274 | celltype_max_norm_count %>% 1275 | spread(celltype, norm_count) %>% 1276 | column_to_rownames("ensg_id") %>% 1277 | cor(method = "spearman") %>% 1278 | {1 - .} %>% 1279 | as.dist() %>% 1280 | hclust(method = "average") %>% 1281 | dendro_data() 1282 | 1283 | 1284 | dendro_plot_data <- 1285 | left_join(plot_dendro$segments, 1286 | plot_dendro$labels, 1287 | by = c("x" = "x", "yend" = "y")) 1288 | 1289 | left_plot <- 1290 | dendro_plot_data %>% 1291 | ggplot() + 1292 | geom_segment(aes(x=y, y=x, xend=yend, yend=xend, group = label))+ 1293 | geom_rect(aes(xmin=0, ymin=x + 0.5, 1294 | xmax=-0.02, ymax=xend - 0.5, 1295 | fill = label), 1296 | show.legend = F) + 1297 | scale_color_manual(values = celltype_pal)+ 1298 | scale_fill_manual(values = celltype_pal)+ 1299 | scale_x_reverse(expand = expand_scale(mult = 0.25), position = "top")+ 1300 | 1301 | theme(axis.text.y = element_blank(), 1302 | axis.title = element_blank(), 1303 | axis.ticks.y = element_blank(), 1304 | plot.margin = unit(c(1,1,1,1), units = "mm"), 1305 | panel.background = element_blank()) 1306 | 1307 | right_plot <- 1308 | class_table_temp %>% 1309 | filter(!is.na(enriched_tissues)) %>% 1310 | group_by(enriched_tissues, spec_category) %>% 1311 | summarise(n_genes = n()) %>% 1312 | ungroup() 
%>% 1313 | mutate(enriched_tissues = factor(enriched_tissues, levels = plot_dendro$labels$label)) %>% 1314 | ggplot(aes(enriched_tissues, n_genes, fill = spec_category)) + 1315 | geom_col(width = 0.8, size = 0.1) + 1316 | simple_theme + 1317 | scale_fill_manual(values = gene_category_pal, name = "Specificity") + 1318 | coord_flip() + 1319 | xlab("Tissue") + 1320 | ylab("Number of genes") + 1321 | scale_y_continuous(position = "bottom", expand = c(0,0)) + 1322 | 1323 | theme(axis.text.y = element_text(hjust = 0.5), 1324 | legend.position = c(0.7, 0.5), 1325 | axis.title.y = element_blank(), 1326 | panel.border = element_blank()) 1327 | 1328 | left_plot + right_plot 1329 | 1330 | ggsave(savepath("N enr genes per tissue + dendro 2.pdf"), width = 5, height = 3) 1331 | 1332 | classification_max_norm_count %>% 1333 | left_join(gene_info92, by = c("gene" = "ensg_id")) %>% 1334 | select(1, gene_name, 2, 3, enriched_tissues) %>% 1335 | separate_rows(enriched_tissues, sep = ";") %>% 1336 | 1337 | left_join(blood_cell_category %>% 1338 | separate_rows(enhanced_tissues, sep = ",") %>% 1339 | select(1:3, enriched_tissues = enhanced_tissues), 1340 | by = c("gene" = "ensg_id"), 1341 | suffix = c("_singe_cell", "_blood_class")) %>% 1342 | filter(spec_category == "tissue enriched" | specificity_category == "Tissue enriched") %>% 1343 | group_by(enriched_tissues_singe_cell, enriched_tissues_blood_class) %>% 1344 | summarise(n = n()) %>% 1345 | ungroup() %>% 1346 | mutate(enriched_tissues_singe_cell = ifelse(is.na(enriched_tissues_singe_cell), 1347 | "Only enriched in bulk", 1348 | enriched_tissues_singe_cell), 1349 | enriched_tissues_blood_class = ifelse(is.na(enriched_tissues_blood_class), 1350 | "Only enriched in single", 1351 | enriched_tissues_blood_class)) %>% 1352 | mutate(enriched_tissues_blood_class = factor(enriched_tissues_blood_class, 1353 | levels = c("T-cells", 1354 | "B-cells", 1355 | "NK-cells", 1356 | "monocytes", 1357 | "granulocytes", 1358 | "dendritic cells", 1359 
| "Only enriched in single")), 1360 | enriched_tissues_singe_cell = factor(enriched_tissues_singe_cell, 1361 | levels = c("T-cell", 1362 | "B-cell", 1363 | "Monocyte", 1364 | "Dendritic cell", 1365 | "Only enriched in bulk"))) %>% 1366 | ggplot(aes(enriched_tissues_singe_cell, n, fill = enriched_tissues_blood_class)) + 1367 | geom_col() + 1368 | scale_fill_manual(values = c(tissue_colors, "Only enriched in single" = "darkgray"), name = "Bulk classification") + 1369 | ggtitle("Comparison of celltye enriched genes", "Genes that are celltype type enriched in either classification") + 1370 | xlab("Single cell classification") + 1371 | stripped_theme + 1372 | coord_flip() 1373 | ggsave(savepath("N enriched genes bulk - single comparison.pdf"), width = 5, height = 3) 1374 | 1375 | classification_max_norm_count %>% 1376 | left_join(gene_info92, by = c("gene" = "ensg_id")) %>% 1377 | select(1, gene_name, 2, 3, 4, enriched_tissues, tissues_detected) %>% 1378 | filter(gene_name %in% c("CD3D", 1379 | "CD3E", 1380 | "CD19")) 1381 | ``` 1382 | 1383 | ```{r class_network_2_whole_groups, echo=FALSE, message=FALSE, warning=FALSE} 1384 | 1385 | enrichment_whole_group <- 1386 | classification_max_norm_count %>% 1387 | select(1, 2, enriched_tissues) %>% 1388 | filter(spec_category %in% c("tissue enriched", "group enriched")) %>% 1389 | group_by(enriched_tissues, spec_category) %>% 1390 | summarise(n_genes = n()) %>% 1391 | ungroup() 1392 | 1393 | net_data <- 1394 | enrichment_whole_group %>% 1395 | mutate(all_enriched_tissues = enriched_tissues) %>% 1396 | separate_rows(enriched_tissues, sep = ";") %>% 1397 | group_by(enriched_tissues, spec_category) %>% 1398 | mutate(rank = rank(-n_genes, ties.method = "min")) %>% 1399 | group_by(all_enriched_tissues) %>% 1400 | mutate(any_low_rank = any(rank <= 2)) %>% 1401 | ungroup() %>% 1402 | 1403 | mutate(edge_id = paste("enriched:", all_enriched_tissues)) %>% 1404 | arrange(n_genes) 1405 | 1406 | net_edges <- 1407 | net_data %$% 1408 | 
tibble(node1 = enriched_tissues, node2 = edge_id, n = n_genes) %>% 1409 | unique() 1410 | 1411 | g <- 1412 | net_edges %>% 1413 | graph_from_data_frame(directed = FALSE) %>% 1414 | ggraph(layout = "kk") 1415 |
# Lookup table with one row per (edge, endpoint). `node` says which end of the
# edge the id came from: node1 is the cell type, node2 is the
# "enriched: ..." group node; `tissue_node` flags the former.
# net_edges holds exactly node1, node2 and n, so the count column is excluded
# from the gather by name. The previous positional exclusion `-(3:4)` referred
# to a fourth column that net_edges does not have.
link_map <-
  net_edges %>%
  gather(node, id, -n) %>%
  mutate(tissue_node = node == "node1",
         # For group nodes this is whether the group holds a single tissue
         # (no ";" in the id); ifelse() coerces it to "TRUE"/"FALSE".
         color_id = ifelse(tissue_node, id, !grepl(";", id)),
         # Tissue nodes are labelled by name, group nodes by gene count.
         label = ifelse(tissue_node, color_id, n)) %>%
  select(n, node, id, tissue_node, color_id, label) %>%
  unique()
1424 | 1425 | 1426 | edge_data <- get_edges()(g$data) 1427 | node_data <- 1428 | get_nodes()(g$data) %>% 1429 | as_tibble() %>% 1430 | left_join(link_map, 1431 | by = c("name" = "id")) 1432 | 1433 | 1434 | g + 1435 | geom_edge_arc(aes(width = n), 1436 | color = "gray", 1437 | strength = 0, 1438 | show.legend = F) + 1439 | scale_edge_alpha_continuous(range = c(0.3, 1)) + 1440 | scale_edge_width_continuous(range = c(1, 3)) + 1441 | 1442 | geom_node_point(data = node_data %>% 1443 | filter(!tissue_node), 1444 | aes(size = log(n), 1445 | fill = color_id), 1446 | stroke = 1, 1447 | # size = 10, 1448 | shape = 21, 1449 | show.legend = F)+ 1450 | geom_node_point(data = node_data %>% 1451 | filter(tissue_node), 1452 | aes(fill = color_id), 1453 | stroke = 1, 1454 | size = 20, 1455 | shape = 21, 1456 | show.legend = F)+ 1457 | geom_node_text(data = node_data, 1458 | aes(label = label), 1459 | size = 4) + 1460 | scale_size_continuous(range = c(5, 10)) + 1461 | scale_fill_manual(values = c(celltype_pal, gene_category_pal)) + 1462 | 1463 | theme_void() 1464 | 1465 | ggsave(savepath("Network enrichment whole group.pdf"), width = 16, height = 12, useDingbats = F) 1466 | ggsave(savepath("Network enrichment whole group.png"), width = 16, height = 12) 1467 | 1468 | 1469 | # Save for cytoscape 1470 | 1471 | 1472 | cyto_summary <- 1473 | net_edges %>% 1474 | mutate(category = ifelse(!grepl(";", node2), "Tissue enriched", "Group enriched"), 1475 | node_id = 
unclass(factor(node2)), 1476 | node1 = str_to_sentence(node1), 1477 | n_sqrt = sqrt(n), 1478 | str_len = str_length(node1)) %>% 1479 | select(category, node1, node2, node_id, n, n_sqrt, str_len) 1480 | 1481 | cyto_summary %T>% 1482 | write_delim(savepath("cytoscape nodes summary whole group.txt"), delim = "\t") %>% 1483 | write_csv(savepath("cytoscape nodes summary whole group.csv")) 1484 | 1485 | bind_rows(cyto_summary %>% 1486 | left_join(celltype_pal %>% 1487 | enframe("node1", "color")) %>% 1488 | select(node_id = node1, 1489 | color) %>% 1490 | unique() %>% 1491 | mutate(node_type = "Tissue"), 1492 | cyto_summary %>% 1493 | mutate(color = case_when(category == "Tissue enriched" ~ "#e41a1c", 1494 | category == "Group enriched" ~ "#FF9D00"), 1495 | node_id = as.character(node_id)) %>% 1496 | select(node_id, color) %>% 1497 | unique() %>% 1498 | 1499 | mutate(node_type = "Enrichment")) %>% 1500 | mutate(color2 = case_when(node_type == "Enrichment" ~ color, 1501 | node_type == "Tissue" ~ "#D3D3D3FF"), 1502 | color3 = case_when(node_type == "Enrichment" ~ color, 1503 | node_type == "Tissue" ~ "#BEBEBEFF")) %T>% 1504 | 1505 | write_delim(savepath("cytoscape nodes color whole group.txt"), delim = "\t") %>% 1506 | write_csv(savepath("cytoscape nodes color whole group.csv")) 1507 | 1508 | bind_rows(cyto_summary %>% 1509 | select(node_id = node1) %>% 1510 | mutate(label = node_id) %>% 1511 | unique(), 1512 | cyto_summary %>% 1513 | mutate(node_id = as.character(node_id), 1514 | label = as.character(n)) %>% 1515 | select(node_id, label) %>% 1516 | unique()) %T>% 1517 | write_delim(savepath("cytoscape nodes label whole group.txt"), delim = "\t") %>% 1518 | write_csv(savepath("cytoscape nodes label whole group.csv")) 1519 | ``` 1520 | 1521 | ##Multisample classification 1522 | 1523 | ```{r} 1524 | classification_sep_norm_count %>% 1525 | filter(spec_category %in% c("tissue enriched")) %>% 1526 | select(gene, spec_category, enriched_tissues, enriched_samples) %>% 1527 | 
1528 | separate_rows(enriched_samples, sep = ";") %>% 1529 | separate_rows(enriched_tissues, sep = ";") %>% 1530 | group_by(enriched_samples, enriched_tissues) %>% 1531 | summarise(n = n()) %>% 1532 | ungroup() %>% 1533 | select(1:3) %>% 1534 | spread(enriched_tissues, n, fill = 0) %>% 1535 | column_to_rownames("enriched_samples") %>% 1536 | pheatmap(color = heatmap_palette) 1537 | 1538 | classification_sep_norm_count %>% 1539 | left_join(blood_cell_category, 1540 | by = c("gene" = "ensg_id")) %>% 1541 | filter(spec_category %in% c("tissue enriched", "group enriched")) %>% 1542 | select(gene, spec_category, enriched_samples, enhanced_tissues) %>% 1543 | 1544 | separate_rows(enriched_samples, sep = ";") %>% 1545 | separate_rows(enhanced_tissues, sep = ",") %>% 1546 | group_by(spec_category, enriched_samples, enhanced_tissues) %>% 1547 | summarise(n = n()) %>% 1548 | ungroup() %>% 1549 | filter(!is.na(enriched_samples)) %>% 1550 | 1551 | mutate(enhanced_tissues = ifelse(is.na(enhanced_tissues), 1552 | "Only enriched in single", 1553 | enhanced_tissues)) %>% 1554 | mutate(enhanced_tissues = factor(enhanced_tissues, 1555 | levels = c("T-cells", 1556 | "B-cells", 1557 | "NK-cells", 1558 | "monocytes", 1559 | "granulocytes", 1560 | "dendritic cells", 1561 | "Only enriched in single"))) %>% 1562 | left_join(cluster_annotation, 1563 | by = c("enriched_samples" = "unique_cluster_id")) %>% 1564 | ggplot(aes(cluster, n, fill = enhanced_tissues)) + 1565 | geom_col() + 1566 | geom_text(data = . 
%>% 1567 | group_by(cluster, enriched_samples) %>% 1568 | summarise(n = sum(n)), 1569 | aes(cluster, n, 1570 | label = enriched_samples), 1571 | inherit.aes = F, 1572 | hjust = 0, 1573 | size = 2) + 1574 | scale_fill_manual(values = c(tissue_colors, "Only enriched in single" = "darkgray"), name = "Bulk classification") + 1575 | xlab("Single cell classification") + 1576 | stripped_theme + 1577 | coord_flip() + 1578 | scale_y_continuous(expand = expand_scale(c(0,0.15))) 1579 | ggsave(savepath("N enriched genes bulk - single sample comparison.pdf"), width = 7, height = 5) 1580 | 1581 | ``` 1582 | 1583 | 1584 | ```{r multisample_class, echo=FALSE, message=FALSE, warning=FALSE} 1585 | 1586 | 1587 | 1588 | net_data <- 1589 | classification_sep_norm_count %>% 1590 | filter(spec_category %in% c("tissue enriched")) %>% 1591 | select(gene, spec_category, enriched_tissues, enriched_samples) %>% 1592 | 1593 | separate_rows(enriched_samples, sep = ";") %>% 1594 | separate_rows(enriched_tissues, sep = ";") %>% 1595 | group_by(enriched_samples, enriched_tissues) %>% 1596 | summarise(n = n()) %>% 1597 | ungroup() %>% 1598 | arrange(-n) %>% 1599 | mutate(edge_id = paste("enriched:", enriched_samples, enriched_tissues)) 1600 | 1601 | 1602 | 1603 | 1604 | net_edges <- 1605 | net_data %$% 1606 | tibble(node1 = c(enriched_samples), 1607 | node2 = c(enriched_tissues), 1608 | n = c(n)) %>% 1609 | unique() 1610 | 1611 | g <- 1612 | net_edges %>% 1613 | graph_from_data_frame(directed = FALSE) %>% 1614 | ggraph(layout = "kk") 1615 | 1616 | # link_map <- 1617 | # net_edges %>% 1618 | # gather(node, id, -n, -type) %>% 1619 | # mutate(sample_node = type == "sample", 1620 | # color_id = ifelse(sample_node, gsub(" \\d*$", "", id), "group"), 1621 | # label = ifelse(sample_node, id, n)) %>% 1622 | # select(n, node, id, sample_node, color_id, label) %>% 1623 | # unique() 1624 | 1625 | 1626 | edge_data <- get_edges()(g$data) 1627 | node_data <- 1628 | get_nodes()(g$data) %>% 1629 | as_tibble() 1630 
| 1631 | 1632 | g + 1633 | geom_edge_arc(aes(label = n, 1634 | width = sqrt(n), 1635 | alpha = sqrt(n)), 1636 | color = "gray", 1637 | strength = 0, 1638 | show.legend = F) + 1639 | scale_edge_alpha_continuous(range = c(0.3, 1)) + 1640 | scale_edge_width_continuous(range = c(1, 3)) + 1641 | 1642 | geom_node_point(data = node_data, 1643 | aes(fill = gsub(" \\d*$", "", name)), 1644 | stroke = 1, 1645 | size = 20, 1646 | shape = 21, 1647 | show.legend = F)+ 1648 | geom_node_text(data = node_data, 1649 | aes(label = name), 1650 | size = 4) + 1651 | scale_size_continuous(range = c(5, 10)) + 1652 | scale_fill_manual(values = tissue_colors) + 1653 | 1654 | theme_void() 1655 | 1656 | ggsave(savepath("Network enrichment whole group.pdf"), width = 16, height = 12, useDingbats = F) 1657 | ggsave(savepath("Network enrichment whole group.png"), width = 16, height = 12) 1658 | 1659 | 1660 | ``` 1661 | -------------------------------------------------------------------------------- /scripts/cluster-annotation.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maxkarlsson/HPA-SingleCellType/b7b70d7e4b164956fe674531d415f9c0068b4397/scripts/cluster-annotation.pdf -------------------------------------------------------------------------------- /scripts/functions_classification.R: --------------------------------------------------------------------------------

# Example arguments for stepping through hpa_gene_classification() by hand:
# expression_col = "nx"
# tissue_col = "consensus_tissue_name"
# gene_col = "enssscg_id"
# enr_fold = 4
# max_group_n = 5
# det_lim = 1
# data <- pig_atlas_consensus

# Classify genes by tissue specificity and by how widely they are detected,
# using one expression value per gene and tissue.
#
# Arguments:
#   data            Long-format data frame with one row per gene and tissue.
#   expression_col  Name (string) of the numeric expression column.
#   tissue_col      Name (string) of the tissue/group column.
#   gene_col        Name (string) of the gene identifier column.
#   enr_fold        Fold threshold used for enrichment and enhancement.
#   max_group_n     Largest number of tissues accepted as an enriched group.
#   det_lim         Detection limit; values >= det_lim count as detected.
#
# Returns a tibble with one row per gene holding spec_category, dist_category,
# spec_score, n_expressed, fraction_expressed (in percent), max_exp,
# enriched_tissues, n_enriched, n_na, tissues_not_detected and
# tissues_detected.
#
# Stops with an error if the expression, gene or tissue column contains NA.
hpa_gene_classification <-
  function(data, expression_col, tissue_col, gene_col, enr_fold, max_group_n, det_lim = 1) {
    # all_of() makes it explicit that the column names come from the
    # arguments and not from columns that happen to share their names.
    data_ <-
      data %>%
      select(gene = all_of(gene_col),
             expression = all_of(expression_col),
             tissue = all_of(tissue_col)) %>%
      mutate(expression = round(expression, 4))

    if (any(is.na(data_$expression))) stop("NAs in expression column")
    if (any(is.na(data_$gene))) stop("NAs in gene column")
    if (any(is.na(data_$tissue))) stop("NAs in tissue column")

    n_groups <- length(unique(data_$tissue))

    gene_class_info <-
      data_ %>%
      group_by(gene) %>%
      summarise(

        # Gene expression distribution metrics
        mean_exp = mean(expression, na.rm = TRUE),
        min_exp = min(expression, na.rm = TRUE),
        max_exp = max(expression, na.rm = TRUE),
        # Second highest value. A gene with a single observation has none;
        # NA keeps the summary at length one instead of failing.
        max_2nd = if (length(expression) > 1) {
          sort(expression, decreasing = TRUE)[2]
        } else {
          NA_real_
        },

        # Expression frequency metrics
        n_exp = length(which(expression >= det_lim)),
        frac_exp = n_exp / length(expression[!is.na(expression)]) * 100,

        # Limit of enhancement metrics
        lim = max_exp / enr_fold,

        exps_over_lim = list(expression[which(expression >= lim & expression >= det_lim)]),
        n_over = length(exps_over_lim[[1]]),
        mean_over = mean(exps_over_lim[[1]]),
        min_over = ifelse(n_over == 0, NA,
                          min(exps_over_lim[[1]])),

        # Highest value below the enriched group, floored at a tenth of the
        # detection limit so that the fold ratios below stay finite.
        max_under_lim = max(expression[which(expression < min_over)], det_lim * 0.1),

        exps_enhanced = list(which(expression / mean_exp >= enr_fold & expression >= det_lim)),

        # Expression patterns
        enrichment_group = paste(sort(tissue[which(expression >= lim & expression >= det_lim)]), collapse = ";"),

        n_enriched = length(tissue[which(expression >= lim & expression >= det_lim)]),
        n_enhanced = length(exps_enhanced[[1]]),
        enhanced_in = paste(sort(tissue[exps_enhanced[[1]]]), collapse = ";"),
        n_na = n_groups - length(expression),
        max_2nd_or_lim = max(max_2nd, det_lim * 0.1, na.rm = TRUE),
        tissues_not_detected = paste(sort(tissue[which(expression < det_lim)]), collapse = ";"),
        tissues_detected = paste(sort(tissue[which(expression >= det_lim)]), collapse = ";"))

    gene_categories <-
      gene_class_info %>%
      mutate(
        spec_category = case_when(n_exp == 0 ~ "not detected",

                                  # Genes with expression fold times more than anything else are tissue enriched
                                  max_exp / max_2nd_or_lim >= enr_fold ~ "tissue enriched",

                                  # Genes with expression fold times more than the remaining tissues, in
                                  # groups of 2 to max_group_n tissues, are group enriched
                                  max_exp >= lim &
                                    n_over <= max_group_n & n_over > 1 &
                                    mean_over / max_under_lim >= enr_fold ~ "group enriched",

                                  # Genes with expression in tissues fold times more than the mean are tissue enhanced
                                  n_enhanced > 0 ~ "tissue enhanced",

                                  # Genes expressed with low tissue specificity
                                  TRUE ~ "low tissue specificity"),

        dist_category = case_when(frac_exp == 100 ~ "detected in all",
                                  frac_exp >= 31 ~ "detected in many",
                                  n_exp > 1 ~ "detected in some",
                                  n_exp == 1 ~ "detected in single",
                                  n_exp == 0 ~ "not detected"),

        spec_score = case_when(spec_category == "tissue enriched" ~ max_exp / max_2nd_or_lim,
                               spec_category == "group enriched" ~ mean_over / max_under_lim,
                               spec_category == "tissue enhanced" ~ max_exp / mean_exp))

    ##### Rename and format
    gene_categories %>%
      mutate(enriched_tissues = case_when(spec_category %in% c("tissue enriched", "group enriched") ~ enrichment_group,
                                          spec_category == "tissue enhanced" ~ enhanced_in),
             n_enriched = case_when(spec_category %in% c("tissue enriched", "group enriched") ~ n_enriched,
                                    spec_category == "tissue enhanced" ~ n_enhanced)) %>%
      select(gene,
             spec_category,
             dist_category,
             spec_score,
             n_expressed = n_exp,
             fraction_expressed = frac_exp,
             max_exp = max_exp,
             enriched_tissues,
             n_enriched,
             n_na = n_na,
             tissues_not_detected,
             tissues_detected)
  }

# Classify genes by tissue specificity when each tissue is represented by
# several samples.
#
# Arguments:
#   data            Long-format data frame with one row per gene and sample.
#   expression_col  Name (string) of the numeric expression column.
#   tissue_col      Name (string) of the tissue column.
#   gene_col        Name (string) of the gene identifier column.
#   sample_col      Name (string) of the sample identifier column.
#   enr_fold        Fold threshold used for enrichment and enhancement.
#   max_group_n     Largest number of tissues accepted as an enriched group.
#   det_lim         Detection limit; values >= det_lim count as detected.
#
# Returns a tibble with one row per gene holding spec_category, dist_category,
# spec_score, sample and tissue detection counts, the detected fractions
# (0-1) and the enriched samples/tissues with their counts.
#
# Stops with an error if any of the four columns contains NA.
#
# NOTE(review): the per-gene flags compare sample names, so this presumably
# expects sample ids to be unique within a gene - confirm against callers.
# NOTE(review): for "tissue enhanced" genes enriched_tissues/enriched_samples
# list the enhanced ones, while the n_*_enriched counts still count enriched
# samples/tissues; confirm whether that is intended.
hpa_gene_classification_multi_sample <-
  function(data, expression_col, tissue_col, gene_col, sample_col, enr_fold, max_group_n, det_lim = 1) {
    data_ <-
      data %>%
      select(gene = all_of(gene_col),
             expression = all_of(expression_col),
             tissue = all_of(tissue_col),
             sample = all_of(sample_col)) %>%
      mutate(expression = round(expression, 4))

    if (any(is.na(data_$expression))) stop("NAs in expression column")
    if (any(is.na(data_$gene))) stop("NAs in gene column")
    if (any(is.na(data_$tissue))) stop("NAs in tissue column")
    if (any(is.na(data_$sample))) stop("NAs in sample column")

    n_tissues <- length(unique(data_$tissue))
    n_samples <- length(unique(data_$sample))

    # Per-gene logical flags marking individual rows (samples).
    gene_wise_info <-
      data_ %>%
      group_by(gene) %>%
      mutate(
        # Sample with the highest expression
        max_exp = sample == sample[order(expression, decreasing = TRUE)][1],

        # Highest sample from any tissue other than that of the top sample
        max_2nd = sample ==
          sample[which(tissue != tissue[which(max_exp)][1])][
            order(expression[which(tissue != tissue[which(max_exp)][1])],
                  decreasing = TRUE)][1],

        lim = max(max(expression) / enr_fold, det_lim),

        expressed = expression >= det_lim,
        enriched = expression >= lim & expressed,
        enhanced = expression / mean(expression) >= enr_fold & expressed,

        # Lowest enriched sample and highest non-enriched sample
        min_over_lim = sample ==
          sample[which(enriched)][order(expression[which(enriched)], decreasing = FALSE)][1],
        max_under_lim = sample ==
          sample[which(!enriched)][order(expression[which(!enriched)], decreasing = TRUE)][1]) %>%
      arrange(gene, -expression) %>%
      # The flags are NA when no such sample exists. Turn only those NAs into
      # FALSE; existing TRUE/FALSE values must be kept, otherwise every row is
      # flagged and the group enriched score uses the wrong denominator.
      mutate(min_over_lim = !is.na(min_over_lim) & min_over_lim,
             max_under_lim = !is.na(max_under_lim) & max_under_lim)

    gene_categories <-
      gene_wise_info %>%
      summarise(n_sample_exp = length(which(expressed)),
                n_tissue_exp = n_distinct(tissue[which(expressed)]),

                frac_sample_exp = n_sample_exp / n_samples,
                frac_tissue_exp = n_tissue_exp / n_tissues,

                n_sample_enr = length(which(enriched)),
                n_tissue_enr = n_distinct(tissue[which(enriched)]),

                n_sample_enh = length(which(enhanced)),
                n_tissue_enh = n_distinct(tissue[which(enhanced)]),

                mean_enriched = mean(expression[which(enriched)]),
                mean_expression = mean(expression),

                enriched_samples = paste(sort(sample[which(enriched)]), collapse = ";"),
                enriched_tissues = paste(unique(sort(tissue[which(enriched)])), collapse = ";"),

                enhanced_samples = paste(sort(sample[which(enhanced)]), collapse = ";"),
                enhanced_tissues = paste(unique(sort(tissue[which(enhanced)])), collapse = ";"),

                # which() drops NA flags (e.g. when only one tissue is present),
                # so the denominator falls back to det_lim * 0.1.
                tissue_enriched_score = expression[which(max_exp)][1] /
                  max(expression[which(max_2nd)], det_lim * 0.1),
                group_enriched_score = if (any(max_under_lim)) {
                  mean_enriched / expression[which(max_under_lim)][1]
                } else {
                  0
                },
                tissue_enhanced_score = expression[which(max_exp)][1] / mean_expression,

                spec_category = case_when(n_sample_exp == 0 ~ "not detected",

                                          # Genes whose enriched samples all come from one tissue are tissue enriched
                                          n_tissue_enr == 1 ~ "tissue enriched",

                                          # Genes enriched in 2 to max_group_n tissues are group enriched
                                          n_tissue_enr > 1 &
                                            n_tissue_enr <= max_group_n ~ "group enriched",

                                          # Genes with expression in tissues fold times more than the mean are tissue enhanced
                                          n_tissue_enh > 0 ~ "tissue enhanced",

                                          # Genes expressed with low tissue specificity
                                          TRUE ~ "low tissue specificity"),

                # frac_tissue_exp is a fraction (0-1), so the thresholds are
                # expressed on that scale: all tissues, or at least 31 %.
                dist_category = case_when(n_tissue_exp == n_tissues ~ "detected in all",
                                          frac_tissue_exp >= 0.31 ~ "detected in many",
                                          n_tissue_exp > 1 ~ "detected in some",
                                          n_tissue_exp == 1 ~ "detected in single",
                                          n_tissue_exp == 0 ~ "not detected")) %>%
      mutate(spec_score = case_when(spec_category == "tissue enriched" ~
                                      tissue_enriched_score,
                                    spec_category == "group enriched" ~
                                      group_enriched_score,
                                    spec_category == "tissue enhanced" ~
                                      tissue_enhanced_score,
                                    TRUE ~ 0))

    ##### Rename and format
    gene_categories %>%
      mutate(enriched_tissues = case_when(spec_category %in% c("tissue enriched", "group enriched") ~ enriched_tissues,
                                          spec_category == "tissue enhanced" ~ enhanced_tissues),
             enriched_samples = case_when(spec_category %in% c("tissue enriched", "group enriched") ~ enriched_samples,
                                          spec_category == "tissue enhanced" ~ enhanced_samples)) %>%
      select(gene,
             spec_category,
             dist_category,
             spec_score,
             n_samples_expressed = n_sample_exp,
             n_tissues_expressed = n_tissue_exp,
             fraction_samples_expressed = frac_sample_exp,
             fraction_tissues_expressed = frac_tissue_exp,

             enriched_samples,
             enriched_tissues,
             n_samples_enriched = n_sample_enr,
             n_tissues_enriched = n_tissue_enr)
  }

# Run a correlation test for every (var1, var2) pair in long-format data.
#
# Arguments:
#   data             Data frame with two id columns and two value columns.
#   var1, var2       Names (strings) of the id columns defining each pair.
#   val1, val2       Names (strings) of the value columns to correlate.
#   cor_method       Method passed to cor.test().
#   p_adjust_method  Method passed to p.adjust().
#   alternative      Alternative hypothesis passed to cor.test().
#
# Returns a tibble sorted by adjusted p-value with the two id columns (under
# their original names), pval, cor, padj, significant (padj <= 0.05) and
# log10P (-log10 of padj). For "pearson" the confidence limits lo_confint and
# hi_confint are included, NA when cor.test() reports no interval.
calc_gene_correlations <-
  function(data, var1, var2, val1, val2, cor_method = "spearman", p_adjust_method = "BH", alternative = "two.sided") {

    data_ <-
      data %>%
      rename(var1 = all_of(var1),
             var2 = all_of(var2),
             val1 = all_of(val1),
             val2 = all_of(val2))

    data_ %>%
      group_by(var1, var2) %>%
      do({
        test_res <- cor.test(.$val1, .$val2, method = cor_method, alternative = alternative)

        pair_res <-
          tibble(pval = test_res$p.value,
                 cor = unname(test_res$estimate))

        if (cor_method == "pearson") {
          # No interval is reported for very small samples
          conf_int <- test_res$conf.int
          if (is.null(conf_int)) conf_int <- c(NA_real_, NA_real_)

          pair_res <-
            pair_res %>%
            mutate(lo_confint = conf_int[1],
                   hi_confint = conf_int[2])
        }

        pair_res
      }) %>%
      ungroup() %>%
      mutate(padj = p.adjust(pval, method = p_adjust_method),
             significant = padj <= 0.05,
             log10P = -log10(padj)) %>%
      arrange(padj) %>%
      set_colnames(c(var1, var2, colnames(.)[-c(1, 2)]))
  }

# Compute the distance between two value vectors for every (var1, var2) pair.
#
# Arguments:
#   data        Data frame with two id columns and two value columns.
#   var1, var2  Names (strings) of the id columns defining each pair.
#   val1, val2  Names (strings) of the value columns to compare.
#
# Returns a tibble sorted by distance with the two id columns (under their
# original names), dist (dist() with its default settings on the two value
# vectors), the mean of each value vector and their common mean.
calc_gene_distance <-
  function(data, var1, var2, val1, val2) {

    data_ <-
      data %>%
      rename(var1 = all_of(var1),
             var2 = all_of(var2),
             val1 = all_of(val1),
             val2 = all_of(val2))

    data_ %>%
      group_by(var1, var2) %>%
      summarise(dist = rbind(val1, val2) %>%
                  dist() %>%
                  as.numeric(),
                mean_var1 = mean(val1),
                mean_var2 = mean(val2),
                common_mean = mean(c(val1, val2))) %>%
      ungroup() %>%
      arrange(dist) %>%
      set_colnames(c(var1, var2, "dist", paste0("mean_", c(var1, var2)), "common_mean"))
  }

# Build an undirected ortholog network and find its connected components.
#
# Arguments:
#   orthologs  Data frame whose first two columns hold the gene pairs; it must
#              contain the columns enssscg_id and ensg_id, which supply the
#              node list.
#
# Returns a list with edges (from, to, weight = 1), nodes (unique gene ids),
# network (the graph object) and communities (one row per node with its
# component id and the size of that component).
make_ortholog_net <-
  function(orthologs) {
    edges <-
      orthologs %>%
      select(from = 1,
             to = 2) %>%
      mutate(weight = 1)

    nodes <-
      orthologs %$%
      c(enssscg_id,
        ensg_id) %>%
      unique()

    orth_net <-
      graph_from_data_frame(d = edges,
                            vertices = nodes,
                            directed = FALSE)

    ortholog_communities <-
      components(orth_net) %$%
      left_join(enframe(membership,
                        "node", "community"),
                enframe(csize,
                        "community", "community_size"),
                by = "community")

    list(edges = edges,
         nodes = nodes,
         network = orth_net,
         communities = ortholog_communities)
  }

-------------------------------------------------------------------------------- /scripts/functions_normalization.R: --------------------------------------------------------------------------------

# TMM-normalise an expression matrix against a synthetic reference sample made
# of the row-wise medians.
#
# Arguments:
#   x    Matrix-like object with features in rows and samples in columns.
#   ...  Further arguments forwarded to tmm().
#
# Returns the normalised data without the added median reference column.
# drop = FALSE keeps the two-dimensional shape for single-sample input.
# NOTE(review): tmm() is not defined in this file - presumably NOISeq::tmm();
# confirm.
tmm_norm_median_ref <- function(x, ...) {

  median_column <-
    apply(x,
          MARGIN = 1,
          median)

  x <- cbind(x, median_column)
  ref_index <- ncol(x)

  norm_data <- tmm(x, refColumn = ref_index, ...)

  norm_data[, -ref_index, drop = FALSE]
}

-------------------------------------------------------------------------------- /scripts/functions_utility.R: --------------------------------------------------------------------------------

# Build the path of a result file inside a dated results folder, creating the
# folder if needed.
#
# Arguments:
#   savename  File name to place in the results folder.
#
# Returns "<project root>/results/<today's date>/<savename>", where the
# project root is the part of the working directory up to and including
# "HPA-SingleCellType". If the working directory does not contain that name,
# the working directory itself is used as the root.
savepath <-
  function(savename) {
    wd <- getwd()

    project_root <- str_extract(wd, ".*HPA-SingleCellType")
    if (is.na(project_root)) project_root <- wd

    result_folder <- paste0(project_root, "/results/", Sys.Date())

    # recursive = TRUE also creates a missing "results" parent folder
    dir.create(result_folder, showWarnings = FALSE, recursive = TRUE)

    paste0(result_folder, "/", savename)
  }

# Omega squared effect sizes for the terms of a fitted aov model.
#
# Arguments:
#   aov_in    Fitted aov object.
#   neg2zero  If TRUE, negative estimates are set to zero.
#
# Returns a named numeric vector with one value per model term followed by
# "Residuals", which holds one minus the sum of the term values.
omega_sq <- function(aov_in, neg2zero = TRUE) {
  aovtab <- summary(aov_in)[[1]]
  n_terms <- length(aovtab[["Sum Sq"]]) - 1
  output <- rep(-1, n_terms)
  SSr <- aovtab[["Sum Sq"]][n_terms + 1]
  MSr <- aovtab[["Mean Sq"]][n_terms + 1]
  SSt <- sum(aovtab[["Sum Sq"]])
  # seq_len() keeps the loop empty for a model without terms
  for (i in seq_len(n_terms)) {
    SSm <- aovtab[["Sum Sq"]][i]
    DFm <- aovtab[["Df"]][i]
    output[i] <- (SSm - DFm * MSr) / (SSt + MSr)
    if (neg2zero && output[i] < 0) {
      output[i] <- 0
    }
  }
  output <- c(output, 1 - sum(output))
  names(output) <- c(rownames(aovtab)[seq_len(n_terms)], "Residuals")

  output
}

# Spread several value columns over one key column at once.
#
# Arguments:
#   df     Data frame.
#   key    Unquoted key column whose values become part of the new names.
#   value  Unquoted selection of value columns, e.g. c(a, b).
#
# The value columns are gathered into long format, each key value is united
# with the name of the value column it came from, and the result is spread
# back so that every key/value-column combination gets its own column.
multispread <- function(df, key, value) {

  # quote key
  keyq <- rlang::enquo(key)
  # quote the value selection so it can be spliced into gather()
  valueq <- rlang::enquo(value)
  s <- rlang::quos(!!valueq)
  df %>% gather(variable, value, !!!s) %>%
    unite(temp, !!keyq, variable) %>%
    spread(temp, value)
}

# Rotate points counter-clockwise by rotate_angle (radians).
#
# Arguments:
#   x, y           Point coordinates.
#   rotate_angle   Angle in radians added to each point's polar angle.
#   rotate_center  Length-two offset applied before and undone after rotating.
#
# Returns a tibble with the rotated x and y plus the helper columns quadrant,
# hyp (distance from the origin after the offset) and angle (polar angle after
# rotation).
#
# NOTE(review): rotate_center is added before rotating and subtracted
# afterwards, so the pivot is effectively -rotate_center; confirm that this
# sign convention is intended.
rotate_coords <-
  function(x, y, rotate_angle, rotate_center = c(0, 0)) {

    # Center data
    rotdata <-
      tibble(x, y) %>%
      mutate(x = x + rotate_center[1],
             y = y + rotate_center[2],

             # Calculate quadrants
             quadrant = case_when(x >= 0 & y >= 0 ~ 1,
                                  x < 0 & y >= 0 ~ 2,
                                  x < 0 & y < 0 ~ 3,
                                  x >= 0 & y < 0 ~ 4),

             # Hypotenuse
             hyp = sqrt(x^2 + y^2),

             # Angle in [0, 2 * pi), measured from the positive x axis
             angle = case_when(x == 0 & y == 0 ~ 0,
                               quadrant %in% 1:2 ~ acos(x/hyp),
                               quadrant %in% 3:4 ~ 2 * pi - acos(x/hyp)) + rotate_angle,

             # New coordinates
             x = cos(angle) * hyp,
             y = sin(angle) * hyp,

             # Recenter coordinates
             x = x - rotate_center[1],
             y = y - rotate_center[2])

    rotdata
  }

# Compress the polar angles of points so that a wedge of shrink_angle radians
# is left empty.
#
# Arguments:
#   x, y           Point coordinates.
#   shrink_angle   Size in radians of the wedge to free up; every polar angle
#                  is multiplied by 1 - shrink_angle / (2 * pi).
#   rotate_center  Length-two offset applied before and undone after scaling.
#
# Returns a tibble with the new x and y plus the helper columns quadrant, hyp
# and angle (the scaled polar angle).
#
# NOTE(review): same offset sign convention as rotate_coords() - the
# effective pivot is -rotate_center; confirm.
shrink_rotation_coords <-
  function(x, y, shrink_angle, rotate_center = c(0, 0)) {

    shrink_factor <-
      1 - shrink_angle / (2 * pi)

    # Center data
    rotdata <-
      tibble(x, y) %>%
      mutate(x = x + rotate_center[1],
             y = y + rotate_center[2],

             # Calculate quadrants
             quadrant = case_when(x >= 0 & y >= 0 ~ 1,
                                  x < 0 & y >= 0 ~ 2,
                                  x < 0 & y < 0 ~ 3,
                                  x >= 0 & y < 0 ~ 4),

             # Hypotenuse
             hyp = sqrt(x^2 + y^2),

             # Angle in [0, 2 * pi), measured from the positive x axis
             angle = case_when(x == 0 & y == 0 ~ 0,
                               quadrant %in% 1:2 ~ acos(x/hyp),
                               quadrant %in% 3:4 ~ 2 * pi - acos(x/hyp)),

             # Shrink angle
             # angle = case_when(x == 0 & y == 0 ~ 0,
             #                   quadrant %in% c(1,4) ~ angle + shrink_angle,
             #                   quadrant %in% c(2,3) ~ angle - shrink_angle),

             angle = angle * shrink_factor,

             # New coordinates
             x = cos(angle) * hyp,
             y = sin(angle) * hyp,

             # Recenter coordinates
             x = x - rotate_center[1],
             y = y - rotate_center[2])

    rotdata
  }

# Find the angles at which a circular dendrogram can be cut between its two
# top-level clusters.
#
# Arguments:
#   clust  Clustering object accepted by as.dendrogram() and cutree().
#
# The dendrogram is laid out in a circle, its leaves (height == 0) are
# assigned to two clusters with cutree(k = 2), and the polar angle of every
# leaf is computed. For every pair of leaves from different clusters the
# smallest angular distance is calculated; the two closest pairs are kept.
#
# Returns 2 * pi minus the mean leaf angle of each kept pair (up to two
# values).
#
# NOTE(review): the mean of the two leaf angles is taken without handling
# wrap-around at 0 / 2 * pi - confirm this is acceptable for the layouts used.
calculate_retina_cut_angle <-
  function(clust) {
    dendrogram <-
      clust %>%
      as.dendrogram()

    g <-
      ggraph(dendrogram, layout = 'dendrogram', circular = TRUE)

    g_edgepoints <-
      g$data %>%
      as_tibble() %>%
      filter(height == 0) %>%
      left_join(cutree(clust, k = 2) %>%
                  enframe("label",
                          "cluster"),
                by = "label") %>%
      mutate(angle = calculate_coord_angle(x, y))

    expand_grid(node1 = g_edgepoints$.ggraph.index,
                node2 = g_edgepoints$.ggraph.index) %>%
      left_join(g_edgepoints %>%
                  select(node1 = .ggraph.index,
                         angle1 = angle,
                         cluster1 = cluster),
                by = "node1") %>%
      left_join(g_edgepoints %>%
                  select(node2 = .ggraph.index,
                         angle2 = angle,
                         cluster2 = cluster),
                by = "node2") %>%
      filter(cluster1 == 1,
             cluster2 == 2) %>%
      # Grouping by all columns makes min() below work per leaf pair
      group_by_all() %>%
      mutate(dist = c(angle1 - angle2,
                      (angle1 - 2 * pi) - angle2,
                      angle1 - (angle2 - 2 * pi),
                      (angle1 - 2 * pi) - (angle2 - 2 * pi)) %>%
               abs() %>%
               min()) %>%
      ungroup() %>%
      arrange(dist) %>%
      slice(1:2) %>%
      mutate(cut_angle = (angle1 + angle2) / 2) %>%
      pull(cut_angle) %>%
      {2 * pi - .}
  }

# Polar angle of points, in radians within [0, 2 * pi), measured from the
# positive x axis. The origin gets angle 0.
#
# Arguments:
#   x, y           Point coordinates.
#   rotate_center  Length-two offset added to the coordinates first.
#
# Returns a numeric vector of angles, one per point.
calculate_coord_angle <-
  function(x, y, rotate_center = c(0, 0)) {
    tibble(x, y) %>%
      mutate(x = x + rotate_center[1],
             y = y + rotate_center[2],

             # Calculate quadrants
             quadrant = case_when(x >= 0 & y >= 0 ~ 1,
                                  x < 0 & y >= 0 ~ 2,
                                  x < 0 & y < 0 ~ 3,
                                  x >= 0 & y < 0 ~ 4),

             # Hypotenuse
             hyp = sqrt(x^2 + y^2),

             # Angle
             angle = case_when(x == 0 & y == 0 ~ 0,
                               quadrant %in% 1:2 ~ acos(x/hyp),
                               quadrant %in% 3:4 ~ 2 * pi - acos(x/hyp))) %>%
      pull(angle)
  }

-------------------------------------------------------------------------------- /scripts/heatmap generator.rar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maxkarlsson/HPA-SingleCellType/b7b70d7e4b164956fe674531d415f9c0068b4397/scripts/heatmap generator.rar -------------------------------------------------------------------------------- /scripts/heatmap generator/Cell type markers.tsv: -------------------------------------------------------------------------------- 1 | Tissue Cell type group Cell type Marker Comment Good IHC example (y/n) 2 | _General Adipose tissue Adipocytes LIPE 3 | _General Adipose
tissue Adipocytes PLIN1 4 | _General Adipose tissue Adipocytes FABP4 5 | _General Adipose tissue Adipocytes SLC7A10 6 | _General Adipose tissue Adipocytes PLIN4 7 | _General Epithelial cells neuroendocrine cells CHGA 8 | _General Epithelial cells neuroendocrine cells SCG2 9 | _General Epithelial cells neuroendocrine cells SCG5 10 | _General Epithelial cells neuroendocrine cells PCSK1N 11 | _General Epithelial cells neuroendocrine cells SCG3 12 | _General Epithelial cells neuroendocrine cells CHGB 13 | _General Epithelial cells neuroendocrine cells NPTX2 14 | _General Epithelial cells neuroendocrine cells VWA5B2 15 | _General Epithelial cells neuroendocrine cells CPLX2 16 | _General Epithelial cells neuroendocrine cells SYP also ganglion cells 17 | _General Epithelial cells neuroendocrine cells PRPH ganglion cells 18 | _General Immune cells Granulocytes CPA3 19 | _General Immune cells Granulocytes CCR3 20 | _General Immune cells Granulocytes TPSD1 21 | _General Immune cells Inflammatory cells PTPRC General marker of inflammatory cells 22 | _General Immune cells B lymphocytes MS4A1 23 | _General Immune cells B lymphocytes CD79A 24 | _General Immune cells B lymphocytes CD19 25 | _General Immune cells B lymphocytes CR2 26 | _General Immune cells B lymphocytes CD72 27 | _General Immune cells B lymphocytes CD22 28 | _General Immune cells B lymphocytes MS4A1 CD20, low level expression also in T lymphocytes 29 | _General Immune cells Macrophages CD68 30 | _General Immune cells Macrophages CD163 31 | _General Immune cells Macrophages MRC1 CD206 32 | _General Immune cells Macrophages MARCO 33 | _General Immune cells Macrophages C1QB 34 | _General Immune cells Macrophages CCR1 35 | _General Immune cells Macrophages MSR1 36 | _General Immune cells Macrophages MRC1 37 | _General Immune cells Macrophages ITGAM 38 | _General Immune cells Macrophages CD33 39 | _General Immune cells Fibroblasts COL1A2 40 | _General Immune cells Fibroblasts COL1A1 41 | _General Immune cells 
Fibroblasts COL3A1 42 | _General Immune cells Fibroblasts COL6A3 43 | _General Immune cells Fibroblasts COL5A1 44 | _General Immune cells Fibroblasts COL5A2 45 | _General Immune cells Fibroblasts FBN1 46 | _General Immune cells Fibroblasts MFAP2 47 | _General Immune cells T lymphocytes CD3E 48 | _General Immune cells T lymphocytes CD2 49 | _General Immune cells T lymphocytes LCK 50 | _General Immune cells T lymphocytes CD3D 51 | _General Immune cells T lymphocytes CD3G 52 | _General Immune cells T lymphocytes CD8A 53 | _General Immune cells T lymphocytes CD5 54 | _General Immune cells T lymphocytes UBASH3A 55 | _General Immune cells T lymphocytes CD8B 56 | _General Mesenchymal cells Mesenchymal cells VIM General marker for mesenchymal cells 57 | _General Muscle cells Smooth muscle cells ACTA2 58 | _General Muscle cells Smooth muscle cells MYL9 59 | _General Muscle cells Smooth muscle cells ACTG2 60 | _General Muscle cells Smooth muscle cells FLNA 61 | _General Muscle cells Smooth muscle cells CALD1 62 | _General Muscle cells Smooth muscle cells CNN1 63 | _General Muscle cells Smooth muscle cells KCNMB1 64 | _General Muscle cells Smooth muscle cells MYH11 65 | _General Vascular cells Endothelial cells FCN3 ??, makes sense for lung at least 66 | _General Vascular cells Endothelial cells SELE 67 | _General Vascular cells Endothelial cells PECAM1 68 | _General Vascular cells Endothelial cells CLEC14A 69 | _General Vascular cells Endothelial cells CDH5 70 | _General Vascular cells Endothelial cells VWF 71 | _General Vascular cells Endothelial cells ECSCR 72 | _General Vascular cells Endothelial cells ERG 73 | _General Vascular cells Endothelial cells FLT4 74 | _General Vascular cells Endothelial cells CD34 75 | _General Vascular cells Endothelial cells ENG 76 | _General Vascular cells Endothelial cells ESAM 77 | _General Vascular cells Endothelial cells ROBO4 78 | Breast Epithelial cells myoepithelial cells (coexpression of epithelial markers and smooth muscle markers) 
MYH11 smooth muscle marker 79 | Breast Epithelial cells myoepithelial cells (coexpression of epithelial markers and smooth muscle markers) CNN1 smooth muscle marker 80 | Breast Epithelial cells myoepithelial cells (coexpression of epithelial markers and smooth muscle markers) NGFR 81 | Breast Epithelial cells myoepithelial cells (coexpression of epithelial markers and smooth muscle markers) TP63 82 | Breast Epithelial cells myoepithelial cells (coexpression of epithelial markers and smooth muscle markers) CDH5 83 | Breast Epithelial cells myoepithelial cells (coexpression of epithelial markers and smooth muscle markers) SERPINB5 epithelial marker 84 | Breast Epithelial cells myoepithelial cells (coexpression of epithelial markers and smooth muscle markers) MME uncertain 85 | Breast Epithelial cells myoepithelial cells (coexpression of epithelial markers and smooth muscle markers) KRT5 epithelial marker 86 | Breast Epithelial cells myoepithelial cells (coexpression of epithelial markers and smooth muscle markers) KRT14 epithelial marker 87 | Breast Epithelial cells luminal epithelial cells SCGB2A2 88 | Breast Epithelial cells luminal epithelial cells PIP 89 | Breast Epithelial cells luminal epithelial cells KRT7 90 | Breast Epithelial cells luminal epithelial cells KRT8 91 | Breast Epithelial cells luminal epithelial cells KRT18 92 | Breast Epithelial cells luminal epithelial cells KRT19 93 | 94 | _General Immune cells Macrophages CD14 95 | 96 | _General Immune cells Macrophages CD4 97 | _General Immune cells Dendritic cells CD4 98 | _General Immune cells T lymphocytes CD4 CD4 T cells and T-reg specifically 99 | _General Immune cells T lymphocytes CCR4 T-reg 100 | _General Immune cells T lymphocytes IL2RA T-reg 101 | _General Immune cells Monocytes FCGR3A Non-classical and intermediate 102 | _General Immune cells Dendritic cells ITGAX Myeloid 103 | _General Immune cells Granulocytes ITGAX Eosinophils & Neutrophils 104 | _General Immune cells B lymphocytes CD27 
Memory specifically 105 | _General Immune cells T lymphocytes CD28 106 | _General Immune cells Natural killer cells NCAM1 107 | _General Immune cells Basophils IL3RA 108 | _General Immune cells Dendritic cells IL3RA Plasmacytoid DC specifically 109 | _General Immune cells Dendritic cells GZMB Plasmacytoid DC specifically 110 | _General Immune cells T lymphocytes IL7R T-regs are low 111 | _General Immune cells T lymphocytes KLRB1 MAIT are high 112 | _General Immune cells Basophils ENPP3 113 | _General Immune cells Dendritic cells HLA-DRA 114 | _General Immune cells Dendritic cells HLA-DRB1 115 | _General Immune cells Dendritic cells HLA-DRB5 116 | _General Immune cells B lymphocytes HLA-DRA 117 | _General Immune cells B lymphocytes HLA-DRB1 118 | _General Immune cells B lymphocytes HLA-DRB5 119 | _General Immune cells Monocytes HLA-DRA 120 | _General Immune cells Monocytes HLA-DRB1 121 | _General Immune cells Monocytes HLA-DRB5 -------------------------------------------------------------------------------- /scripts/heatmap generator/Cluster annotation.tsv: -------------------------------------------------------------------------------- 1 | dataset_id dataset cluster_id cell_type 2 | 1 Axillary lymph nodes 0 Endothelial cells 3 | 1 Axillary lymph nodes 1 Endothelial cells 4 | 1 Axillary lymph nodes 2 Endothelial cells 5 | 1 Axillary lymph nodes 3 Endothelial cells 6 | 1 Axillary lymph nodes 4 Endothelial cells 7 | 1 Axillary lymph nodes 5 Endothelial cells 8 | 1 Axillary lymph nodes 6 Endothelial cells 9 | 1 Axillary lymph nodes 7 Endothelial cells 10 | 1 Axillary lymph nodes 8 Endothelial cells 11 | 1 Axillary lymph nodes 9 Endothelial cells 12 | 1 Axillary lymph nodes 10 Endothelial cells 13 | 1 Axillary lymph nodes 11 Endothelial cells 14 | 1 Axillary lymph nodes 12 B cells 15 | 1 Axillary lymph nodes 13 Endothelial cells 16 | 2 Breast 0 Luminal 2 17 | 2 Breast 1 Basal 18 | 2 Breast 2 Basal 19 | 2 Breast 3 Luminal 2 20 | 2 Breast 4 Luminal 1 21 | 2 Breast 5 Basal 
22 | 2 Breast 6 Luminal 1 23 | 2 Breast 7 Basal 24 | 2 Breast 8 Luminal 1 25 | 2 Breast 9 Luminal 2 26 | 2 Breast 10 Luminal 2 27 | 2 Breast 11 Luminal 1 28 | 2 Breast 12 Luminal 2 29 | 2 Breast 13 Luminal 2 30 | 2 Breast 14 Basal 31 | 2 Breast 15 Luminal 2 32 | 2 Breast 16 Luminal 1 33 | 2 Breast 17 Luminal 2 34 | 3 Colon 0 Progenitor cells 35 | 3 Colon 1 stem cells 36 | 3 Colon 2 Progenitor cells 37 | 3 Colon 3 Goblet 38 | 3 Colon 4 Enterocytes 39 | 3 Colon 5 Progenitor cells 40 | 3 Colon 6 Goblet 41 | 3 Colon 7 Transient amplifying cells 42 | 3 Colon 8 Paneth cells 43 | 3 Colon 9 Paneth cells 44 | 3 Colon 10 Paneth cells 45 | 4 Colon 2 0 Goblet 46 | 4 Colon 2 1 Progenitor cells 47 | 4 Colon 2 2 Progenitor cells 48 | 4 Colon 2 3 Paneth cells 49 | 4 Colon 2 4 T cells 50 | 4 Colon 2 5 Enterocytes 51 | 4 Colon 2 6 Enterocytes 52 | 4 Colon 2 7 Enterocytes 53 | 4 Colon 2 8 Paneth cells 54 | 4 Colon 2 9 Enterocytes 55 | 4 Colon 2 10 Enterocytes 56 | 4 Colon 2 11 Paneth cells 57 | 4 Colon 2 12 B cells 58 | 4 Colon 2 13 T cells 59 | 4 Colon 2 14 Epithelial cells 60 | 4 Colon 2 15 NA 61 | 5 Eyes 0 Rods 62 | 5 Eyes 1 Rods 63 | 5 Eyes 2 Muller 64 | 5 Eyes 3 Horizontal 65 | 5 Eyes 4 Bipolar 66 | 5 Eyes 5 Muller 67 | 5 Eyes 6 Muller 68 | 5 Eyes 7 Bipolar 69 | 5 Eyes 8 Retinal ganglion cells 70 | 5 Eyes 9 Cones 71 | 5 Eyes 10 Horizontal 72 | 5 Eyes 11 Muller 73 | 6 Eyes macula 0 T cells 74 | 6 Eyes macula 1 Retinal pigment Epithelial cells 75 | 6 Eyes macula 2 Retinal pigment Epithelial cells 76 | 6 Eyes macula 3 Retinal pigment Epithelial cells 77 | 6 Eyes macula 4 Retinal pigment Epithelial cells 78 | 6 Eyes macula 5 Retinal pigment Epithelial cells 79 | 6 Eyes macula 6 Retinal pigment Epithelial cells 80 | 6 Eyes macula 7 Muller 81 | 6 Eyes macula 8 Monocytes 82 | 6 Eyes macula 9 Bipolar 83 | 6 Eyes macula 10 Myeloid 84 | 6 Eyes macula 11 Retinal pigment Epithelial cells 85 | 6 Eyes macula 12 Myeloid 86 | 7 Eyes peripheral 0 Amacrine 87 | 7 Eyes peripheral 1 Muller 88 | 7 
Eyes peripheral 2 Muller 89 | 7 Eyes peripheral 3 Retinal pigment Epithelial cells 90 | 7 Eyes peripheral 4 T cells 91 | 7 Eyes peripheral 5 Muller 92 | 7 Eyes peripheral 6 Retinal pigment Epithelial cells 93 | 7 Eyes peripheral 7 Muller 94 | 7 Eyes peripheral 8 Muller 95 | 7 Eyes peripheral 9 Muller 96 | 7 Eyes peripheral 10 Muller 97 | 7 Eyes peripheral 11 Muller 98 | 8 Head and neck lymph nodes 0 Endothelial cells 99 | 8 Head and neck lymph nodes 1 Endothelial cells 100 | 8 Head and neck lymph nodes 2 Endothelial cells 101 | 8 Head and neck lymph nodes 3 Endothelial cells 102 | 8 Head and neck lymph nodes 4 Endothelial cells 103 | 8 Head and neck lymph nodes 5 Endothelial cells 104 | 8 Head and neck lymph nodes 6 Endothelial cells 105 | 8 Head and neck lymph nodes 7 Endothelial cells 106 | 8 Head and neck lymph nodes 8 Endothelial cells 107 | 8 Head and neck lymph nodes 9 Endothelial cells 108 | 8 Head and neck lymph nodes 10 Endothelial cells 109 | 8 Head and neck lymph nodes 11 Endothelial cells 110 | 9 Heart 0 Ventricular Cardiomyocyte 111 | 9 Heart 1 Ventricular Cardiomyocyte 112 | 9 Heart 2 Ventricular Cardiomyocyte 113 | 9 Heart 3 Endothelial cells 114 | 9 Heart 4 Macrophages 115 | 9 Heart 5 Endothelial cells 116 | 9 Heart 6 Smooth muscle cells 117 | 9 Heart 7 Fibroblastss 118 | 9 Heart 8 Fibroblastss 119 | 9 Heart 9 Atrial Cardiomyocyte 120 | 10 Ileum 0 Epithelial cells 121 | 10 Ileum 1 Epithelial cells 122 | 10 Ileum 2 Epithelial cells 123 | 10 Ileum 3 Epithelial cells 124 | 10 Ileum 4 Epithelial cells 125 | 10 Ileum 5 Epithelial cells 126 | 10 Ileum 6 Epithelial cells 127 | 10 Ileum 7 Epithelial cells 128 | 10 Ileum 8 Epithelial cells 129 | 10 Ileum 9 Epithelial cells 130 | 10 Ileum 10 Epithelial cells 131 | 10 Ileum 11 Epithelial cells 132 | 11 Kidney 0 Proximal tubule 133 | 11 Kidney 1 Vascular 134 | 11 Kidney 2 Proximal tubule 135 | 11 Kidney 3 Proximal tubule 136 | 11 Kidney 4 Glomerular parietal Epithelial cells 137 | 11 Kidney 5 Proximal tubule 
138 | 11 Kidney 6 Proximal tubule 139 | 11 Kidney 7 T cells 140 | 11 Kidney 8 Monocytes 141 | 11 Kidney 9 Glomerular parietal Epithelial cells 142 | 11 Kidney 10 Glomerular parietal Epithelial cells 143 | 11 Kidney 11 B cells 144 | 12 Liver 0 T cells 145 | 12 Liver 1 Hepatocytes 146 | 12 Liver 2 Monocytes 147 | 12 Liver 3 Endothelial cells 148 | 12 Liver 4 Hepatocytes 149 | 12 Liver 5 Hepatocytes 150 | 12 Liver 6 Hepatocytes 151 | 12 Liver 7 B cells 152 | 12 Liver 8 NK cells 153 | 12 Liver 9 Hepatocytes 154 | 12 Liver 10 Monocytes 155 | 12 Liver 11 NK cells 156 | 12 Liver 12 Hepatocytes 157 | 12 Liver 13 Endothelial cells 158 | 12 Liver 14 T cells 159 | 12 Liver 15 Epithelial cells 160 | 12 Liver 16 NK cells 161 | 12 Liver 17 ? 162 | 13 Liver hep- CD45- 0 Stellate cells 163 | 13 Liver hep- CD45- 1 T cells 164 | 13 Liver hep- CD45- 2 Macrophages 165 | 13 Liver hep- CD45- 3 T cells 166 | 13 Liver hep- CD45- 4 Dendritic cells 167 | 13 Liver hep- CD45- 5 Hepatocytes 168 | 13 Liver hep- CD45- 6 Endothelial cells 169 | 13 Liver hep- CD45- 7 NK cells 170 | 13 Liver hep- CD45- 8 NK cells 171 | 13 Liver hep- CD45- 9 Endothelial cells 172 | 13 Liver hep- CD45- 10 Endothelial cells 173 | 13 Liver hep- CD45- 11 Hepatocytes 174 | 13 Liver hep- CD45- 12 B cells 175 | 13 Liver hep- CD45- 13 Endothelial cells 176 | 13 Liver hep- CD45- 14 B cells 177 | 13 Liver hep- CD45- 15 Stellate cells 178 | 14 Liver hep- CD45+ 0 NK cells 179 | 14 Liver hep- CD45+ 1 T cells 180 | 14 Liver hep- CD45+ 2 T cells 181 | 14 Liver hep- CD45+ 3 Monocytes 182 | 14 Liver hep- CD45+ 4 T cells 183 | 14 Liver hep- CD45+ 5 NK cells 184 | 14 Liver hep- CD45+ 6 Monocytes 185 | 14 Liver hep- CD45+ 7 T cells 186 | 14 Liver hep- CD45+ 8 Monocytes 187 | 14 Liver hep- CD45+ 9 B cells 188 | 14 Liver hep- CD45+ 10 T cells 189 | 14 Liver hep- CD45+ 11 Endothelial cells 190 | 14 Liver hep- CD45+ 12 T cells 191 | 14 Liver hep- CD45+ 13 Dendritic cells 192 | 14 Liver hep- CD45+ 14 B cells 193 | 15 Lung 0 Monocytes 194 | 
15 Lung 1 Epithelial cells 195 | 15 Lung 2 Monocytes 196 | 15 Lung 3 T cells 197 | 15 Lung 4 Granulocytes 198 | 15 Lung 5 Fibroblastss 199 | 15 Lung 6 Granulocytes 200 | 15 Lung 7 Epithelial cells 201 | 15 Lung 8 Epithelial cells 202 | 15 Lung 9 Fibroblastss 203 | 15 Lung 10 Epithelial cells 204 | 16 Muscle 0 Endothelial cells 205 | 16 Muscle 1 Myofibroblasts 206 | 16 Muscle 2 Fibroblastss 207 | 16 Muscle 3 T cells 208 | 16 Muscle 4 Smooth muscle cells 209 | 16 Muscle 5 Endothelial cells 210 | 16 Muscle 6 Fibroblastss 211 | 16 Muscle 7 Smooth muscle cells 212 | 17 NK cells blood 0 NK cells 213 | 17 NK cells blood 1 NK cells 214 | 17 NK cells blood 2 NK cells 215 | 17 NK cells blood 3 NK cells 216 | 17 NK cells blood 4 NK cells 217 | 17 NK cells blood 5 NK cells 218 | 18 NK cells bone marrow 0 NK cells 219 | 18 NK cells bone marrow 1 NK cells 220 | 18 NK cells bone marrow 2 NK cells 221 | 18 NK cells bone marrow 3 NK cells 222 | 18 NK cells bone marrow 4 NK cells 223 | 18 NK cells bone marrow 5 NK cells 224 | 18 NK cells bone marrow 6 NK cells 225 | 18 NK cells bone marrow 7 NK cells 226 | 18 NK cells bone marrow 8 Dendritic cells 227 | 19 PBMCs 0 Monocytes 228 | 19 PBMCs 1 T cells 229 | 19 PBMCs 2 T cells 230 | 19 PBMCs 3 T cells 231 | 19 PBMCs 4 T cells 232 | 19 PBMCs 5 T cells 233 | 19 PBMCs 6 T cells 234 | 19 PBMCs 7 Monocytes 235 | 19 PBMCs 8 B cells 236 | 19 PBMCs 9 Monocytes 237 | 19 PBMCs 10 Dendritic cells 238 | 19 PBMCs 11 NA 239 | 20 Placenta 0 Villous cytotrophoblast 240 | 20 Placenta 1 Fibroblastss 241 | 20 Placenta 2 Villous cytotrophoblast 242 | 20 Placenta 3 Extravillous trophoblasts 243 | 20 Placenta 4 Villous cytotrophoblast 244 | 20 Placenta 5 Hofbauer cells 245 | 20 Placenta 6 Extravillous trophoblasts 246 | 20 Placenta 7 Macrophages 247 | 20 Placenta 8 Villous cytotrophoblast 248 | 20 Placenta 9 Extravillous trophoblasts 249 | 20 Placenta 10 Syncytiotrophoblast 250 | 20 Placenta 11 Epithelial cells 251 | 20 Placenta 12 Villous cytotrophoblast 
252 | 20 Placenta 13 Villous cytotrophoblast 253 | 20 Placenta 14 Villous cytotrophoblast 254 | 20 Placenta 15 Villous cytotrophoblast 255 | 20 Placenta 16 Villous cytotrophoblast 256 | 20 Placenta 17 Fibroblastss 257 | 20 Placenta 18 Monocytes 258 | 20 Placenta 19 Endothelial cells 259 | 20 Placenta 20 Epithelial cells 260 | 21 Placenta blood 0 T cells 261 | 21 Placenta blood 1 T cells 262 | 21 Placenta blood 2 T cells 263 | 21 Placenta blood 3 T cells 264 | 21 Placenta blood 4 T cells 265 | 21 Placenta blood 5 Monocytes 266 | 21 Placenta blood 6 T cells 267 | 21 Placenta blood 7 Monocytes 268 | 21 Placenta blood 8 T cells 269 | 21 Placenta blood 9 T cells 270 | 21 Placenta blood 10 Dendritic cells 271 | 21 Placenta blood 11 Monocytes 272 | 22 Prostate 0 Basal epithelia 273 | 22 Prostate 1 Epithelial cells 274 | 22 Prostate 2 Epithelial cells 275 | 22 Prostate 3 Fibroblastss 276 | 22 Prostate 4 Basal epithelia 277 | 22 Prostate 5 Endothelial cells 278 | 22 Prostate 6 Smooth muscle cells 279 | 22 Prostate 7 Epithelial cells 280 | 22 Prostate 8 Epithelial cells 281 | 22 Prostate 9 Smooth muscle cells 282 | 22 Prostate 10 Smooth muscle cells 283 | 22 Prostate 11 Fibroblastss 284 | 22 Prostate 12 Epithelial cells 285 | 22 Prostate 13 Endothelial cells 286 | 22 Prostate 14 Endothelial cells 287 | 22 Prostate 15 Epithelial cells 288 | 23 Prostate 2 0 Epithelial cells 289 | 23 Prostate 2 1 Fibroblastss 290 | 23 Prostate 2 2 Epithelial cells 291 | 23 Prostate 2 3 Fibroblastss 292 | 23 Prostate 2 4 Epithelial cells 293 | 23 Prostate 2 5 Fibroblastss 294 | 23 Prostate 2 6 Epithelial cells 295 | 23 Prostate 2 7 Epithelial cells 296 | 23 Prostate 2 8 Epithelial cells 297 | 23 Prostate 2 9 Smooth muscle cells 298 | 23 Prostate 2 10 Epithelial cells 299 | 23 Prostate 2 11 Endothelial cells 300 | 23 Prostate 2 12 T cells 301 | 24 Prostate 3 0 Basal 302 | 24 Prostate 3 1 Basal 303 | 24 Prostate 3 2 Fibroblastss 304 | 24 Prostate 3 3 Epithelial cells 305 | 24 Prostate 3 4 Smooth 
muscle cells 306 | 24 Prostate 3 5 Fibroblastss 307 | 24 Prostate 3 6 Fibroblastss 308 | 24 Prostate 3 7 Epithelial cells 309 | 24 Prostate 3 8 Epithelial cells 310 | 24 Prostate 3 9 Endothelial cells 311 | 24 Prostate 3 10 Epithelial cells 312 | 24 Prostate 3 11 Smooth muscle cells 313 | 24 Prostate 3 12 Smooth muscle cells 314 | 24 Prostate 3 13 Epithelial cells 315 | 24 Prostate 3 14 Epithelial cells 316 | 24 Prostate 3 15 Monocytes 317 | 24 Prostate 3 16 T cells 318 | 25 Rectum 0 Epithelial cells 319 | 25 Rectum 1 Goblet 320 | 25 Rectum 2 Epithelial cells 321 | 25 Rectum 3 Epithelial cells 322 | 25 Rectum 4 Progenitor cells 323 | 25 Rectum 5 Enterocytes 324 | 25 Rectum 6 Progenitor cells 325 | 25 Rectum 7 Progenitor cells 326 | 25 Rectum 8 Epithelial cells 327 | 25 Rectum 9 Epithelial cells 328 | 25 Rectum 10 Goblet 329 | 25 Rectum 11 Epithelial cells 330 | 26 Testis 0 Spermatogonia 331 | 26 Testis 1 Spermatogonia 332 | 26 Testis 2 Spermatogonia 333 | 26 Testis 3 Spermatocytes 334 | 26 Testis 4 Spermatocytes 335 | 26 Testis 5 Leydig and myoid-like cells 336 | 26 Testis 6 Leydig and myoid-like cells 337 | 26 Testis 7 Spermatocytes 338 | 26 Testis 8 Spermatogonia 339 | 26 Testis 9 Spermatogonia 340 | 26 Testis 10 Spermatogonia 341 | 26 Testis 11 Spermatocytes 342 | 26 Testis 12 Spermatocytes 343 | 26 Testis 13 Sertoli cells 344 | 26 Testis 14 Spermatocytes 345 | 26 Testis 15 Monocytes 346 | 26 Testis 16 Endothelial cells 347 | 26 Testis 17 Spermatogonia 348 | 26 Testis 18 Epithelial cells 349 | 26 Testis 19 Sertoli cells 350 | 26 Testis 20 Spermatogonia 351 | 27 Testis 2 0 Epithelial cells 352 | 27 Testis 2 1 Spermatogonia 353 | 27 Testis 2 2 Spermatogonia 354 | 27 Testis 2 3 Spermatocytes 355 | 27 Testis 2 4 Endothelial cells 356 | 27 Testis 2 5 Spermatogonia 357 | 27 Testis 2 6 Spermatocytes 358 | 27 Testis 2 7 Sertoli cells 359 | 27 Testis 2 8 Leydig and myoid-like cells 360 | 27 Testis 2 9 Macrophages 361 | 27 Testis 2 10 Spermatogonia 362 | 27 Testis 2 11 
Sertoli cells 363 | 27 Testis 2 12 Spermatogonia 364 | 27 Testis 2 13 Spermatocytes 365 | 27 Testis 2 14 Spermatogonia 366 | 27 Testis 2 15 Spermatocytes 367 | 27 Testis 2 16 Spermatogonia 368 | 28 Testis 3 0 Myofibroblasts 369 | 28 Testis 3 1 Spermatogonia 370 | 28 Testis 3 2 Myofibroblasts 371 | 28 Testis 3 3 Endothelial cells 372 | 28 Testis 3 4 Monocytes 373 | 28 Testis 3 5 Spermatogonia 374 | 28 Testis 3 6 Spermatogonia 375 | 28 Testis 3 7 Spermatocytes 376 | 28 Testis 3 8 Myofibroblasts 377 | 28 Testis 3 9 Sertoli cells 378 | 28 Testis 3 10 Myofibroblasts 379 | 28 Testis 3 11 Smooth muscle cells 380 | 28 Testis 3 12 Endothelial cells 381 | 28 Testis 3 13 T cells 382 | -------------------------------------------------------------------------------- /scripts/heatmap generator/cell_type_palette.tsv: -------------------------------------------------------------------------------- 1 | name value 2 | ? gray 3 | Amacrine #FFDD00 4 | Atrial Cardiomyocyte #DE6C7D 5 | B cells #66287F 6 | Basal tan 7 | Basal cells tan 8 | Basal epithelia tan 9 | Bipolar #FFDD00 10 | Cones orange 11 | Dendritic cells #199985 12 | Endothelial cells brown 13 | Enterocytes tan 14 | Epithelial cells tan 15 | Epithelial cells of prostatic glands tan 16 | Extravillous trophoblasts tan 17 | Fibroblastss tan2 18 | Fibroblasts tan2 19 | Glomerular parietal Epithelial cells tan 20 | Goblet tan 21 | Granulocytes #F39B7F 22 | Hepatocytes #D1CBE5 23 | Hofbauer cells #E64B35 24 | Horizontal orange 25 | Leydig and myoid-like cells #95D4F5 26 | Luminal 1 brown 27 | Luminal 2 brown 28 | Macrophages #E64B35 29 | Monocytes #E64B35 30 | Muller #FFDD00 31 | Myeloid #E64B35 32 | Myofibroblasts tan2 33 | NK cells #AD1D78 34 | Paneth cells tan 35 | Progenitor cells green3 36 | Proximal tubule #F9A266 37 | Retinal ganglion cells orange 38 | Retinal pigment Epithelial cells tan 39 | Rods orange 40 | Sertoli cells #95D4F5 41 | Smooth muscle cells #DE6C7D 42 | Spermatocytes #95D4F5 43 | Spermatogonia #95D4F5 44 | 
Stellate cells #F9A266 45 | stem cells darkblue 46 | Syncytiotrophoblast tan 47 | T cells #7D8DAF 48 | Transient amplifying cells #1280C4 49 | Vascular brown 50 | Ventricular Cardiomyocyte #DE6C7D 51 | Villous cytotrophoblast tan 52 | Type II pneumocytes tan 53 | Type I pneumocytes tan 54 | gray 55 | Unknown cell type gray 56 | Bronchial epithelium, ciliated cells tan 57 | Bronchial epithelium, mucus-secreting cells tan 58 | Syncytotrophoblasts tan 59 | acinary tan 60 | Inflammatory cells #b30000 61 | Vesicula seminalis #95D4F5 62 | Vesicula seminalis transcripts #95D4F5 63 | Urothelium tan 64 | Urothelium transcripts tan 65 | Dendritic cell #199985 66 | Mono #E64B35 67 | Dendritic cell myeloid #199985 68 | Erythrocytes #b30000 69 | Dendritic cell plasmacytoid #199985 70 | Basophils #F39B7F 71 | NK #AD1D78 72 | MAIT #7D8DAF 73 | Neutrophils #F39B7F 74 | Spermatogonia and fibroblasts blue 75 | Leydig cells and fibroblasts blue 76 | Leydig cells and myoid cells blue 77 | Leydig cells and Sertoli cells blue 78 | Myoid cells blue 79 | Spermatids and Sertoli cells blue 80 | Fibroblasts and Leydig cells blue 81 | Fibroblasts and myoid cells blue 82 | Spermatogonia and myoid cells blue 83 | Spermatocytes, spermatids and Sertoli cells blue 84 | Myoid cells? blue 85 | Myoid cells and Sertoli cells? 
blue 86 | Leydig cells blue 87 | -------------------------------------------------------------------------------- /scripts/heatmap generator/heatmap_generator.R: --------------------------------------------------------------------------------
#!/usr/bin/env Rscript

# Marker-gene heatmaps for annotated single-cell clusters.
#
# Reads five tab-separated inputs from the working directory, scales every
# marker gene to its highest cluster expression, and draws
#   * one heatmap of all clusters against all markers (SVG), and
#   * one heatmap per tissue listed in `dataset_map` (PDF).

# required_packages <-
#   c("tidyverse", "pheatmap", "viridis", "magrittr")
#
# install.packages(required_packages[!required_packages %in% rownames(installed.packages())], repos = "https://cloud.r-project.org")

# library() aborts immediately when a package is missing; require() only
# returns FALSE, which would surface later as an unrelated-looking error.
library(tidyverse)
library(pheatmap)
library(magrittr)

# ------------ INPUTS ------------
# Gene info

# Format of geneinfo
# # A tibble: 19,670 x 2
#    ensg_id         gene_name
#  1 ENSG00000000003 TSPAN6
#  2 ENSG00000000005 TNMD
#  3 ENSG00000000419 DPM1
#  4 ENSG00000000457 SCYL3
#  5 ENSG00000000460 C1orf112
#  6 ENSG00000000938 FGR
#  7 ENSG00000000971 CFH
#  8 ENSG00000001036 FUCA2
#  9 ENSG00000001084 GCLC
# 10 ENSG00000001167 NFYA
# # ... with 19,660 more rows

# Only the first two columns are kept; the joins below refer to them by the
# names shown above (`ensg_id`, `gene_name`).
gene_info92 <-
  read_delim("geninfo_92.tsv", delim = "\t") %>%
  select(1:2)

# Cell markers

# Format of cell markers
# # A tibble: 237 x 4
#    tissue   cell_group       cell_type            marker
#  1 _General Adipose tissue   Adipocytes           LIPE
#  2 _General Adipose tissue   Adipocytes           PLIN1
#  3 _General Adipose tissue   Adipocytes           FABP4
#  4 _General Adipose tissue   Adipocytes           SLC7A10
#  5 _General Adipose tissue   Adipocytes           PLIN4
#  6 _General Epithelial cells neuroendocrine cells CHGA
#  7 _General Epithelial cells neuroendocrine cells SCG2
#  8 _General Epithelial cells neuroendocrine cells SCG5
#  9 _General Epithelial cells neuroendocrine cells PCSK1N
# 10 _General Epithelial cells neuroendocrine cells SCG3
# # ... with 227 more rows

# uppsala_cell_markers <-
#   read_delim("data/meta/20200320 Cell type markers.tsv", delim = "\t") %>%
#   select(tissue = 1,
#          cell_group = 2,
#          cell_type = 3,
#          marker = 4)

# Columns are picked by position and renamed; the Ensembl id of each marker
# is looked up through its gene name.
cell_markers <-
  read_delim("Cell type markers.tsv", delim = "\t") %>%
  select(tissue = 1,
         cell_group = 2,
         cell_type = 3,
         marker = 4) %>%
  left_join(gene_info92,
            by = c("marker" = "gene_name"))

# Cell type palette

# Named character vector: cell type name -> colour.
cell_type_palette <-
  read_delim("cell_type_palette.tsv", delim = "\t") %$%
  set_names(value, name)

# Data

# Format of data
# # A tibble: 7,474,600 x 4
#    dataset              ensg_id         cluster_id norm_count
#  1 Axillary lymph nodes ENSG00000121410 Cluster-0      5.51
#  2 Axillary lymph nodes ENSG00000148584 Cluster-0      0
#  3 Axillary lymph nodes ENSG00000175899 Cluster-0      8.77
#  4 Axillary lymph nodes ENSG00000166535 Cluster-0      0
#  5 Axillary lymph nodes ENSG00000184389 Cluster-0      0
#  6 Axillary lymph nodes ENSG00000128274 Cluster-0     62.7
#  7 Axillary lymph nodes ENSG00000118017 Cluster-0      0
#  8 Axillary lymph nodes ENSG00000094914 Cluster-0     11.0
#  9 Axillary lymph nodes ENSG00000081760 Cluster-0      7.95
# 10 Axillary lymph nodes ENSG00000114771 Cluster-0      0.0459
# # ... with 7,474,590 more rows

cluster_norm_count <-
  read_delim("cluster_exp.tab", delim = "\t")

# cluster_annotation %>%
#   select(dataset) %>%
#   unique %>%
#   mutate(B = c(NA, sort(unique(cluster_norm_count$dataset)), NA),
#          C = paste0("'", dataset, "' = '", B, "'")) %>%
#   pull(C) %>%
#   paste0(collapse = ",\n") %>%
#   cat

# Cluster annotation

# Format of cluster annotation
# # A tibble: 380 x 5
#    dataset_id dataset              cluster_id n_cells cell_type
#  1          1 Axillary lymph nodes Cluster-0     5335 Endothelial cells
#  2          1 Axillary lymph nodes Cluster-1     3843 Endothelial cells
#  3          1 Axillary lymph nodes Cluster-2     2971 Endothelial cells
#  4          1 Axillary lymph nodes Cluster-3     2787 Endothelial cells
#  5          1 Axillary lymph nodes Cluster-4     2357 Endothelial cells
#  6          1 Axillary lymph nodes Cluster-5     1879 Endothelial cells
#  7          1 Axillary lymph nodes Cluster-6     1639 Endothelial cells
#  8          1 Axillary lymph nodes Cluster-7     1309 Endothelial cells
#  9          1 Axillary lymph nodes Cluster-8      672 Endothelial cells
# 10          1 Axillary lymph nodes Cluster-9      489 Endothelial cells
# # ... with 370 more rows

# NOTE(review): the positional select below takes column 4 as `cell_type`,
# which fits a four-column file (dataset_id, dataset, cluster, cell type)
# rather than the five-column layout printed above - confirm which layout
# "Cluster annotation.tsv" really has. Also confirm that its `cluster_id`
# values have the same type and spelling as `cluster_norm_count$cluster_id`,
# because the two tables are joined on that column further down.
#
# `row_id` ("<dataset_id> <trailing digits of cluster_id> <cell_type>") is the
# label each cluster gets as a heatmap row.
cluster_annotation <-
  read_delim("Cluster annotation.tsv", delim = "\t") %>%
  select(dataset_id = 1,
         dataset = 2,
         cluster_id = 3,
         cell_type = 4) %>%
  filter(!is.na(dataset_id)) %>%
  mutate(row_id = paste(dataset_id, str_extract(cluster_id, "\\d*$"), cell_type))

# ------------ Format data ------------

# 20-step inferno scale, reversed.
heatmap_palette <-
  viridis::inferno(20, direction = -1)

# Dataset name -> tissue used to group datasets into per-tissue heatmaps.
dataset_map <-
  c('Axillary lymph nodes' = 'Lymphatic system',
    'Breast' = 'Breast',
    'Colon' = 'Colon',
    'Colon 2' = 'Colon',
    'Eyes' = 'Eye',
    'Eyes macula' = 'Eye',
    'Eyes peripheral' = 'Eye',
    'Head and neck lymph nodes' = 'Lymphatic system',
    'Heart' = 'Muscle',
    'Ileum' = 'Ileum',
    'Kidney' = 'Kidney',
    'Liver' = 'Liver',
    'Liver hep- CD45-' = 'Liver',
    'Liver hep- CD45+' = 'Liver',
    'Lung' = 'Lung',
    'Muscle' = 'Muscle',
    'NK cells blood' = 'Lymphatic system',
    'NK cells bone marrow' = 'Lymphatic system',
    'PBMCs' = 'Lymphatic system',
    'Placenta' = 'Placenta',
    'Placenta blood' = 'Placenta',
    'Prostate' = 'Prostate',
    'Prostate 2' = 'Prostate',
    'Prostate 3' = 'Prostate',
    'Rectum' = 'Rectum',
    'Testis' = 'Testis',
    'Testis 2' = 'Testis',
    'Testis 3' = 'Testis') %>%
  enframe("dataset", "tissue")

# Marker-gene expression per cluster, with cluster annotation, tissue and gene
# name attached.
# FIX: `dataset_map` is now joined in. The per-tissue section below filters on
# `tissue`, a column that none of the other inputs provides.
heatmap_data <-
  cluster_norm_count %>%
  filter(ensg_id %in% cell_markers$ensg_id) %>%
  left_join(cluster_annotation,
            by = c("dataset", "cluster_id")) %>%
  left_join(dataset_map,
            by = "dataset") %>%
  left_join(gene_info92,
            by = "ensg_id")

# ------------ Plot data ------------

# All markers and data

# Row annotation: one row per cluster (row names = row_id) with its cell type
# and dataset.
row_annotation <-
  cluster_annotation %>%
  select(row_id, cell_type, dataset) %>%
  as.data.frame() %>%
  set_rownames(.$row_id) %>%
  {.[, -1, drop = FALSE]}

# Column annotation: one row per marker gene, one "yes"/"no" column per cell
# type telling whether the gene is listed as a marker for it.
col_annotation <-
  cell_markers %>%
  select(cell_type, marker) %>%
  mutate(yes = "yes") %>%
  unique() %>%
  spread(cell_type, yes, fill = "no") %>%
  as.data.frame() %>%
  set_rownames(.$marker) %>%
  {.[, -1, drop = FALSE]}

# Every yes/no column is coloured red/white; the `cell_type` row annotation
# uses the palette read from file.
colors_annotation <-
  col_annotation %>%
  names() %>%
  {set_names(rep(list(c('yes' = 'red', 'no' = 'white')), length(.)), .)}

colors_annotation[["cell_type"]] <- cell_type_palette

# Cluster x gene matrix. Each gene is divided by its highest value over all
# clusters; genes whose highest value is 0 are dropped.
plot_data <-
  heatmap_data %>%
  select(row_id, gene_name, norm_count) %>%
  group_by(gene_name) %>%
  mutate(max_count = max(norm_count),
         norm_count = norm_count / max_count) %>%
  ungroup() %>%
  filter(max_count > 0) %>%
  select(-max_count) %>%
  spread(row_id, norm_count) %>%
  column_to_rownames("gene_name") %>%
  t()

# write_delim(x = row_annotation, "row.tsv", delim = "\t")
# write_delim(x = col_annotation, "col.tsv", delim = "\t")
# write_delim(x = colors_annotation, "col.tsv", delim = "\t")

# NOTE(review): "Cluser" looks like a typo for "Cluster" (compare the
# commented-out pdf name below). The name is left unchanged here in case
# something downstream expects this exact file.
svg("Cluser marker heatmap.svg", width = 30, height = 40)
plot_data %>%
  pheatmap(clustering_method = "ward.D2",
           color = heatmap_palette,
           border_color = NA,
           cutree_rows = 1,
           cutree_cols = 1,
           annotation_row = row_annotation,
           annotation_col = col_annotation,
           annotation_colors = colors_annotation,
           # filename = "Cluster marker heatmap.pdf",
           # width = 30, height = 40,
           annotation_legend = FALSE)

dev.off()

# Per-tissue heatmaps

unique_tissues <-
  dataset_map %>%
  pull(tissue) %>%
  unique()

# Draws the heatmap of one tissue and writes it to
# "<tissue> marker heatmap.pdf".
#
# tis_: one value of `dataset_map$tissue`.
# Uses the markers filed under that tissue plus those filed under "_General".
# Returns whatever pheatmap() returns.
plot_tissue_heatmap <- function(tis_) {
  cat(tis_, "\n", sep = "")

  # `cell_markers` already carries `ensg_id`, so no further join is needed.
  tis_cell_markers <-
    cell_markers %>%
    filter(tissue %in% c("_General", tis_))

  # Keep only annotation columns that vary within this marker set.
  # drop = FALSE keeps a data frame even when a single column survives.
  tis_markers <- sort(unique(tis_cell_markers$marker))
  tis_col_annotation <- col_annotation[tis_markers, , drop = FALSE]
  informative <- vapply(tis_col_annotation, n_distinct, integer(1)) > 1
  tis_col_annotation <- tis_col_annotation[, informative, drop = FALSE]
  if (ncol(tis_col_annotation) == 0) {
    # NA is pheatmap()'s own "no column annotation" default.
    tis_col_annotation <- NA
  }

  heatmap_data %>%
    # Exact comparison: the tissue name is a label, not a regular expression.
    filter(tissue == tis_,
           ensg_id %in% tis_cell_markers$ensg_id) %>%
    group_by(gene_name) %>%
    mutate(norm_count = norm_count / max(norm_count)) %>%
    ungroup() %>%
    # Genes whose maximum within the tissue is 0 give 0 / 0 = NaN; drop them.
    filter(!is.na(norm_count)) %>%
    select(gene_name, row_id, norm_count) %>%
    spread(row_id, norm_count) %>%
    column_to_rownames("gene_name") %>%
    t() %>%
    pheatmap(clustering_method = "ward.D2",
             color = heatmap_palette,
             border_color = NA,
             annotation_row = row_annotation,
             annotation_col = tis_col_annotation,
             annotation_colors = colors_annotation,
             annotation_legend = FALSE,
             main = tis_,
             filename = paste(tis_, "marker heatmap.pdf"),
             width = 16, height = 12)
}

# Called for the files it writes; invisible() keeps the returned list of
# pheatmap objects from being auto-printed at top level.
invisible(lapply(unique_tissues, plot_tissue_heatmap))
-------------------------------------------------------------------------------- /scripts/theme.R: -------------------------------------------------------------------------------- 1 | library(viridis) 2 | 3 | # ----- colors & factor levels ----- 4 | 5 | 6 | tissue_colors <- 7 | c('abdominal adipose tissue' = '#A7DACD', 8 | 'subcutaneous adipose tissue' = '#A7DACD', 9 | 'adrenal gland' = '#7F6A9C', 10 | 'aorta' = '#A7DACD', 11 | 'cecum' = '#1280C4', 12 | 'bone marrow' = '#A1A8AA', 13 | 'breast' = '#F8BDD7', 14 | 'bronchus' = '#6AA692', 15 | 'bulbourethral gland' = '#95D4F5', 16 | 'ear cartilage' = '#A7DACD', 17 | 'joint cartilage' = '#A7DACD', 18 | 'cervix' = '#F8BDD7', 19 | 'colon' = '#1280C4', 20 | 'duodenum' = '#1280C4', 21 | 'endometrium' = '#F8BDD7', 22 | 'epididymis' = '#95D4F5', 23 | 'esophagus' = '#FBDAD9', 24 | 'fallopian tube' = '#F8BDD7', 25 | 'gallbladder' = '#D1CBE5', 26 | "Heart muscle" = '#DE6C7D', 27 | 'heart atrium' = '#DE6C7D', 28 | 'heart valva' = '#DE6C7D', 29 | 'heart wall' = '#DE6C7D', 30 | 'ileum' = '#1280C4', 31 | 'jejunum' = '#1280C4', 32 | 'synovial tissue' = '#A7DACD', 33 | 'kidney cortex' = '#F9A266', 34 | 'kidney medulla' = '#F9A266', 35 | 
'larynx' = '#6AA692', 36 | 'liver' = '#D1CBE5', 37 | 'lung' = '#6AA692', 38 | 'lymph node' = '#A1A8AA', 39 | 'urethral gland' = '#95D4F5', 40 | 'nasopharynx epithelium' = '#6AA692', 41 | 'olfactory epithelium' = '#6AA692', 42 | 'oral mucosa' = '#FBDAD9', 43 | 'ovary' = '#F8BDD7', 44 | 'pancreas' = '#96C08E', 45 | 'penis' = '#95D4F5', 46 | 'peritoneum' = '#A7DACD', 47 | 'pleura' = '#A7DACD', 48 | 'prostate' = '#95D4F5', 49 | 'rectum' = '#1280C4', 50 | 'salivary gland' = '#FBDAD9', 51 | 'seminal vesicle' = '#95D4F5', 52 | 'skin (groin)' = '#FCCAB3', 53 | 'skin (back)' = '#FCCAB3', 54 | 'lip' = '#FBDAD9', 55 | 'skeletal muscle' = '#DE6C7D', 56 | 'smooth muscle (intestine)' = '#DE6C7D', 57 | 'smooth muscle (uterus)' = '#DE6C7D', 58 | 'spleen' = '#A1A8AA', 59 | 'stomach lower' = '#1280C4', 60 | 'stomach upper' = '#1280C4', 61 | 'tongue' = '#FBDAD9', 62 | 'facial adipose' = '#A7DACD', 63 | 'testis' = '#95D4F5', 64 | 'thymus' = '#A1A8AA', 65 | 'thyroid gland' = '#7F6A9C', 66 | 'tonsil' = '#A1A8AA', 67 | 'trachea' = '#6AA692', 68 | 'urinary bladder' = '#F9A266', 69 | 'vagus nerve' = '#A7DACD', 70 | 'ductus deferens' = '#95D4F5', 71 | 'vein' = '#A7DACD', 72 | 'cingulate cortex' = '#53977F', 73 | 'occipital cortex' = '#53977F', 74 | 'insular cortex' = '#53977F', 75 | 'motor cortex' = '#53977F', 76 | 'prefrontal cortex' = '#53977F', 77 | 'somatosensory cortex' = '#53977F', 78 | 'temporal cortex' = '#53977F', 79 | 'retrosplenial cortex' = '#53977F', 80 | 'olfactory bulb' = '#99D0C1', 81 | 'dorsal hippocampus' = '#B4CE52', 82 | 'ventral hippocampus' = '#B4CE52', 83 | 'entorhinal cortex' = '#B4CE52', 84 | 'subiculum' = '#B4CE52', 85 | 'amygdala' = '#82B579', 86 | 'caudate' = '#53C2EA', 87 | 'putamen' = '#53C2EA', 88 | 'septum' = '#53C2EA', 89 | 'ventral pallidum' = '#53C2EA', 90 | 'thalamus' = '#EA9586', 91 | 'hypothalamus' = '#E6351D', 92 | 'substantia nigra' = '#B67AB3', 93 | 'periaqueductal gray' = '#B67AB3', 94 | 'midbrain' = '#B67AB3', 95 | 'medulla' = '#EA5699', 96 | 
'pons' = '#EA5699', 97 | 'cerebellum' = '#FDCA43', 98 | 'corpus callosum' = '#878786', 99 | 'dorsal spinal cord' = '#008BCC', 100 | 'ventral spinal cord' = '#008BCC', 101 | 'superiour colliculi' = '#B67AB3', 102 | 'pituitary gland' = '#7F6A9C', 103 | 'retina' = '#FFEF78', 104 | 'eye' = '#FFEF78', 105 | 'cornea' = '#FFEF78', 106 | 'lens' = '#FFEF78', 107 | 'choroid plexus' = '#A7DACD', 108 | 'dura mater' = '#A7DACD', 109 | 'pineal gland' = '#7F6A9C', 110 | 'parathyroid gland' = '#7F6A9C', 111 | 'placenta' = '#F8BDD7', 112 | 'vagina' = '#F8BDD7', 113 | 'intermediate monocyte' = '#E64B35', 114 | 'non-classical monocyte' = '#E64B35', 115 | 'classical monocyte' = '#E64B35', 116 | 'neutrophil' = '#F39B7F', 117 | 'basophil' = '#F39B7F', 118 | 'eosinophil' = '#F39B7F', 119 | 'T-reg' = '#7D8DAF', 120 | 'MAIT T-cell' = '#7D8DAF', 121 | 'memory CD4 T-cell' = '#7D8DAF', 122 | 'naive CD4 T-cell' = '#7D8DAF', 123 | 'memory CD8 T-cell' = '#7D8DAF', 124 | 'naive CD8 T-cell' = '#7D8DAF', 125 | 'memory B-cell' = '#66287F', 126 | 'naive B-cell' = '#66287F', 127 | 'NK-cell' = '#AD1D78', 128 | 'gdTCR' = '#66287F', 129 | 'myeloid DC' = '#199985', 130 | 'plasmacytoid DC' = '#199985', 131 | 'total PBMC' = '#b30000', 132 | 'cerebral cortex' = '#53977F', 133 | 'olfactory region' = '#99D0C1', 134 | 'hippocampal formation' = '#B4CE52', 135 | 'basal ganglia' = '#53C2EA', 136 | 'pons and medulla' = '#EA5699', 137 | 'spinal cord' = '#008BCC', 138 | 'pitutiary' = '#7F6A9C', 139 | 'monocytes' = '#E64B35', 140 | 'granulocytes' = '#F39B7F', 141 | 'T-cells' = '#7D8DAF', 142 | 'B-cells' = '#66287F', 143 | 'NK-cells' = '#AD1D78', 144 | 'dendritic cells' = '#199985', 145 | 'blood' = '#b30000', 146 | 'adipose tissue' = '#A7DACD', 147 | 'large intestine' = '#1280C4', 148 | 'cartilage' = '#A7DACD', 149 | 'small intestine' = '#1280C4', 150 | 'heart' = '#DE6C7D', 151 | 'kidney' = '#F9A266', 152 | 'upper respiratory system' = '#6AA692', 153 | 'lymphoid tissue' = '#A1A8AA', 154 | 'mouth' = '#FBDAD9', 155 | 
'mesothelial tissue' = '#A7DACD', 156 | 'skin' = '#FCCAB3', 157 | 'smooth muscle' = '#DE6C7D', 158 | 'stomach' = '#1280C4', 159 | 'brain' = '#FFDD00', 160 | 'Adipose & soft tissue' = '#A7DACD', 161 | 'Endocrine tissues' = '#7F6A9C', 162 | 'Gastrointestinal tract' = '#1280C4', 163 | 'Bone marrow & immune system' = '#A1A8AA', 164 | 'Breast and female reproductive system' = '#F8BDD7', 165 | 'Respiratory system' = '#6AA693', 166 | 'Male reproductive system' = '#95D4F5', 167 | 'Proximal digestive tract' = '#FBDAD9', 168 | 'Liver & gallbladder' = '#D1CBE5', 169 | 'Muscle tissues' = '#DE6C7D', 170 | 'Kidney & urinary bladder' = '#F9A266', 171 | 'Respiratory system' = '#6AA692', 172 | 'Pancreas' = '#96C08E', 173 | 'Skin' = '#FCCAB3', 174 | 'Brain' = '#FFDD00', 175 | 'Eye' = '#FFEF78', 176 | 'Blood' = '#b30000', 177 | 'Abdominal adipose tissue' = '#A7DACD', 178 | 'Subcutaneous adipose tissue' = '#A7DACD', 179 | 'Adrenal gland' = '#7F6A9C', 180 | 'Aorta' = '#A7DACD', 181 | 'Cecum' = '#1280C4', 182 | 'Bone marrow' = '#A1A8AA', 183 | 'Breast' = '#F8BDD7', 184 | 'Bronchus' = '#6AA692', 185 | 'Bulbourethral gland' = '#95D4F5', 186 | 'Ear cartilage' = '#A7DACD', 187 | 'Joint cartilage' = '#A7DACD', 188 | 'Cervix' = '#F8BDD7', 189 | 'Colon' = '#1280C4', 190 | 'Duodenum' = '#1280C4', 191 | 'Endometrium' = '#F8BDD7', 192 | 'Epididymis' = '#95D4F5', 193 | 'Esophagus' = '#FBDAD9', 194 | 'Fallopian tube' = '#F8BDD7', 195 | 'Gallbladder' = '#D1CBE5', 196 | 'Heart atrium' = '#DE6C7D', 197 | 'Heart valva' = '#DE6C7D', 198 | 'Heart wall' = '#DE6C7D', 199 | 'Ileum' = '#1280C4', 200 | 'Jejunum' = '#1280C4', 201 | 'Synovial tissue' = '#A7DACD', 202 | 'Kidney cortex' = '#F9A266', 203 | 'Kidney medulla' = '#F9A266', 204 | 'Larynx' = '#6AA692', 205 | 'Liver' = '#D1CBE5', 206 | 'Lung' = '#6AA692', 207 | 'Lymph node' = '#A1A8AA', 208 | 'Urethral gland' = '#95D4F5', 209 | 'Nasopharynx epithelium' = '#6AA692', 210 | 'Olfactory epithelium' = '#6AA692', 211 | 'Oral mucosa' = '#FBDAD9', 212 | 'Ovary' = 
'#F8BDD7', 213 | 'Pancreas' = '#96C08E', 214 | 'Penis' = '#95D4F5', 215 | 'Peritoneum' = '#A7DACD', 216 | 'Pleura' = '#A7DACD', 217 | 'Prostate' = '#95D4F5', 218 | 'Rectum' = '#1280C4', 219 | 'Salivary gland' = '#FBDAD9', 220 | 'Seminal vesicle' = '#95D4F5', 221 | 'Skin (groin)' = '#FCCAB3', 222 | 'Skin (back)' = '#FCCAB3', 223 | 'Lip' = '#FBDAD9', 224 | 'Skeletal muscle' = '#DE6C7D', 225 | 'Smooth muscle (intestine)' = '#DE6C7D', 226 | 'Smooth muscle (uterus)' = '#DE6C7D', 227 | 'Spleen' = '#A1A8AA', 228 | 'Stomach lower' = '#1280C4', 229 | 'Stomach upper' = '#1280C4', 230 | 'Tongue' = '#FBDAD9', 231 | 'Facial adipose' = '#A7DACD', 232 | 'Testis' = '#95D4F5', 233 | 'Thymus' = '#A1A8AA', 234 | 'Thyroid gland' = '#7F6A9C', 235 | 'Tonsil' = '#A1A8AA', 236 | 'Trachea' = '#6AA692', 237 | 'Urinary bladder' = '#F9A266', 238 | 'Vagus nerve' = '#A7DACD', 239 | 'Ductus deferens' = '#95D4F5', 240 | 'Vein' = '#A7DACD', 241 | 'Cingulate cortex' = '#53977F', 242 | 'Occipital cortex' = '#53977F', 243 | 'Insular cortex' = '#53977F', 244 | 'Motor cortex' = '#53977F', 245 | 'Prefrontal cortex' = '#53977F', 246 | 'Somatosensory cortex' = '#53977F', 247 | 'Temporal cortex' = '#53977F', 248 | 'Retrosplenial cortex' = '#53977F', 249 | 'Olfactory bulb' = '#99D0C1', 250 | 'Dorsal hippocampus' = '#B4CE52', 251 | 'Ventral hippocampus' = '#B4CE52', 252 | 'Entorhinal cortex' = '#B4CE52', 253 | 'Subiculum' = '#B4CE52', 254 | 'Amygdala' = '#82B579', 255 | 'Caudate' = '#53C2EA', 256 | 'Putamen' = '#53C2EA', 257 | 'Septum' = '#53C2EA', 258 | 'Ventral pallidum' = '#53C2EA', 259 | 'Thalamus' = '#EA9586', 260 | 'Hypothalamus' = '#E6351D', 261 | 'Substantia nigra' = '#B67AB3', 262 | 'Periaqueductal gray' = '#B67AB3', 263 | 'Midbrain' = '#B67AB3', 264 | 'Medulla' = '#EA5699', 265 | 'Pons' = '#EA5699', 266 | 'Cerebellum' = '#FDCA43', 267 | 'Corpus callosum' = '#878786', 268 | 'Dorsal spinal cord' = '#008BCC', 269 | 'Ventral spinal cord' = '#008BCC', 270 | 'Superiour colliculi' = '#B67AB3', 271 | 
'Pituitary gland' = '#7F6A9C', 272 | 'Retina' = '#FFEF78', 273 | 'Cornea' = '#FFEF78', 274 | 'Lens' = '#FFEF78', 275 | 'Choroid plexus' = '#A7DACD', 276 | 'Dura mater' = '#A7DACD', 277 | 'Pineal gland' = '#7F6A9C', 278 | 'Parathyroid gland' = '#7F6A9C', 279 | 'Placenta' = '#F8BDD7', 280 | 'Vagina' = '#F8BDD7', 281 | 'Intermediate monocyte' = '#E64B35', 282 | 'Non-classical monocyte' = '#E64B35', 283 | 'Classical monocyte' = '#E64B35', 284 | 'Neutrophil' = '#F39B7F', 285 | 'Basophil' = '#F39B7F', 286 | 'Eosinophil' = '#F39B7F', 287 | 'T-reg' = '#7D8DAF', 288 | 'MAIT T-cell' = '#7D8DAF', 289 | 'Memory CD4 T-cell' = '#7D8DAF', 290 | 'Naive CD4 T-cell' = '#7D8DAF', 291 | 'Memory CD8 T-cell' = '#7D8DAF', 292 | 'Naive CD8 T-cell' = '#7D8DAF', 293 | 'Memory B-cell' = '#66287F', 294 | 'Naive B-cell' = '#66287F', 295 | 'NK-cell' = '#AD1D78', 296 | 'GdTCR' = '#66287F', 297 | 'Myeloid DC' = '#199985', 298 | 'Plasmacytoid DC' = '#199985', 299 | 'Total PBMC' = '#b30000', 300 | 'Cerebral cortex' = '#53977F', 301 | 'Olfactory region' = '#99D0C1', 302 | 'Hippocampal formation' = '#B4CE52', 303 | 'Basal ganglia' = '#53C2EA', 304 | 'Pons and medulla' = '#EA5699', 305 | 'Spinal cord' = '#008BCC', 306 | 'Pitutiary' = '#7F6A9C', 307 | 'Monocytes' = '#E64B35', 308 | 'Granulocytes' = '#F39B7F', 309 | 'T-cells' = '#7D8DAF', 310 | 'B-cells' = '#66287F', 311 | 'Nk-cells' = '#AD1D78', 312 | 'Dendritic cells' = '#199985', 313 | 'Blood' = '#b30000', 314 | 'Adipose tissue' = '#A7DACD', 315 | 'Large intestine' = '#1280C4', 316 | 'Cartilage' = '#A7DACD', 317 | 'Small intestine' = '#1280C4', 318 | 'Heart' = '#DE6C7D', 319 | 'Kidney' = '#F9A266', 320 | 'Upper respiratory system' = '#6AA692', 321 | 'Lymphoid tissue' = '#A1A8AA', 322 | 'Mouth' = '#FBDAD9', 323 | 'Mesothelial tissue' = '#A7DACD', 324 | 'Skin' = '#FCCAB3', 325 | 'Smooth muscle' = '#DE6C7D', 326 | 'Stomach' = '#1280C4', 327 | 'Brain' = '#FFDD00', 328 | 'Adipose & soft tissue' = '#A7DACD', 329 | 'Endocrine tissues' = '#7F6A9C', 330 | 
'Gastrointestinal tract' = '#1280C4', 331 | 'Bone marrow & immune system' = '#A1A8AA', 332 | 'Breast and female reproductive system' = '#F8BDD7', 333 | 'Respiratory system' = '#6AA693', 334 | 'Male reproductive system' = '#95D4F5', 335 | 'Proximal digestive tract' = '#FBDAD9', 336 | 'Liver & gallbladder' = '#D1CBE5', 337 | 'Muscle tissues' = '#DE6C7D', 338 | 'Kidney & urinary bladder' = '#F9A266', 339 | 'Respiratory system' = '#6AA692', 340 | 'Pancreas' = '#96C08E', 341 | 'Skin' = '#FCCAB3', 342 | 'Brain' = '#FFDD00', 343 | 'Eye' = '#FFEF78', 344 | 'Blood' = '#b30000', 345 | 346 | 'Monocyte' = '#E64B35', 347 | 'Granulocyte' = '#F39B7F', 348 | 'T-cell' = '#7D8DAF', 349 | 'B-cell' = '#66287F', 350 | 'NK-cell' = '#AD1D78', 351 | 'Dendritic cell' = '#199985', 352 | 'T/NK-cell' = "#955593", 353 | "Unknown" = "gray", 354 | 355 | "B/DC" = "#3F6082", 356 | "Endothelial" = "brown", 357 | "Endothelium" = "brown", 358 | "Monocyte/DC" = "#7F715D", 359 | "Epithelium" = "tan1", 360 | "B/T-cell" = "#715A97", 361 | "Epithelium/B/T-cell" = "#B87F73", 362 | "B/T/DC" = "#45798E", 363 | "Endothelium/DC" = "#5F6157", 364 | "Trophoblast" = "tan3", 365 | 366 | "cervix, uterine" = '#F8BDD7', 367 | 'intestine' = '#1280C4', 368 | "Not tissue enriched" = "black", 369 | "heart muscle" = "#DE6C7D", 370 | "Not enriched" = "darkgray", 371 | "not enriched" = "darkgray") 372 | 373 | 374 | 375 | cell_type_palette_old <- 376 | c('?' 
= 'gray', 377 | 'Amacrine' = '#FFDD00', 378 | 'Atrial Cardiomyocyte' = '#DE6C7D', 379 | "Cardiomyocytes" = '#DE6C7D', 380 | 'B cells' = '#66287F', 381 | 'Basal' = 'tan', 382 | 'Basal cells' = 'tan', 383 | 'Basal epithelia' = 'tan', 384 | 'Bipolar' = '#FFDD00', 385 | 'Cones' = 'orange', 386 | 'Dendritic cells' = '#199985', 387 | 'Endothelial cells' = 'brown', 388 | 'Enterocytes' = 'tan', 389 | 'Epithelial cells' = 'tan', 390 | 'Epithelial cells of prostatic glands' = 'tan', 391 | "Prostate glands, basal cells" = 'tan', 392 | "Prostate glands, eptithelial cells" = 'tan', 393 | 'Extravillous trophoblasts' = 'tan', 394 | 'Fibroblastss' = 'tan2', 395 | 'Fibroblasts' = 'tan2', 396 | 'Glomerular parietal Epithelial cells' = 'tan', 397 | 'Goblet' = 'tan', 398 | 'Granulocytes' = '#F39B7F', 399 | 'Hepatocytes' = '#D1CBE5', 400 | 'Hofbauer cells' = '#E64B35', 401 | 'Horizontal' = 'orange', 402 | 'Leydig and myoid-like cells' = '#95D4F5', 403 | 'Luminal 1' = 'brown', 404 | 'Luminal 2' = 'brown', 405 | 'Macrophages' = '#E64B35', 406 | 'Monocytes' = '#E64B35', 407 | 'Muller' = '#FFDD00', 408 | 'Myeloid' = '#E64B35', 409 | 'Myofibroblasts' = 'tan2', 410 | 'NK cells' = '#AD1D78', 411 | 'Paneth cells' = 'tan', 412 | "Paneth-like cells" = 'tan', 413 | 414 | 'Progenitor cells' = 'green3', 415 | 'Proximal tubule' = '#F9A266', 416 | "Renal tubules, distal cells" = '#F9A266', 417 | "Renal tubules, proximal cells" = '#F9A266', 418 | 'Retinal ganglion cells' = 'orange', 419 | 'Retinal pigment Epithelial cells' = 'tan', 420 | 'Rods' = 'orange', 421 | 'Sertoli cells' = '#95D4F5', 422 | 'Smooth muscle cells' = '#DE6C7D', 423 | 'Spermatocytes' = '#95D4F5', 424 | 'Spermatogonia' = '#95D4F5', 425 | "Sperm"= '#95D4F5', 426 | "Peritubular cells" = '#F9A266', 427 | "Late spermatids"= '#95D4F5', 428 | "Spermatids"= '#95D4F5', 429 | "Early spermatids"= '#95D4F5', 430 | 'Stellate cells' = '#F9A266', 431 | 'stem cells' = 'darkblue', 432 | "Stem cells" = 'darkblue', 433 | "Undifferentiated cells" = 
"darkblue", 434 | 'Syncytiotrophoblast' = 'tan', 435 | 'T cells' = '#7D8DAF', 436 | 'Transient amplifying cells' = '#1280C4', 437 | "Transient-amplifying (TA) cells" = '#1280C4', 438 | 439 | 'Vascular' = 'brown', 440 | 'Ventricular Cardiomyocyte' = '#DE6C7D', 441 | 'Villous cytotrophoblast' = 'tan', 442 | 'Type II pneumocytes' = 'tan', 443 | 'Type I pneumocytes' = 'tan', 444 | "Alveolar cells type 1" = 'tan', 445 | "Alveolar cells type 2" = 'tan', 446 | " " = "gray", 447 | "Unknown cell type" = "gray", 448 | 449 | 450 | 'Bronchial epithelium, ciliated cells' = 'tan', 451 | 'Bronchial epithelium, mucus-secreting cells' = 'tan', 452 | "Bronchial epithelium, Club cells" = 'tan', 453 | "Ciliated cells" = 'tan', 454 | 'Syncytotrophoblasts' = 'tan', 455 | 'acinary' = 'tan', 456 | 'Inflammatory cells' = '#b30000', 457 | "Immune cells" = '#b30000', 458 | 'Vesicula seminalis' = '#95D4F5', 459 | 'Vesicula seminalis transcripts' = '#95D4F5', 460 | 'Urothelium' = 'tan', 461 | 'Urothelium transcripts' = 'tan', 462 | 'Dendritic cell' = '#199985', 463 | 'Mono' = '#E64B35', 464 | 'Dendritic cell myeloid' = '#199985', 465 | 'Erythrocytes' = '#b30000', 466 | 'Dendritic cell plasmacytoid' = '#199985', 467 | 'Basophils' = '#F39B7F', 468 | 'NK' = '#AD1D78', 469 | 'MAIT' = '#7D8DAF', 470 | 'Neutrophils' = "#F39B7F", 471 | 'Spermatogonia and fibroblasts' = 'blue', 472 | 'Leydig cells and fibroblasts' = 'blue', 473 | 'Leydig cells and myoid cells' = 'blue', 474 | 'Leydig cells and Sertoli cells' = 'blue', 475 | 'Myoid cells' = 'blue', 476 | 'Spermatids and Sertoli cells' = 'blue', 477 | 'Fibroblasts and Leydig cells' = 'blue', 478 | 'Fibroblasts and myoid cells' = 'blue', 479 | 'Spermatogonia and myoid cells' = 'blue', 480 | 'Spermatocytes, spermatids and Sertoli cells' = 'blue', 481 | 'Myoid cells?' = 'blue', 482 | 'Myoid cells and Sertoli cells?' 
= 'blue', 483 | 'Leydig cells' = 'blue', 484 | 485 | 'Adipocytes' = '#A7DACD', 486 | 'Cytotrophoblasts' = 'tan', 487 | 'Fibroblasts' = 'tan2', 488 | 'Proximal tubule cells' = '#F9A266', 489 | 'Myoepithelial cells' = '#7F6A9C', 490 | 'Type i pneumocytes' = 'tan', 491 | 'B lymphocytes' = '#66287F', 492 | 'Dendritic cells' = '#199985', 493 | 'T lymphocytes' = '#7D8DAF', 494 | 'Granulocytes' = '#F39B7F', 495 | 'Hofbauer cells' = '#E64B35', 496 | 'Kupffer cells' = '#E64B35', 497 | "Macrophages (Hofbauer cells)" = '#E64B35', 498 | "Macrophages (Kupffer cells)" = '#E64B35', 499 | 'Monocytes' = '#E64B35', 500 | "Erythroid cells" = '#b30000', 501 | 'Erthyroid cells' = '#b30000', 502 | 'Horizontal cells' = 'orange', 503 | 'Natural killer cells' = '#AD1D78', 504 | 'Type ii pneumocytes' = 'tan', 505 | 'Basal epithelial cells of prostatic glands' = 'tan', 506 | 'Decidual cells' = '#F8BDD7', 507 | 'Endothelial cells' = 'peru', 508 | 'Hepatic stellate cells (ito cells)' = '#D1CBE5', 509 | "Ito cells" = '#D1CBE5', 510 | 'Muller glia cells' = '#FFEF78', 511 | 'Inflammatory cells' = '#b30000', 512 | 'Epithelial cells of prostatic glands' = '#95D4F5', 513 | 'Vesicula seminalis cells' = '#95D4F5', 514 | 'Atrial cardiomyocyte' = '#DE6C7D', 515 | 'Smooth muscle cells' = '#DE6C7D', 516 | 'Ventricular cardiomyocyte' = '#DE6C7D', 517 | 'Urothelial cells' = '#F9A266', 518 | 'Lens' = '#FFEF78', 519 | 'Pigment epithelial cells' = '#FFEF78', 520 | 'Bronchial epithelium, ciliated cells' = 'tan', 521 | 'Bronchial epithelium, mucus-secreting cells' = 'tan', 522 | 'Collecting duct cells' = '#F9A266', 523 | 'Distal tubule cells' = '#F9A266', 524 | 'Syncytotrophoblasts' = '#F8BDD7', 525 | "Cholangiocytes" = '#D1CBE5', 526 | 'Bipolar cells' = '#FFDD00', 527 | 'Cone photoreceptor cells' = '#FFEF78', 528 | 'Rod photoreceptor cells' = '#FFEF78', 529 | 'Extravillous trophoblasts' = '#F8BDD7', 530 | 531 | "Ductal epithelial cells" = "tan", 532 | "islets of Langerhans" = '#96C08E', 533 | "Islets of 
Langerhans" = '#96C08E', 534 | 'Acinar cells' = 'tan', 535 | "Melanocytes" = "brown", 536 | "Basal keratinocytes (undifferentiated)" = "tan", 537 | "Suprabasal keratinocytes (differentiated)" = "tan", 538 | 539 | 'collecting duct cells' = '#F9A266', 540 | 'distal tubule cells' = '#F9A266', 541 | 'Hepatic stellate cells (Ito cells)' = '#D1CBE5', 542 | 'Hepatic stellate cells' = '#D1CBE5', 543 | 'myoepithelial cells' = '#7F6A9C', 544 | 'proximal tubule cells' = '#F9A266', 545 | "Goblet cells" = "#1280C4", 546 | "Goblet cells" = "#1280C4", 547 | "Mucus-secreting cells" = "#1280C4", 548 | "luminal epithelial cells" = "tan", 549 | "luminal epithelial cells" = "tan", 550 | "neuroendocrine cells" = '#7F6A9C', 551 | "neuroendocrine cells" = '#7F6A9C', 552 | "Neuroendocrine cells" = '#7F6A9C', 553 | "Not annotated" = "gray", 554 | "Not enriched" = "darkgray", 555 | "not enriched" = "darkgray", 556 | "Not cell enriched"= "darkgray") 557 | 558 |

# ----- category levels -----

# Specificity category labels: the lower-case spellings first, followed by
# the same labels with a capitalised first letter.
spec_category_levels <-
  c("tissue enriched",
    "group enriched",
    "tissue enhanced",
    "low tissue specificity",
    "not detected",

    "Tissue enriched",
    "Group enriched",
    "Tissue enhanced",
    "Low tissue specificity",
    "Not detected")

# Distribution category labels, lower-case then capitalised.
dist_category_levels <-
  c("detected in all",
    "detected in many",
    "detected in some",
    "detected in single",
    "not detected",

    "Detected in all",
    "Detected in many",
    "Detected in some",
    "Detected in single",
    "Not detected")

# Enrichment overlap labels, lower-case then capitalised.
enrichment_overlap_levels <-
  c("full overlap",
    "partial overlap",
    "no overlap",

    "Full overlap",
    "Partial overlap",
    "No overlap")

# Shared/difference labels. Only the lower-case spellings are listed here.
# NOTE(review): enrichment_overlap_pal below also defines capitalised keys
# ("Shared", "Minor difference", ...) -- confirm whether capitalised levels
# are needed here as well.
shared_category_levels <-
  c("shared",
    "minor difference",
    "medium difference",
    "major difference")

# ----- category palettes -----

# Colours for the overlap labels (three viridis colours) and the
# shared/difference labels (four inferno colours). The same seven colours are
# repeated for the capitalised keys.
enrichment_overlap_pal <-
  set_names(c(viridis(3), inferno(4),
              viridis(3), inferno(4)),
            c("full overlap",
              "partial overlap",
              "no overlap",
              "shared",
              "minor difference",
              "medium difference",
              "major difference",

              "Full overlap",
              "Partial overlap",
              "No overlap",
              "Shared",
              "Minor difference",
              "Medium difference",
              "Major difference"))

# Combined "<specificity category> <overlap class>" labels.
spec_category_overlap_levels <-
  c("tissue enriched full overlap",
    "tissue enriched partial overlap",
    "tissue enriched no overlap",
    "group enriched full overlap",
    "group enriched partial overlap",
    "group enriched no overlap",
    "tissue enhanced full overlap",
    "tissue enhanced partial overlap",
    "tissue enhanced no overlap",
    "low tissue specificity no overlap",
    "not detected no overlap")

# Same labels with the overlap class abbreviated to FO / PO / NO.
spec_category_overlap_levels_short <-
  c("tissue enriched FO",
    "tissue enriched PO",
    "tissue enriched NO",
    "group enriched FO",
    "group enriched PO",
    "group enriched NO",
    "tissue enhanced FO",
    "tissue enhanced PO",
    "tissue enhanced NO",
    "low tissue specificity NO",
    "not detected NO")

# One colour per combined label, in the order of
# spec_category_overlap_levels. The colours are written once and keyed by the
# level vectors so the long and short palettes cannot drift apart.
spec_category_overlap_pal <-
  set_names(c("#E41A1C",  # tissue enriched, full overlap
              "#ED6667",  # tissue enriched, partial overlap
              "#F6B2B3",  # tissue enriched, no overlap
              "#FF9D00",  # group enriched, full overlap
              "#FFBD55",  # group enriched, partial overlap
              "#FFDEAA",  # group enriched, no overlap
              "#984EA3",  # tissue enhanced, full overlap
              "#BA89C1",  # tissue enhanced, partial overlap
              "#DCC3E0",  # tissue enhanced, no overlap
              "grey40",   # low tissue specificity, no overlap
              "grey"),    # not detected, no overlap
            spec_category_overlap_levels)

# Identical colours, keyed by the abbreviated labels.
spec_category_overlap_short_pal <-
  set_names(unname(spec_category_overlap_pal),
            spec_category_overlap_levels_short)

# Colours for specificity, distribution and cluster categories. Every label
# appears in lower-case and capitalised form; "not detected " / "Not detected "
# (with a trailing space) are separate keys from "not detected" /
# "Not detected".
gene_category_pal <-
  c("tissue enriched" = "#e41a1c",
    "group enriched" = "#FF9D00",
    "tissue enhanced" = "#984ea3",
    "low tissue specificity" = "grey40",

    "detected in all" = "#253494",
    "detected in many" = "#2c7fb8",
    "detected in some" = "#41b6c4",
    "detected in single" = "#a1dab4",

    "not detected" = "grey",
    "not detected " = "grey",

    "Tissue enriched" = "#e41a1c",
    "Group enriched" = "#FF9D00",
    "Tissue enhanced" = "#984ea3",
    "Low tissue specificity" = "grey40",

    "Detected in all" = "#253494",
    "Detected in many" = "#2c7fb8",
    "Detected in some" = "#41b6c4",
    "Detected in single" = "#a1dab4",

    "Not detected" = "grey",
    "Not detected " = "grey",

    "cluster enriched" = "#e41a1c",
    "cluster enhanced" = "#984ea3",
    "low cluster specificity" = "grey40",

    "Cluster enriched" = "#e41a1c",
    "Cluster enhanced" = "#984ea3",
    "Low cluster specificity" = "grey40")

# gene_category_pal with every key suffixed by " human", followed by every key
# suffixed by " pig" (same colours, same order within each half).
gene_category_pal_human_pig <-
  set_names(rep(unname(gene_category_pal), times = 2),
            c(paste(names(gene_category_pal), "human"),
              paste(names(gene_category_pal), "pig")))

# gene_category_pal with every key suffixed by " canon", followed by every key
# suffixed by " comparison".
gene_category_pal_comparison <-
  set_names(rep(unname(gene_category_pal), times = 2),
            c(paste(names(gene_category_pal), "canon"),
              paste(names(gene_category_pal), "comparison")))

# Colours keyed by ANOVA term names.
anova_pal <-
  c("Residuals and species" = "gray",
    "Residuals" = "gray",
    "tissue_ID" = "#D95E37",
    "comparison_tissue" = "#D95E37",
    "Tissue" = "#D95E37",
    "individual" = "#149684",
    "species" = "#F9C770",
    "Species" = "#F9C770",
    "sex" = "#E8C9C7")

# Level order for the ANOVA terms: exactly the keys of anova_pal, in order.
anova_levels <- names(anova_pal)

sex_palette <-
  c("male" = "#D96767",
    "female" = "#214785")

elevation_identity_levels <-
  c("identity", "overlapping tissues", "different tissues", "no enrichment")

# Colours 3, 4, 6 and 7 of a nine-colour ocean.haline ramp, reversed, keyed by
# elevation_identity_levels.
elevation_identity_pal <-
  set_names(rev(pals::ocean.haline(9)[c(3, 4, 6, 7)]),
            elevation_identity_levels)

elevation_overlap_levels <-
  c("Pig", "Both", "Human")

# Colours 3, 4 and 6 of the same ramp, reversed, keyed by
# elevation_overlap_levels.
elevation_overlap_pal <-
  set_names(rev(pals::ocean.haline(9)[c(3, 4, 6)]),
            elevation_overlap_levels)

overlap_type_pal <-
  c("Human" = "#DB1F48",
    "Overlap" = "#004369",
    "Pig" = "#01949A",
    "Different tissues" = "orangered")

# Cell type colours; keys are listed alphabetically.
celltype_pal <-
  c("B cells" = "#66287F",
    "Cholangiocytes" = "palegreen4",
    "Endothelial" = "peru",
    "Erythroid cells" = "darkred",
    "Hepatic stellate cells" = "salmon4",
    "Hepatocytes" = "saddlebrown",
    "Hepatocytes sub" = "sandybrown",
    "Macrophages" = "orangered",
    "Plasma cells" = "slateblue4",
    "T cells" = "#7D8DAF")

# Cell type level order. This deliberately differs from the key order of
# celltype_pal, so it is kept as its own literal vector.
celltype_levels <-
  c("B cells",
    "Plasma cells",
    "T cells",
    "Macrophages",
    "Erythroid cells",
    "Endothelial",
    "Cholangiocytes",
    "Hepatic stellate cells",
    "Hepatocytes",
    "Hepatocytes sub")

# protein_type_pal <-
#   c("secreted" = '#911D51',
#     "membrane" = '#6D4BAA',
#     "other" = '#008490',
#     "cd_marker" = '#318F1E',
#     "transcription_factors" = '#B8801B',
#     "mitochondrial" = '#E371B4',
#     "ribosomal" = '#89A0F3',
#     "none" = "black",
#     '#00C9BC', '#97C542', '#FFA05E')
#
# protein.localization.palette2 <- c("membrane" = "#CE70A4",
#                                    "secreted" = "#FCAC3B",
#                                    "membrane and secreted isoforms" = "#755A85")

# protein.localization.palette <- c("intracellular and membrane isoforms" = "#858141",
#                                   "membrane" = "#6DB9C6",
#                                   "intracellular" = "#FCAC3B",
#                                   "secreted" = "#CE70A4",
#                                   "intracellular and secreted isoforms" = "#CF5734",
#                                   "membrane and secreted isoforms" = "#755A85",
#                                   "intracellular, membrane, secreted isoforms" = "#794A39")

# "TOTAL" followed by "Cluster-0" ... "Cluster-100".
cluster_levels <-
  c("TOTAL",
    paste0("Cluster-", 0:100))

# ----- themes -----

# theme_bw() without major or minor grid lines.
simple_theme <-
  theme_bw() +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())

# Twenty inferno colours in reversed direction.
heatmap_palette <- viridis::inferno(20, direction = -1)

# Minimal theme: no panel/plot background fill or outline, no grid, no panel
# border, small legend keys, italic legend title, black axis lines.
# NOTE(review): element_line(size = ) is deprecated in favour of `linewidth`
# from ggplot2 3.4.0; it is left as `size` here so older ggplot2 versions
# keep working -- switch once the minimum ggplot2 version is confirmed.
theme_stripped <-
  theme(panel.background = element_rect(fill = NA, colour = NA),
        plot.background = element_rect(fill = NA, color = NA),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        legend.key = element_rect(colour = NA),
        #legend.position = "bottom",
        #legend.direction = "horizontal",
        legend.key.size = unit(0.3, "cm"),
        legend.title = element_text(face = "italic"),
        axis.line = element_line(colour = "black", size = 0.5))

# Same settings as theme_stripped, but the panel and plot backgrounds are
# outlined in gray. Written out in full rather than derived, so it does not
# rely on theme-merging rules.
theme_stripped_frame <-
  theme(panel.background = element_rect(fill = NA, colour = "gray"),
        plot.background = element_rect(fill = NA, color = "gray"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        legend.key = element_rect(colour = NA),
        #legend.position = "bottom",
        #legend.direction = "horizontal",
        legend.key.size = unit(0.3, "cm"),
        legend.title = element_text(face = "italic"),
        axis.line = element_line(colour = "black", size = 0.5))

# Rotate x axis labels by 60 degrees, right-aligned.
theme_angletext <-
  theme(axis.text.x = element_text(angle = 60, hjust = 1))

# Make plot theme
# stripped_theme was a token-for-token copy of theme_stripped; it is kept as
# an alias so both names stay available with a single definition.
stripped_theme <- theme_stripped

# Stripped theme for facetted plots: legend on the right, gray panel border,
# dark blue strips with white strip text.
stripped_theme_facet <-
  stripped_theme +
  theme(legend.position = "right",
        panel.border = element_rect(color = "gray", fill = NA),
        strip.background = element_rect(fill = "#003253",
                                        color = "#003253"),
        strip.text = element_text(color = "white"))

# Stripped theme, HPA variant: like the facet theme but with dark gray strips,
# a light gray plot background and a white, black-outlined panel background.
stripped_theme_HPA <-
  stripped_theme +
  theme(legend.position = "right",
        panel.border = element_rect(color = "gray", fill = NA),
        strip.background = element_rect(fill = "#313131",
                                        color = "#313131"),
        strip.text = element_text(color = "white"),
        plot.background = element_rect(fill = "#D9D9D9",
                                       color = "#D9D9D9"),
        panel.background = element_rect(fill = "white",
                                        color = "black"))

-------------------------------------------------------------------------------- /singlecell_consensus_hierarchy.tsv: -------------------------------------------------------------------------------- 1 | subcell_type_name cell_type_name group_name 2 | Enterocytes Enterocytes Epithelial cells 3 | Mucus-secreting cells Mucus-secreting cells Epithelial cells 4 | Undifferentiated cells Undifferentiated cells Undifferentiated cells 5 | T-cells T-cells Blood & immune cells 6 | Paneth cells Paneth cells Epithelial cells 7 | B-cells B-cells Blood & immune cells 8 | Intestinal endocrine cells 
Intestinal endocrine cells Endocrine cells 9 | Granulocytes Granulocytes Blood & immune cells 10 | Rod photoreceptor cells Rod photoreceptor cells Neuronal cells 11 | Muller glia cells Muller glia cells Glial cells 12 | Bipolar cells Bipolar cells Neuronal cells 13 | Cone photoreceptor cells Cone photoreceptor cells Neuronal cells 14 | Horizontal cells Horizontal cells Neuronal cells 15 | Endothelial cells Endothelial cells Vascular cells 16 | Cardiomyocytes Cardiomyocytes Muscle cells 17 | Mixed immune cells Mixed immune cells Blood & immune cells 18 | Smooth muscle cells Smooth muscle cells Muscle cells 19 | Fibroblasts Fibroblasts Mesenchymal cells 20 | Proximal tubular cells Proximal tubular cells Epithelial cells 21 | Macrophages Macrophages Blood & immune cells 22 | Distal tubular cells Distal tubular cells Epithelial cells 23 | Collecting duct cells Collecting duct cells Epithelial cells 24 | Hepatocytes Hepatocytes Epithelial cells 25 | Kupffer cells Kupffer cells Blood & immune cells 26 | Ito cells Ito cells Mesenchymal cells 27 | Cholangiocytes Cholangiocytes Epithelial cells 28 | Erythroid cells Erythroid cells Blood & immune cells 29 | Alveolar cells type 2 Alveolar cells type 2 Epithelial cells 30 | Club cells Club cells Epithelial cells 31 | Ciliated cells Ciliated cells Epithelial cells 32 | Alveolar cells type 1 Alveolar cells type 1 Epithelial cells 33 | Cytotrophoblasts Cytotrophoblasts Trophoblast cells 34 | Extravillous trophoblasts Extravillous trophoblasts Trophoblast cells 35 | Hofbauer cells Hofbauer cells Blood & immune cells 36 | Syncytiotrophoblasts Syncytiotrophoblasts Trophoblast cells 37 | Basal glandular cells Basal glandular cells Epithelial cells 38 | Glandular cells Glandular cells Epithelial cells 39 | Urothelial cells Urothelial cells Epithelial cells 40 | Monocytes Monocytes Blood & immune cells 41 | NK-cells NK-cells Blood & immune cells 42 | Dendritic cells Dendritic cells Blood & immune cells 43 | Leydig cells Leydig cells 
Endocrine cells 44 | Late spermatids Late spermatids Germ cells 45 | Spermatogonia Spermatogonia Germ cells 46 | Early spermatids Early spermatids Germ cells 47 | Spermatocytes Spermatocytes Germ cells 48 | Peritubular cells Peritubular cells Mesenchymal cells 49 | Sertoli cells Sertoli cells Epithelial cells 50 | Ductal cells Ductal cells Epithelial cells 51 | Pancreatic endocrine cells Pancreatic endocrine cells Endocrine cells 52 | Mixed cell types Mixed cell types Mixed cell types 53 | Exocrine glandular cells Exocrine glandular cells Epithelial cells 54 | Neutrophils Neutrophils Blood & immune cells 55 | Basal keratinocytes Basal keratinocytes Epithelial cells 56 | Suprabasal keratinocytes Suprabasal keratinocytes Epithelial cells 57 | Melanocytes Melanocytes Pigment cells 58 | Astrocytes Astrocytes Glial cells 59 | Basal respiratory cells Basal respiratory cells Epithelial cells 60 | Adipocytes Adipocytes Adipocyte cells 61 | Ionocytes Ionocytes Epithelial cells 62 | Granulosa cells Granulosa cells Epithelial cells 63 | Mammary Glandular Cells Mammary Glandular Cells Epithelial cells 64 | Myoepithelial cells Myoepithelial cells Epithelial cells 65 | Glandular cells - Endometrium Glandular cells - Endometrium Epithelial cells 66 | Oligodendrocyte precursor cells Oligodendrocyte precursor cells Glial cells 67 | Inhibitory neurons Inhibitory neurons Neuronal cells 68 | Microglial cells Microglial cells Glial cells 69 | Oligodendrocytes Oligodendrocytes Glial cells 70 | Excitatory neurons Excitatory neurons Neuronal cells 71 | Myocytes Myocytes Muscle cells 72 | Peptic cell Peptic cell Epithelial cells 73 | Mucous cell of stomach Mucous cell of stomach Epithelial cells 74 | Plasma cell Plasma cell Blood & immune cells 75 | Theca cells Theca cells Endocrine cells 76 | Squamous epithelial cells Squamous epithelial cells Epithelial cells 77 | --------------------------------------------------------------------------------