├── .gitignore ├── Figures ├── Ext_Data_Fig_1 │ ├── bar_layer_plots_simple.R │ └── cre_per_cluster_bar_plots.R ├── Ext_Data_Fig_10 │ ├── 2018-05-25_markers.txt │ ├── cluster_violin_plot.R │ └── layer_and_violin_functions.R ├── Ext_Data_Fig_11 │ └── v1_alm_cross_mapping_cor.R ├── Ext_Data_Fig_12 │ ├── 20171030_inj_qc.tsv │ ├── alm_retrograde_inj_dotplots_actual_targets_dendrogram.R │ ├── prune_leaf_custom.R │ └── visp_retrograde_inj_dotplots_actual_targets_dendrogram.R ├── Ext_Data_Fig_15 │ ├── huang_comparison_figure.R │ ├── huang_dendrogram.R │ ├── linnarsson_comparison.R │ ├── linnarsson_stats.R │ ├── prune_leaf_custom.R │ ├── sankey_functions.R │ ├── tolias_comparison_figure.R │ └── tolias_dendrogram.R ├── Ext_Data_Fig_16 │ ├── cluster_bar_plot.R │ ├── cluster_violin_plot.R │ ├── coexpression_bar_plots.R │ ├── coexpression_functions.R │ ├── layer_and_violin_functions.R │ ├── marker_violins.R │ ├── river_plots.R │ └── sankey_functions.R ├── Ext_Data_Fig_18 │ ├── cluster_violin_plot.R │ ├── coexpression_functions.R │ ├── color_functions.R │ ├── layer_and_violin_functions.R │ ├── prune_leaf_custom.R │ ├── tsne_plots.R │ └── unified_layer_distributions_and_markers.R ├── Ext_Data_Fig_2 │ └── violin_plots.R ├── Ext_Data_Fig_20 │ └── dend_plots.R ├── Ext_Data_Fig_3 │ ├── all_cell_tsne_plot.R │ ├── coclustering_heatmaps_sparse_matrix.R │ ├── pairwise_cluster_heatmaps.R │ └── qc_jitter_plots.R ├── Ext_Data_Fig_4 │ ├── intermediate_fraction_plot.R │ └── qc_jitter_plots.R ├── Ext_Data_Fig_5 │ └── all_markers_heatmap.R ├── Ext_Data_Fig_6 │ ├── cluster_marker_heatmaps.R │ └── markers.csv ├── Ext_Data_Fig_7 │ ├── comparison_figure.R │ └── sankey_functions.R ├── Ext_Data_Fig_8 │ └── saturation_plot.R ├── Ext_Data_Fig_9 │ └── build_dotplot.R ├── Figure_1 │ └── dendrogram_panel.R ├── Figure_2 │ ├── class_comparison_colors_simplified.csv │ ├── color_functions.R │ ├── degenes_alm_visp_barplot.R │ ├── pairwise_DEGene_panels.R │ ├── region_marker_genes_panel.R │ └── tsne_panels.R ├── Figure_3 │ ├── alm_retrograde_inj_dotplot_categories.R │ ├── inj_qc.tsv │ ├── prune_leaf_custom.R │ └── visp_retrograde_inj_dotplot_categories.R ├── Figure_4 │ ├── color_functions.R │ ├── layer_and_violin_functions.R │ ├── prune_leaf_custom.R │ └── unified_layer_distributions_and_markers.R ├── Figure_5 │ ├── color_functions.R │ ├── layer_and_violin_functions.R │ ├── prune_leaf_custom.R │ └── unified_layer_distributions_and_markers.R └── Figure_6 │ ├── L4 IT VISp Rspo1 L5 IT VISp Batf3.rda │ ├── L4 IT VISp Rspo1 L5 IT VISp Hsd11b1 Endou.rda │ ├── L4.2016.dat.rda │ ├── L4.dat.rda │ ├── L4.df.rda │ ├── L4.sampled.df.rda │ ├── L4_comparison_plots.R │ ├── comparison_figure.R │ ├── custom_annotate_cat.R │ ├── gradient.df.list.rda │ ├── l4_gradient_heatmap.R │ ├── pairwise_median_lfc.R │ ├── plotting_functions.R │ ├── sankey_functions.R │ ├── sst150_coords.tsv │ ├── sst300_cl_anno.csv │ ├── sst300_coords.tsv │ ├── sst80_cl_anno.csv │ ├── sst80_coords.tsv │ ├── sst_constellation_positions.xlsx │ ├── sst_constellations.R │ └── sst_river_plot.R ├── LICENSE ├── README.md ├── RNA-seq Analysis ├── ALM.ex.analysis.R ├── DEG.R ├── V1_ALM_compare.R ├── VISp.ex.analysis.R ├── all.cell.markers.heatmap.R ├── annotate.cluster.R ├── annotate.markers.R ├── cl.transition.R ├── cluster.R ├── cluster_hetero.R ├── co_cluster.R ├── compare_1679.R ├── compare_Hrvatin.R ├── compare_JoshWang.R ├── compare_Linnarsson.R ├── compare_Tolias.R ├── consensus.clust.R ├── copy_bam.R ├── fast_tsne.R ├── gradient.R ├── init.R ├── intron.exon.ratio.R ├── markers.R ├── retrograde.R ├── saturation_test.R ├── subsample.all.R ├── subsample.reads.R └── tune_parameter.R └── tasic2018analysis.Rproj /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | -------------------------------------------------------------------------------- /Figures/Ext_Data_Fig_10/2018-05-25_markers.txt: -------------------------------------------------------------------------------- 1 | Aqp4 2 | Sox10 3 | Pdgfra 4 | Grm5 5 | Ccnb1 6 | Rassf10 7 | Enpp6 8 | Opalin 9 | Serpinb1a 10 | Synpr 11 | Cyba 12 | S100a11 13 | Col1a1 14 | Lum 15 | Dcn 16 | Aox3 17 | Bgn 18 | Osr1 19 | Gjb2 20 | Ccl19 21 | Cd74 22 | Slc47a1 23 | Lypd2 24 | Hs3st6 25 | Ctxn3 26 | Nnat 27 | Mc5r 28 | Spp1 29 | Col15a1 30 | Pdgfrb 31 | Cspg4 32 | Kcnj8 33 | Abcc9 34 | Atp13a5 35 | Art3 36 | Pla1a 37 | Ace2 38 | Acta2 39 | Angpt1 40 | Des 41 | Aoc3 42 | Slc38a5 43 | Pdgfb 44 | Nos3 45 | Eltd1 46 | Pecam1 47 | Ctla2a 48 | Emcn 49 | Sema3c 50 | Tek 51 | Cytl1 52 | Bmx 53 | Sema3f 54 | Gja5 55 | Fcgr3 56 | Cd14 57 | Mrc1 58 | Cd163 59 | Cd4 60 | Lyz2 61 | Siglech 62 | Ctss 63 | Cx3cr1 64 | P2ry12 65 | Tmem119 66 | -------------------------------------------------------------------------------- /Figures/Ext_Data_Fig_10/cluster_violin_plot.R: -------------------------------------------------------------------------------- 1 | library(dplyr) 2 | library(ggplot2) 3 | library(feather) 4 | library(scrattch.vis) 5 | library(scrattch.io) 6 | options(stringsAsFactors = F) 7 | source("layer_and_violin_functions.R") 8 | 9 | #genes <- unique(unlist(read.csv("markers.csv", header=F))) 10 | genes <- unique(unlist(read.table("2018-05-25_markers.txt", header=F))) 11 | 12 | fdir <- "//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180520/" 13 | 14 | p <- group_violin_plot2(data_source = fdir, 15 | genes = genes, 16 | group_by = "dendcluster", 17 | clusters = 118:133, 18 | logscale = TRUE, 19 | labelheight = 10, 20 | max_width = 10, 21 | fontsize = 6, 22 | showcounts = FALSE) 23 | 24 | p 25 | 26 | ggsave("marker_violins.pdf", p, width = 4.1, height = 8.2, useDingbats = F) 27 | 28 | -------------------------------------------------------------------------------- /Figures/Ext_Data_Fig_11/v1_alm_cross_mapping_cor.R: -------------------------------------------------------------------------------- 1 | library(dendextend) 2 | library(dplyr) 3 | library(feather) 4 | library(ggplot2) 5 | options(stringsAsFactors = F) 6 | 7 | values_to_colors <- function(x, minval = NULL, maxval = NULL, colorset = c("darkblue","dodgerblue","gray80","orangered","red")) { 8 | 9 | heat_colors <- colorRampPalette(colorset)(1001) 10 | 11 | if(is.null(maxval)) { 12 | maxval <- max(x) 13 | } 14 | if (is.null(minval)) { 15 | minval <- min(x) 16 | } 17 | 18 | heat_positions <- unlist(round((x - minval) / (maxval - minval) * 1000 + 1, 0)) 19 | 20 | colors <- heat_colors[heat_positions] 21 | 22 | colors 23 | } 24 | 25 | color_sum <- function(col1,col2) { 26 | 27 | rgbmat1 <- col2rgb(col1)/255 28 | rgbmat2 <- col2rgb(col2)/255 29 | 30 | mix <- rgbmat1 + rgbmat2 31 | 32 | rgb(mix[1],mix[2],mix[3]) 33 | 34 | } 35 | 36 | 37 | # Load Zizhen's V1 to ALM comparisons 38 | load("//allen/programs/celltypes/workgroups/rnaseqanalysis/V1_ALM/process_new/V1.ALM.compare.cor.rda") 39 | 40 | # # Rotate the L5a types so that Ucma_2 is closer to L2/3 41 | # rotate_labels <- labels(V1.dend) 42 | # rotate_labels[4:12] <- rotate_labels[c(9:12,4:8)] 43 | # V1.dend <- V1.dend %>% rotate(rotate_labels) 44 | 45 | anno <- read_feather("//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180520/anno.feather") 46 | cluster_anno <- anno %>% select(cl, cluster_id, cluster_label, cluster_color) %>% unique 47 | 48 | labels(V1.dend) <- cluster_anno$cluster_label[match(labels(V1.dend), cluster_anno$cl)] 49 | labels(ALM.dend) <- cluster_anno$cluster_label[match(labels(ALM.dend), cluster_anno$cl)] 50 | 51 | comb.map$v1_label <- cluster_anno$cluster_label[match(comb.map$V1.cl, cluster_anno$cl)] 52 | comb.map$alm_label <- cluster_anno$cluster_label[match(comb.map$ALM.cl, cluster_anno$cl)] 53 | 54 | v1_ggdend <- as.ggdend(V1.dend) 55 | alm_ggdend <- as.ggdend(ALM.dend) 56 | 57 | # Invert and center dendrograms 58 | # add 0.3 space between them 59 | # 0.1 for leaves 60 | # 0.1 on each side for labels 61 | 62 | # V1: right-to-left 63 | v1_seg <- v1_ggdend$segments 64 | names(v1_seg)[1:4] <- c("y","x","yend","xend") 65 | ymid <- max(v1_seg$y/2) 66 | ymax <- max(v1_seg$y) 67 | v1_seg <- v1_seg %>% 68 | mutate(y = (y - 2*(y - ymid))/ymax, 69 | yend = (yend - 2*(yend - ymid))/ymax, 70 | x = x + 0.15, 71 | xend = xend + 0.15) 72 | 73 | v1_leaves <- v1_ggdend$labels 74 | names(v1_leaves)[1:3] <- c("v1_y","v1_x","v1_label") 75 | v1_leaves <- v1_leaves %>% 76 | mutate(v1_y = (v1_y - 2*(v1_y - ymid))/ymax, 77 | v1_x = v1_x + 0.05) %>% 78 | select(-col,-cex) 79 | 80 | # ALM: left-to-right 81 | alm_seg <- alm_ggdend$segments 82 | names(alm_seg)[1:4] <- c("y","x","yend","xend") 83 | ymid <- max(alm_seg$y/2) 84 | ymax <- max(alm_seg$y) 85 | alm_seg <- alm_seg %>% 86 | mutate(y = (y - 2*(y - ymid))/ymax, 87 | yend = (yend - 2*(yend - ymid))/ymax, 88 | x = -x - 0.15, 89 | xend = -xend - 0.15) 90 | 91 | alm_leaves <- alm_ggdend$labels 92 | names(alm_leaves)[1:3] <- c("alm_y","alm_x","alm_label") 93 | alm_leaves <- alm_leaves %>% 94 | mutate(alm_y = (alm_y - 2*(alm_y - ymid))/ymax, 95 | alm_x = -alm_x - 0.05) %>% 96 | select(-col,-cex) 97 | 98 | # build comparison segments 99 | comb.map <- comb.map %>% 100 | mutate(Prob = as.numeric(Prob)) %>% 101 | left_join(alm_leaves) %>% 102 | left_join(v1_leaves) 103 | 104 | v1_to_alm <- comb.map %>% 105 | filter(type == "V1.ref.ALM") 106 | alm_to_v1 <- comb.map %>% 107 | filter(type == "ALM.ref.V1") 108 | 109 | alm_v1_comp_plot <- ggplot() + 110 | geom_segment(data = v1_seg, 111 | aes(x = x, xend = xend, 112 | y = y, yend = yend), 113 | lineend = "square") + 114 | geom_segment(data = alm_seg, 115 | aes(x = x, xend = xend, 116 | y = y, yend = yend), 117 | lineend = "square") + 118 | geom_curve(data = alm_to_v1, 119 | aes(x = alm_x, xend = v1_x, 120 | y = alm_y, yend = v1_y, 121 | size = Prob), 122 | color = "#212021", 123 | curvature = -0.1, 124 | alpha = 0.5) + #arrow = arrow(length = unit(0.25,"cm"))) + 125 | geom_curve(data = v1_to_alm, 126 | aes(x = v1_x, xend = alm_x, 127 | y = v1_y, yend = alm_y, 128 | size = Prob), 129 | color = "#848EBC", 130 | curvature = -0.1, 131 | alpha = 0.5) +#, arrow = arrow(length = unit(0.25,"cm"))) + 132 | geom_text(data = alm_leaves, 133 | aes(x = alm_x - 0.01, y = alm_y, 134 | label = alm_label), 135 | hjust = 1, 136 | vjust = 0.3) + 137 | geom_text(data = v1_leaves, 138 | aes(x = v1_x + 0.01, y = v1_y, 139 | label = v1_label), 140 | hjust = 0, 141 | vjust = 0.3) + 142 | theme_void() + 143 | scale_size_continuous(range = c(0,3)) 144 | 145 | alm_v1_comp_plot 146 | 147 | ggsave("v1_alm_crossmapping_cor.pdf",alm_v1_comp_plot,width = 12, height = 8) 148 | -------------------------------------------------------------------------------- /Figures/Ext_Data_Fig_12/prune_leaf_custom.R: -------------------------------------------------------------------------------- 1 | prune.dendrogram <- function(dend, leaves, reindex_dend = TRUE, ...) { 2 | leaves <- as.character(leaves) 3 | 4 | for(i in seq_along(leaves)) 5 | { 6 | # this function is probably not the fastest - but it works... 7 | dend <- prune_leaf(dend, leaves[i]) # move step by stem to remove all of these leaves... 8 | } 9 | 10 | if(reindex_dend) dend <- reindex_dend(dend) 11 | 12 | return(dend) 13 | } 14 | 15 | stats_midcache.dendrogram <- function (x, type = "hclust", quiet = FALSE) 16 | { 17 | type <- match.arg(type) 18 | stopifnot(inherits(x, "dendrogram")) 19 | setmid <- function(d, type) { 20 | if (is.leaf(d)) 21 | return(d) 22 | k <- length(d) 23 | if (k < 1) 24 | stop("dendrogram node with non-positive #{branches}") 25 | r <- d 26 | midS <- 0 27 | for (j in 1L:k) { 28 | r[[j]] <- unclass(setmid(d[[j]], type)) 29 | midS <- midS + .midDend(r[[j]]) 30 | } 31 | if (!quiet && type == "hclust" && k != 2) 32 | warning("midcache() of non-binary dendrograms only partly implemented") 33 | attr(r, "midpoint") <- (.memberDend(d[[1L]]) + midS)/2 34 | r 35 | } 36 | setmid(x, type = type) 37 | } 38 | 39 | stats_.midDend <- function (x) { 40 | if (is.null(mp <- attr(x, "midpoint"))) 0 else mp 41 | } 42 | .midDend <- stats_.midDend # copied so that they would work inside the various functions here... 43 | 44 | stats_.memberDend <- function (x) 45 | { 46 | r <- attr(x, "x.member") 47 | if (is.null(r)) { 48 | r <- attr(x, "members") 49 | if (is.null(r)) 50 | r <- 1L 51 | } 52 | r 53 | } 54 | .memberDend <- stats_.memberDend 55 | 56 | 57 | prune_leaf <- function(dend, leaf_name,...) 58 | { 59 | labels_dend <- labels(dend) 60 | 61 | if(length(labels_dend) != length(unique(labels_dend))) warning("Found dubplicate labels in the tree (this might indicate a problem in the tree you supplied)") 62 | 63 | if(!(leaf_name %in% labels_dend)) { # what to do if there is no such leaf inside the tree 64 | warning(paste("There is no leaf with the label", leaf_name , "in the tree you supplied", "\n" , "Returning original tree", "\n" )) 65 | return(dend) 66 | } 67 | 68 | if(sum(labels_dend %in% leaf_name) > 1) { # what to do if there is no such leaf inside the tree 69 | warning(paste("There are multiple leaves by the name of '", leaf_name , "' in the tree you supplied. Their locations is:", 70 | paste(which(labels_dend %in% leaf_name), collapse = ","),"\n" , "Returning original tree", "\n" )) 71 | return(dend) 72 | } 73 | 74 | is.father.of.leaf.to.remove <- function(dend, leaf_name) 75 | { 76 | # this function checks if the leaf we wish to remove is the direct child of the current branch (dend) we entered the function 77 | is.father <- FALSE 78 | for(i in seq_len(length(dend))) 79 | { 80 | if(is.leaf(dend[[i]]) == TRUE && labels(dend[[i]]) == leaf_name) is.father <- TRUE 81 | } 82 | return(is.father) 83 | } 84 | 85 | 86 | remove_leaf_if_child <- function(dend, leaf_name) 87 | { 88 | # print(labels(dend)) 89 | if(all(labels(dend) != leaf_name)) 90 | { # if the leaf we want to remove is not in this branch, simply return the branch without going deeper intoit. 91 | return(dend) 92 | } else { # but if the leaf we want to remove is here somewhere, go on searching 93 | attr(dend, "members") <- attr(dend, "members") - 1 94 | 95 | if(!is.father.of.leaf.to.remove(dend, leaf_name)) # if you are not the father, then go on and make this function work on each child 96 | { 97 | for(i in seq_len(length(dend))) 98 | { 99 | dend[[i]] <- remove_leaf_if_child(dend[[i]], leaf_name) 100 | } 101 | } else { # we'll merge 102 | if(length(dend) == 2) { 103 | leaf_location <- 1 104 | # if leaf location is 1, then move branch in leaf 2 to be the new x 105 | if(is.leaf(dend[[leaf_location]]) == T && labels(dend[[leaf_location]]) == leaf_name) { 106 | 107 | branch_to_bumpup <- 2 108 | dend <- dend[[branch_to_bumpup]] 109 | } else { # else - the leaf location must be located in position "2" 110 | 111 | branch_to_bumpup <- 1 112 | dend <- dend[[branch_to_bumpup]] 113 | } 114 | } else if(length(dend) > 2) { 115 | # If more than 2 branches, check if any are leaves 116 | dend_leaves <- unlist(lapply(dend, is.leaf)) 117 | if(sum(dend_leaves) > 0) { 118 | # If so, check for matching labels to the leaf to prune 119 | dend_labels <- unlist(lapply(dend, function(x) attr(x, "label"))) 120 | dend_matches <- dend_labels == leaf_name 121 | # Return a list containing the non-matching branches 122 | dend[dend_leaves & dend_matches] <- NULL 123 | # Note that in some cases, the following DOES NOT yield a correct result: 124 | # dend <- dend[!(dend_leaves & dend_matches)] 125 | 126 | # If the length is now 1, it can be bumped up 127 | if(length(dend) == 1) { 128 | dend <- dend[[1]] 129 | } 130 | 131 | } 132 | } 133 | } 134 | } 135 | return(dend) 136 | } 137 | 138 | 139 | new_dend <- remove_leaf_if_child(dend, leaf_name) 140 | new_dend <- suppressWarnings(stats_midcache.dendrogram(new_dend)) # fixes the attributes 141 | # new_x <- fix_members_attr.dendrogram(new_x) # fix the number of memebers attr for each node 142 | return(new_dend) 143 | } -------------------------------------------------------------------------------- /Figures/Ext_Data_Fig_15/huang_comparison_figure.R: -------------------------------------------------------------------------------- 1 | library(dplyr) 2 | library(ggplot2) 3 | library(feather) 4 | options(stringsAsFactors = F) 5 | source("sankey_functions.R") 6 | 7 | anno <- read_feather("//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/external/mouse_GABA_Paul_2016_20171002/anno.feather") 8 | 9 | # rearrange the target clusters based on which query cluster has the highest freq 10 | new_order <- anno %>% 11 | group_by(cluster_id, cell_class_id) %>% 12 | summarise(Freq = n()) %>% 13 | ungroup() %>% 14 | group_by(cluster_id) %>% 15 | arrange(-Freq) %>% 16 | filter(row_number() == 1) %>% 17 | ungroup() %>% 18 | arrange(cell_class_id, cluster_id) %>% 19 | mutate(new_order = 1:n()) %>% 20 | select(cluster_id, new_order) 21 | 22 | huang_to_new <- left_join(anno, new_order) %>% 23 | mutate(cluster_id = new_order) 24 | 25 | # make_plot_nodes converts group_nodes to rectangles for plotting with geom_rect() 26 | h2n_nodes <- make_plot_nodes(make_group_nodes(huang_to_new, c("cell_class","cluster"))) 27 | 28 | h2n_new_nodes <- h2n_nodes %>% 29 | filter(group == "cluster") %>% 30 | mutate(name = sub("^[0-9]+ ","",name), 31 | name = paste0(name, " (n = ",n,")")) 32 | h2n_huang_nodes <- h2n_nodes %>% 33 | filter(group == "cell_class") %>% 34 | mutate(name = paste0(name, " (n = ",n,")")) 35 | 36 | huang_to_new_river <- build_river_plot(huang_to_new, 37 | c("cell_class","cluster"), 38 | fill_group = "cell_class") + 39 | geom_text(data = h2n_new_nodes, 40 | aes(x = xmax + 0.01, 41 | y = (ymin + ymax)/2, 42 | label = name, 43 | color = color), 44 | hjust = 0) + 45 | geom_text(data = h2n_huang_nodes, 46 | aes(x = xmin - 0.01, 47 | y = (ymin + ymax)/2, 48 | label = name, 49 | color = color), 50 | hjust = 1) + 51 | scale_color_identity() 52 | 53 | huang_to_new_river 54 | 55 | ggsave("huang_to_new_river.pdf", huang_to_new_river, height = 10, width = 6) 56 | -------------------------------------------------------------------------------- /Figures/Ext_Data_Fig_15/huang_dendrogram.R: -------------------------------------------------------------------------------- 1 | library(dplyr) 2 | library(ggplot2) 3 | library(dendextend) 4 | options(stringsAsFactors = F) 5 | 6 | source("prune_leaf_custom.R") 7 | 8 | dend <- readRDS("//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180520/dend.RData") 9 | 10 | dend <- dend %>% 11 | prune(labels(dend)[c(1:55,116:133)]) 12 | 13 | anno <- read_feather("//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/external/mouse_GABA_Paul_2016_20171002/anno.feather") 14 | 15 | node_n <- anno %>% 16 | group_by(cluster_label) %>% 17 | summarize(n_cells = n()) 18 | 19 | nodes <- as.ggdend(dend)$nodes %>% 20 | mutate(cluster_label = get_nodes_attr(dend,"label")) %>% 21 | left_join(node_n) 22 | 23 | plot_nodes <- nodes %>% 24 | filter(!is.na(n_cells)) 25 | 26 | segments <- as.ggdend(dend)$segments 27 | 28 | node_labels <- plot_nodes %>% 29 | filter(y > 0) 30 | 31 | leaf_labels <- nodes %>% 32 | filter(y == 0) %>% 33 | mutate(col = ifelse(is.na(n_cells),"#808080",col)) 34 | 35 | dendrogram_plot <- ggplot() + 36 | geom_segment(data = segments, 37 | aes(x = x, xend = xend, 38 | y = y, yend = yend)) + 39 | geom_point(data = plot_nodes, 40 | aes(x = x, 41 | y = y, 42 | size = n_cells), 43 | pch = 21, 44 | fill = "#00AEEF", 45 | color = "#2E3192", 46 | alpha = 0.7) + 47 | geom_text(data = node_labels, 48 | aes(x = x, y = y, 49 | label = cluster_label), 50 | vjust = 1) + 51 | geom_text(data = leaf_labels, 52 | aes(x = x, y = y - 0.01, 53 | label = cluster_label, 54 | color = col), 55 | angle = 90, 56 | hjust = 1, 57 | vjust = 0.3) + 58 | scale_fill_identity() + 59 | scale_color_identity() + 60 | scale_size_area(breaks = c(10,25,50,100)) + 61 | scale_y_continuous(limits = c(-0.1,1)) + 62 | theme_void() 63 | 64 | dendrogram_plot 65 | 66 | ggsave("huang_dendrogram_plot.pdf",dendrogram_plot, width = 8, height = 6) 67 | -------------------------------------------------------------------------------- /Figures/Ext_Data_Fig_15/linnarsson_comparison.R: -------------------------------------------------------------------------------- 1 | library(dplyr) 2 | library(feather) 3 | library(ggplot2) 4 | options(stringsAsFactors = F) 5 | 6 | source("sankey_functions.R") 7 | 8 | load("//allen/programs/celltypes/workgroups/rnaseqanalysis/V1_ALM/process/WGCNA_result_anterograde/map.linnarsson.ss.df.rda") 9 | 10 | ss.map.df <- ss.map.df %>% 11 | mutate(sample_id = rownames(.)) %>% 12 | rename(ll_label = cl) 13 | 14 | anno <- read_feather("//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20170913/anno.feather") 15 | 16 | cluster_anno <- anno %>% 17 | select(cl, cluster_id, cluster_label, cluster_color) %>% 18 | unique() %>% 19 | mutate(pred.cl = as.character(cl)) 20 | 21 | 22 | 23 | ll_anno <- read_feather("//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/external/mouse_SS_CA1_Zeisel_2015_20170620/anno.feather") 24 | ll_cluster_anno <- ll_anno %>% 25 | select(cluster_id, cluster_label, cluster_color) %>% 26 | unique() 27 | names(ll_cluster_anno) <- c("ll_id","ll_label","ll_color") 28 | 29 | 30 | map_anno <- ss.map.df %>% 31 | left_join(cluster_anno) %>% 32 | left_join(ll_cluster_anno) %>% 33 | filter(complete.cases(.)) 34 | 35 | # rearrange the target clusters based on which query cluster has the highest freq 36 | new_order <- map_anno %>% 37 | group_by(cluster_id, cluster_label, ll_id) %>% 38 | summarise(Freq = n()) %>% 39 | ungroup() %>% 40 | group_by(cluster_id) %>% 41 | arrange(-Freq) %>% 42 | filter(row_number() == 1) %>% 43 | ungroup() %>% 44 | arrange(cluster_id,ll_id) %>% 45 | mutate(new_order = 1:n()) %>% 46 | select(cluster_id, cluster_label, Freq, new_order) 47 | 48 | linnarsson_to_new <- left_join(map_anno, new_order) %>% 49 | mutate(cluster_id = new_order) %>% 50 | filter(!coarse_cl %in% c("interneurons","pyramidal SS"), 51 | ll_label != "(none)") 52 | 53 | # make_plot_nodes converts group_nodes to rectangles for plotting with geom_rect() 54 | l2n_nodes <- make_plot_nodes(make_group_nodes(linnarsson_to_new, c("ll","cluster")), 55 | pad = 0.2) 56 | 57 | l2n_new_nodes <- l2n_nodes %>% 58 | filter(group == "cluster") %>% 59 | mutate(name = sub("^[0-9]+ ","",name), 60 | name = paste0(name, " (n = ",n,")")) 61 | l2n_linnarsson_nodes <- l2n_nodes %>% 62 | filter(group == "ll") %>% 63 | mutate(name = paste0(name, " (n = ",n,")")) 64 | 65 | linnarsson_to_new_river <- build_river_plot(linnarsson_to_new, 66 | c("ll","cluster"), 67 | fill_group = "ll", 68 | pad = 0.2) + 69 | geom_text(data = l2n_new_nodes, 70 | aes(x = xmax + 0.01, 71 | y = (ymin + ymax)/2, 72 | label = name, 73 | color = color), 74 | hjust = 0, 75 | size = 2) + 76 | geom_text(data = l2n_linnarsson_nodes, 77 | aes(x = xmin - 0.01, 78 | y = (ymin + ymax)/2, 79 | label = name, 80 | color = "black"), 81 | hjust = 1, 82 | size = 2) + 83 | scale_color_identity() 84 | 85 | linnarsson_to_new_river 86 | 87 | ggsave("l2n_glia.pdf",width = 4, height = 12) 88 | 89 | -------------------------------------------------------------------------------- /Figures/Ext_Data_Fig_15/linnarsson_stats.R: -------------------------------------------------------------------------------- 1 | library(dplyr) 2 | library(feather) 3 | library(ggplot2) 4 | options(stringsAsFactors = F) 5 | 6 | source("sankey_functions.R") 7 | 8 | load("//allen/programs/celltypes/workgroups/rnaseqanalysis/V1_ALM/process/WGCNA_result_anterograde/map.linnarsson.ss.df.rda") 9 | 10 | ss.map.df <- ss.map.df %>% 11 | mutate(sample_id = rownames(.)) %>% 12 | rename(ll_label = cl) 13 | 14 | ss.stats <- ss.map.df %>% 15 | group_by(ll_label) %>% 16 | mutate(ll_n = n()) %>% 17 | ungroup() %>% 18 | group_by(pred.cl, pred_cluster_label, ll_label) %>% 19 | summarise(n_cells = n(), 20 | ll_frac = n()/ll_n[1]) %>% 21 | arrange(as.numeric(pred.cl), 22 | -n_cells) %>% 23 | filter(ll_frac > 0.1) %>% 24 | filter(n_cells > 1) 25 | 26 | write.csv(ss.stats,"filtered_linnarsson_mapping.csv", quote = F, row.names = F) 27 | -------------------------------------------------------------------------------- /Figures/Ext_Data_Fig_15/prune_leaf_custom.R: -------------------------------------------------------------------------------- 1 | prune.dendrogram <- function(dend, leaves, reindex_dend = TRUE, ...) { 2 | leaves <- as.character(leaves) 3 | 4 | for(i in seq_along(leaves)) 5 | { 6 | # this function is probably not the fastest - but it works... 7 | dend <- prune_leaf(dend, leaves[i]) # move step by stem to remove all of these leaves... 8 | } 9 | 10 | if(reindex_dend) dend <- reindex_dend(dend) 11 | 12 | return(dend) 13 | } 14 | 15 | stats_midcache.dendrogram <- function (x, type = "hclust", quiet = FALSE) 16 | { 17 | type <- match.arg(type) 18 | stopifnot(inherits(x, "dendrogram")) 19 | setmid <- function(d, type) { 20 | if (is.leaf(d)) 21 | return(d) 22 | k <- length(d) 23 | if (k < 1) 24 | stop("dendrogram node with non-positive #{branches}") 25 | r <- d 26 | midS <- 0 27 | for (j in 1L:k) { 28 | r[[j]] <- unclass(setmid(d[[j]], type)) 29 | midS <- midS + .midDend(r[[j]]) 30 | } 31 | if (!quiet && type == "hclust" && k != 2) 32 | warning("midcache() of non-binary dendrograms only partly implemented") 33 | attr(r, "midpoint") <- (.memberDend(d[[1L]]) + midS)/2 34 | r 35 | } 36 | setmid(x, type = type) 37 | } 38 | 39 | stats_.midDend <- function (x) { 40 | if (is.null(mp <- attr(x, "midpoint"))) 0 else mp 41 | } 42 | .midDend <- stats_.midDend # copied so that they would work inside the various functions here... 43 | 44 | stats_.memberDend <- function (x) 45 | { 46 | r <- attr(x, "x.member") 47 | if (is.null(r)) { 48 | r <- attr(x, "members") 49 | if (is.null(r)) 50 | r <- 1L 51 | } 52 | r 53 | } 54 | .memberDend <- stats_.memberDend 55 | 56 | 57 | prune_leaf <- function(dend, leaf_name,...) 58 | { 59 | labels_dend <- labels(dend) 60 | 61 | if(length(labels_dend) != length(unique(labels_dend))) warning("Found dubplicate labels in the tree (this might indicate a problem in the tree you supplied)") 62 | 63 | if(!(leaf_name %in% labels_dend)) { # what to do if there is no such leaf inside the tree 64 | warning(paste("There is no leaf with the label", leaf_name , "in the tree you supplied", "\n" , "Returning original tree", "\n" )) 65 | return(dend) 66 | } 67 | 68 | if(sum(labels_dend %in% leaf_name) > 1) { # what to do if there is no such leaf inside the tree 69 | warning(paste("There are multiple leaves by the name of '", leaf_name , "' in the tree you supplied. Their locations is:", 70 | paste(which(labels_dend %in% leaf_name), collapse = ","),"\n" , "Returning original tree", "\n" )) 71 | return(dend) 72 | } 73 | 74 | is.father.of.leaf.to.remove <- function(dend, leaf_name) 75 | { 76 | # this function checks if the leaf we wish to remove is the direct child of the current branch (dend) we entered the function 77 | is.father <- FALSE 78 | for(i in seq_len(length(dend))) 79 | { 80 | if(is.leaf(dend[[i]]) == TRUE && labels(dend[[i]]) == leaf_name) is.father <- TRUE 81 | } 82 | return(is.father) 83 | } 84 | 85 | 86 | remove_leaf_if_child <- function(dend, leaf_name) 87 | { 88 | # print(labels(dend)) 89 | if(all(labels(dend) != leaf_name)) 90 | { # if the leaf we want to remove is not in this branch, simply return the branch without going deeper intoit. 91 | return(dend) 92 | } else { # but if the leaf we want to remove is here somewhere, go on searching 93 | attr(dend, "members") <- attr(dend, "members") - 1 94 | 95 | if(!is.father.of.leaf.to.remove(dend, leaf_name)) # if you are not the father, then go on and make this function work on each child 96 | { 97 | for(i in seq_len(length(dend))) 98 | { 99 | dend[[i]] <- remove_leaf_if_child(dend[[i]], leaf_name) 100 | } 101 | } else { # we'll merge 102 | if(length(dend) == 2) { 103 | leaf_location <- 1 104 | # if leaf location is 1, then move branch in leaf 2 to be the new x 105 | if(is.leaf(dend[[leaf_location]]) == T && labels(dend[[leaf_location]]) == leaf_name) { 106 | 107 | branch_to_bumpup <- 2 108 | dend <- dend[[branch_to_bumpup]] 109 | } else { # else - the leaf location must be located in position "2" 110 | 111 | branch_to_bumpup <- 1 112 | dend <- dend[[branch_to_bumpup]] 113 | } 114 | } else if(length(dend) > 2) { 115 | # If more than 2 branches, check if any are leaves 116 | dend_leaves <- unlist(lapply(dend, is.leaf)) 117 | if(sum(dend_leaves) > 0) { 118 | # If so, check for matching labels to the leaf to prune 119 | dend_labels <- unlist(lapply(dend, function(x) attr(x, "label"))) 120 | dend_matches <- dend_labels == leaf_name 121 | # Return a list containing the non-matching branches 122 | dend[dend_leaves & dend_matches] <- NULL 123 | # Note that in some cases, the following DOES NOT yield a correct result: 124 | # dend <- dend[!(dend_leaves & dend_matches)] 125 | 126 | # If the length is now 1, it can be bumped up 127 | if(length(dend) == 1) { 128 | dend <- dend[[1]] 129 | } 130 | 131 | } 132 | } 133 | } 134 | } 135 | return(dend) 136 | } 137 | 138 | 139 | new_dend <- remove_leaf_if_child(dend, leaf_name) 140 | new_dend <- suppressWarnings(stats_midcache.dendrogram(new_dend)) # fixes the attributes 141 | # new_x <- fix_members_attr.dendrogram(new_x) # fix the number of memebers attr for each node 142 | return(new_dend) 143 | } -------------------------------------------------------------------------------- /Figures/Ext_Data_Fig_15/tolias_comparison_figure.R: -------------------------------------------------------------------------------- 1 | library(dplyr) 2 | library(ggplot2) 3 | library(feather) 4 | options(stringsAsFactors = F) 5 | source("sankey_functions.R") 6 | 7 | v1_alm_anno <- read_feather("//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180520/anno.feather") 8 | v1_alm_cl_anno <- v1_alm_anno %>% 9 | select(cl, cluster_id, cluster_label, cluster_color) %>% 10 | unique() %>% 11 | mutate(cl = as.character(cl)) 12 | 13 | load("//allen/programs/celltypes/workgroups/rnaseqanalysis/V1_ALM/process_new/map.tree.df.rda") 14 | map.tree.df <- map.tree.df %>% 15 | mutate(sample_id = rownames(.)) 16 | 17 | map.dat <- map.tree.df %>% 18 | mutate(cluster_label = as.character(cluster_label)) %>% 19 | mutate(cluster_label = ifelse(is.na(cluster_label), as.character(cl), cluster_label)) %>% 20 | mutate(cl = as.character(cl)) 21 | 22 | anno <- read_feather("//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/patch_seq/tolias_patchseq_20161102/anno.feather") 23 | 24 | # Remove old mapping and add new mapping from map.tree.df 25 | anno <- anno %>% 26 | select(-cluster_id, -cluster_label, -cluster_color) %>% 27 | left_join(map.dat) %>% 28 | rowwise() %>% 29 | mutate(cluster_color = ifelse(cl %in% v1_alm_cl_anno$cl, 30 | v1_alm_cl_anno$cluster_color[v1_alm_cl_anno$cl == cl], 31 | "#808080")) %>% 32 | # %>% 33 | # mutate(cluster_id = ifelse(cl %in% v1_alm_cl_anno$cl, 34 | # v1_alm_cl_anno$cluster_id[v1_alm_cl_anno$cl == cl], 35 | # 0)) %>% 36 | ungroup() 37 | 38 | # fix cluster ids 39 | anno_cluster_ids <- anno %>% 40 | select(cluster_label) %>% 41 | unique() %>% 42 | mutate(cluster_id = 1:n()) 43 | 44 | anno <- anno %>% 45 | left_join(anno_cluster_ids) 46 | 47 | # rearrange the target clusters based on which query cluster has the highest freq 48 | new_order <- anno %>% 49 | group_by(cluster_id, cluster_label, class_id, cluster_color) %>% 50 | summarise(Freq = n()) %>% 51 | ungroup() %>% 52 | group_by(cluster_id) %>% 53 | arrange(-Freq) %>% 54 | filter(row_number() == 1) %>% 55 | ungroup() %>% 56 | arrange(class_id, cluster_id) %>% 57 | mutate(new_order = 1:n()) %>% 58 | select(cluster_id, cluster_label, cluster_color, Freq, new_order) %>% 59 | mutate(manual_order = c(2,1,7,4,3,8,5,9,11,6,10,12,13,14)) 60 | 61 | #edited_new_order <- edit(new_order) 62 | 63 | tolias_to_new <- left_join(anno, new_order) %>% 64 | mutate(cluster_id = manual_order) %>% 65 | # select(-cluster_color) %>% 66 | # left_join(v1_alm_cl_anno) %>% 67 | mutate(cluster_color = ifelse(is.na(cluster_color), "#808080", cluster_color)) 68 | 69 | # make_plot_nodes converts group_nodes to rectangles for plotting with geom_rect() 70 | t2n_nodes <- make_plot_nodes(make_group_nodes(tolias_to_new, c("class","cluster")), 71 | pad = 0.2) 72 | 73 | t2n_new_nodes <- t2n_nodes %>% 74 | filter(group == "cluster") %>% 75 | mutate(name = sub("^[0-9]+ ","",name), 76 | name = paste0(name, " (n = ",n,")")) 77 | t2n_tolias_nodes <- t2n_nodes %>% 78 | filter(group == "class") %>% 79 | mutate(name = paste0(name, " (n = ",n,")")) 80 | 81 | tolias_to_new_river <- build_river_plot(tolias_to_new, 82 | c("class","cluster"), 83 | fill_group = "class", 84 | pad = 0.2) + 85 | geom_text(data = t2n_new_nodes, 86 | aes(x = xmax + 0.01, 87 | y = (ymin + ymax)/2, 88 | label = name, 89 | color = color), 90 | hjust = 0, 91 | size = 2) + 92 | geom_text(data = t2n_tolias_nodes, 93 | aes(x = xmin - 0.01, 94 | y = (ymin + ymax)/2, 95 | label = name, 96 | color = color), 97 | hjust = 1, 98 | size = 2) + 99 | scale_color_identity() 100 | 101 | tolias_to_new_river 102 | 103 | ggsave("tolias_to_new_river.pdf", tolias_to_new_river, height = 3, width = 2) 104 | -------------------------------------------------------------------------------- /Figures/Ext_Data_Fig_15/tolias_dendrogram.R: -------------------------------------------------------------------------------- 1 | library(dplyr) 2 | library(feather) 3 | library(ggplot2) 4 | library(dendextend) 5 | options(stringsAsFactors = F) 6 | 7 | source("prune_leaf_custom.R") 8 | 9 | v1_alm_anno <- read_feather("//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180520/anno.feather") 10 | 11 | v1_alm_anno <- v1_alm_anno %>% 12 | filter(cluster_id %in% 1:133) 13 | # 14 | # dend <- readRDS("//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180520/dend.RData") 15 | # 16 | # alm_clusters <- unique(v1_alm_anno$cluster_id[grepl("ALM",v1_alm_anno$cluster_label)]) 17 | # #inh_clusters <- 1:60 18 | # #nn_clusters <- 116:133 19 | # 20 | # non_visp_clusters <- c(alm_clusters) 21 | # visp_clusters <- setdiff(1:133, non_visp_clusters) 22 | # 23 | # non_visp_cluster_labels <- unique(v1_alm_anno$cluster_label[v1_alm_anno$cluster_id %in% non_visp_clusters]) 24 | # 25 | # visp_dend <- dend %>% 26 | # prune.dendrogram(non_visp_cluster_labels) 27 | 28 | 29 | load("//allen/programs/celltypes/workgroups/rnaseqanalysis/V1_ALM/process_new/map.tree.df.rda") 30 | 31 | v1_alm_cl_anno <- v1_alm_anno %>% 32 | select(cl, cluster_id, cluster_label, cluster_color) %>% 33 | unique() %>% 34 | mutate(cl = as.character(cl)) 35 | 36 | labels(V1.dend) <- v1_alm_cl_anno$cluster_label[match(labels(V1.dend), v1_alm_cl_anno$cl)] 37 | 38 | node_n <- map.tree.df %>% 39 | mutate(cl = as.character(cl)) %>% 40 | group_by(cl) %>% 41 | summarise(n_cells = n()) %>% 42 | left_join(v1_alm_cl_anno) %>% 43 | mutate(cluster_label = ifelse(is.na(cluster_label), cl, cluster_label)) 44 | 45 | nodes <- as.ggdend(V1.dend)$nodes %>% 46 | mutate(cluster_label = get_nodes_attr(V1.dend,"label")) %>% 47 | left_join(node_n) 48 | 49 | plot_nodes <- nodes %>% 50 | filter(!is.na(n_cells)) 51 | 52 | segments <- as.ggdend(V1.dend)$segments 53 | 54 | node_labels <- plot_nodes %>% 55 | filter(y > 0) 56 | 57 | leaf_labels <- nodes %>% 58 | filter(y == 0) %>% 59 | mutate(col = ifelse(is.na(n_cells),"#808080",col)) %>% 60 | left_join(v1_alm_cl_anno) 61 | 62 | dendrogram_plot <- ggplot() + 63 | geom_segment(data = segments, 64 | aes(x = x, xend = xend, 65 | y = y, yend = yend), 66 | lineend = "square", 67 | size = 0.5) + 68 | geom_point(data = plot_nodes, 69 | aes(x = x, 70 | y = y, 71 | size = n_cells), 72 | pch = 21, 73 | fill = "#00AEEF", 74 | color = "#2E3192", 75 | alpha = 1) + 76 | geom_text(data = node_labels, 77 | aes(x = x, y = y, 78 | label = cl), 79 | vjust = 1, 80 | size = 2*5/6) + 81 | geom_text(data = leaf_labels, 82 | aes(x = x, y = y - 0.01, 83 | label = cluster_label, 84 | color = col), 85 | angle = 90, 86 | hjust = 1, 87 | vjust = 0.3, 88 | size = 2*5/6) + 89 | scale_fill_identity() + 90 | scale_color_identity() + 91 | scale_size_area(max_size = 2) + 92 | scale_y_continuous(limits = c(-0.1,1)) + 93 | theme_void() 94 | 95 | ggsave("tolias_dendrogram_plot.pdf",dendrogram_plot, width = 8.75, height = 5, useDingbats = FALSE) 96 | -------------------------------------------------------------------------------- /Figures/Ext_Data_Fig_16/cluster_violin_plot.R: -------------------------------------------------------------------------------- 1 | library(dplyr) 2 | library(ggplot2) 3 | library(feather) 4 | library(scrattch.vis) 5 | library(scrattch.io) 6 | options(stringsAsFactors = F) 7 | source("layer_and_violin_functions.R") 8 | 9 | genes <- split_cst("Vipr2, Pvalb, Slc32a1") 10 | 11 | fdir <- "//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180520/" 12 | 13 | p <- group_violin_plot2(data_source = fdir, 14 | genes = genes, 15 | group_by = "dendcluster", 16 | clusters = 1:133, 17 | logscale = FALSE, 18 | labelheight = 2, 19 | max_width = 10, 20 | fontsize = 6, 21 | showcounts = FALSE) 22 | 23 | ggsave("marker_violins.pdf", width = 10, height = 1, useDingbats = F) 24 | 25 | -------------------------------------------------------------------------------- /Figures/Ext_Data_Fig_16/coexpression_bar_plots.R: -------------------------------------------------------------------------------- 1 | library(dplyr) 2 | library(ggplot2) 3 | library(feather) 4 | library(reshape2) 5 | 6 | setwd("C:/Users/thucn/Dropbox/AIBS/Transcriptomics/Manuscripts/V1_alm/Figures/Figure_3_Vipr2/scripts") 7 | source("coexpression_functions.R") 8 | 9 | anno <- read_feather("//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180312/anno.feather") 10 | data <- feather("//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180312/data.feather") 11 | 12 | ## VISp 13 | anno <- anno %>% 14 | group_by(dendcluster_id) %>% 15 | # Filter annotations for only clusters with > 10% VISp cells 16 | mutate(group_visp_fraction = sum(region_label == "VISp")/n()) %>% 17 | filter(group_visp_fraction > 0.05) %>% 18 | # Only keep cells from those clusters that are from VISp 19 | filter(region_label == "VISp") %>% 20 | ungroup() 21 | 22 | # colors and ordering for different combinations 23 | frac_anno <- data.frame(frac_label = c("frac_1","frac_2","frac_3","frac_12","frac_23","frac_13","frac_123"), 24 | frac_id = 1:7, 25 | frac_color = c("#eb008b", "#006838", "cyan", "black", "magenta", "gray", "blue")) 26 | 27 | keep_frac <- c("frac_1", "frac_12", "frac_2","frac_3","frac_12","frac_23","frac_13","frac_123") 28 | keep_frac <- c("frac_12", "frac_3","frac_12","frac_23","frac_13","frac_123") 29 | 30 | coexpression_barplot(condition_1 = c("Vipr2"), 31 | condition_2 = c("Pvalb"), 32 | condition_3 = NULL, 33 | frac_anno = frac_anno, 34 | fpkm_cutoff = 1, 35 | group_by = "cluster", 36 | groups = c(1:125), 37 | anno = anno, 38 | keep_frac = keep_frac, 39 | data = data) 40 | 41 | ggsave("coexpression_barplot_Crh_Sst.pdf", width = 8, height = 5, dpi = 120, useDingbats = FALSE) 42 | 43 | 44 | 45 | -------------------------------------------------------------------------------- /Figures/Ext_Data_Fig_16/coexpression_functions.R: -------------------------------------------------------------------------------- 1 | condition_to_summarise_call <- function(pos_condition, neg_condition, fpkm_cutoff) { 2 | call <- paste0("sum(") 3 | pos_condition_comparisons <- paste(paste0(pos_condition," > ",fpkm_cutoff), collapse = " & ") 4 | if(!is.null(neg_condition)) { 5 | neg_condition_comparisons <- paste(paste0(neg_condition," > ",fpkm_cutoff), collapse = " & ") 6 | call <- paste0(call, pos_condition_comparisons, " & !(", neg_condition_comparisons,")") 7 | } else { 8 | call <- paste0(call, pos_condition_comparisons) 9 | } 10 | call <- paste0(call,")/n()") 11 | call 12 | } 13 | 14 | 15 | coexpression_barplot <- function(condition_1, condition_2, condition_3, 16 | frac_anno, fpkm_cutoff = 1, group_by = "dendcluster", 17 | groups = 1:128, 18 | anno, keep_frac = frac_anno$frac_label, data) { 19 | 20 | 21 | group_id <- paste0(group_by,"_id") 22 | group_label <- paste0(group_by,"_label") 23 | 24 | group_filter <- paste0(group_id," %in% c(",paste0(groups,collapse = ","),")") 25 | 26 | anno <- anno %>% 27 | filter_(group_filter) 28 | 29 | # recompute filtered positions 30 | group_order <- data.frame(group = groups) %>% 31 | mutate(group_order = 1:n()) 32 | names(group_order)[1] <- group_id 33 | 34 | group_anno <- anno %>% 35 | ungroup() %>% 36 | select(one_of(paste0(group_by,c("_id","_label","_color")))) %>% 37 | unique() %>% 38 | left_join(group_order) %>% 39 | arrange(group_order) %>% 40 | mutate(group_xpos = 1:n()) 41 | 42 | anno <- left_join(anno,group_anno) 43 | 44 | # pull data for all genes used in conditions 45 | 46 | cond_data <- data[,c("sample_id",condition_1,condition_2,condition_3)] 47 | 48 | # join gene values to the annotations 49 | anno_data <- anno %>% 50 | left_join(cond_data, by = "sample_id") 51 | 52 | # for each group, calculate the fraction of cells that match the conditions 53 | 54 | if(!is.null(condition_3)) { 55 | frac_data <- anno_data %>% 56 | group_by(group_xpos) %>% 57 | summarise_(frac_1 = condition_to_summarise_call(condition_1, c(condition_2, condition_3), fpkm_cutoff), 58 | frac_2 = condition_to_summarise_call(condition_2, c(condition_1, condition_3), fpkm_cutoff), 59 | frac_3 = condition_to_summarise_call(condition_3, c(condition_1, condition_2), fpkm_cutoff), 60 | frac_12 = condition_to_summarise_call(c(condition_1, condition_2), condition_3, fpkm_cutoff), 61 | frac_23 = condition_to_summarise_call(c(condition_2, condition_3), condition_1, fpkm_cutoff), 62 | frac_13 = condition_to_summarise_call(c(condition_1, condition_3), condition_2, fpkm_cutoff), 63 | frac_123 = condition_to_summarise_call(c(condition_1, condition_2, condition_3), NULL, fpkm_cutoff)) 64 | } else { 65 | frac_data <- anno_data %>% 66 | group_by(group_xpos) %>% 67 | summarise_(frac_1 = condition_to_summarise_call(condition_1, condition_2, fpkm_cutoff), 68 | frac_2 = condition_to_summarise_call(condition_2, condition_1, fpkm_cutoff), 69 | frac_12 = condition_to_summarise_call(c(condition_1, condition_2), NULL, fpkm_cutoff)) 70 | 71 | frac_anno <- frac_anno %>% 72 | filter(!grepl("3",frac_label)) 73 | } 74 | 75 | frac_anno <- frac_anno %>% 76 | filter(grepl(paste(paste0(keep_frac, "$"), collapse="|"), frac_label)) 77 | 78 | plot_data <- melt(frac_data, "group_xpos") %>% 79 | filter(value > 0) %>% 80 | rename_("frac_label" = "variable") %>% 81 | left_join(frac_anno) %>% 82 | arrange(group_xpos, frac_id) %>% 83 | group_by(group_xpos) %>% 84 | mutate(xmin = group_xpos - 0.4, 85 | xmax = group_xpos + 0.4) %>% 86 | mutate(csum = cumsum(value), 87 | ymin = lag(csum, default = 0), 88 | ymax = csum) 89 | 90 | ggplot() + 91 | geom_rect(data = plot_data, 92 | aes(xmin = xmin, xmax = xmax, 93 | ymin = ymin, ymax = ymax, 94 | fill = frac_color)) + 95 | scale_fill_identity(breaks = frac_anno$frac_color, 96 | labels = frac_anno$frac_label, 97 | guide = "legend") + 98 | scale_x_continuous(breaks = group_anno$group_xpos, 99 | labels = group_anno[[group_label]], 100 | limits = c(0.5, max(group_anno$group_xpos) + 0.5)) + 101 | scale_y_continuous("Fraction of Cells in Cluster", expand = c(0,0)) + 102 | theme_classic() + 103 | theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.3, colour=group_anno$cluster_color)) 104 | } 105 | -------------------------------------------------------------------------------- /Figures/Ext_Data_Fig_16/marker_violins.R: -------------------------------------------------------------------------------- 1 | library(dplyr) 2 | library(scrattch) 3 | options(stringsAsFactors = F) 4 | 5 | fdir <- "//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20170405/" 6 | 7 | vipr2_genes <- c("Vipr2") 8 | vipr2_markers <- group_violin_plot(data_source = fdir, 9 | group_by = "dendcluster", 10 | clusters = 1:112, 11 | logscale = FALSE, 12 | genes = vipr2_genes, 13 | labelheight = 50, 14 | showcounts = FALSE, 15 | fontsize = 10) 16 | 17 | vipr2_markers 18 | 19 | ggsave("vipr2_marker_violins.pdf", vipr2_markers, width = 16, height = 2, useDingbats = F) 20 | 21 | vipr2_boxplot <- group_box_plot(data_source = fdir, 22 | group_by = "dendcluster", 23 | clusters = 1:112, 24 | logscale = FALSE, 25 | genes = vipr2_genes, 26 | labelheight = 50, 27 | showcounts = FALSE, 28 | fontsize = 12) 29 | 30 | vipr2_boxplot 31 | 32 | ggsave("vipr2_boxplot.pdf", vipr2_boxplot, width = 16, height = 2, useDingbats = F) 33 | -------------------------------------------------------------------------------- /Figures/Ext_Data_Fig_16/river_plots.R: -------------------------------------------------------------------------------- 1 | library(dplyr) 2 | library(ggplot2) 3 | library(feather) 4 | options(stringsAsFactors = F) 5 | source("sankey_functions.R") 6 | 7 | 8 | anno <- read_feather("//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20170405/anno.feather") 9 | 10 | anno <- anno %>% 11 | filter(genotype_label == "Vipr2-IRES2-Cre/wt;Ai14(RCL-tdT)/wt") 12 | 13 | # make_plot_nodes converts group_nodes to rectangles for plotting with geom_rect() 14 | nodes <- make_plot_nodes(make_group_nodes(anno, c("genotype","cluster"))) 15 | 16 | left_nodes <- nodes %>% 17 | filter(group == "genotype") %>% 18 | mutate(name = paste0(name, " (n = ",n,")")) 19 | right_nodes <- nodes %>% 20 | filter(group == "cluster") %>% 21 | mutate(name = paste0(name, " (n = ",n,")")) 22 | 23 | 24 | river <- build_river_plot(anno, 25 | c("genotype","cluster"), 26 | fill_group = "cluster") + 27 | geom_text(data = right_nodes, 28 | aes(x = xmax + 0.01, 29 | y = (ymin + ymax)/2, 30 | label = name, 31 | color = color), 32 | hjust = 0) + 33 | geom_text(data = left_nodes, 34 | aes(x = xmin - 0.01, 35 | y = (ymin + ymax)/2, 36 | label = name, 37 | color = color), 38 | hjust = 1) + 39 | scale_color_identity() 40 | 41 | river 42 | 43 | ggsave("vipr2_ai14_river.pdf", river, height = 5, width = 5) 44 | 45 | ## Simulations for placeholders 46 | 47 | anno <- anno %>% 48 | filter(cluster_id %in% c(1,29:47)) 49 | 50 | nodes <- make_plot_nodes(make_group_nodes(anno, c("genotype","cluster"))) 51 | 52 | left_nodes <- nodes %>% 53 | filter(group == "genotype") %>% 54 | mutate(name = paste0(name, " (n = ",n,")")) 55 | right_nodes <- nodes %>% 56 | filter(group == "cluster") %>% 57 | mutate(name = paste0(name, " (n = ",n,")")) 58 | 59 | 60 | river <- build_river_plot(anno, 61 | c("genotype","cluster"), 62 | fill_group = "cluster") + 63 | geom_text(data = right_nodes, 64 | aes(x = xmax + 0.01, 65 | y = (ymin + ymax)/2, 66 | label = name, 67 | color = color), 68 | hjust = 0) + 69 | geom_text(data = left_nodes, 70 | aes(x = xmin - 0.01, 71 | y = (ymin + ymax)/2, 72 | label = name, 73 | color = color), 74 | hjust = 1) + 75 | scale_color_identity() 76 | 77 | river 78 | 79 | ggsave("SIMULATION_vipr2_slc32a1_river.pdf", river, height = 5, width = 5) 80 | 81 | 82 | anno <- anno %>% 83 | filter(cluster_id %in% c(47)) 84 | 85 | nodes <- make_plot_nodes(make_group_nodes(anno, c("genotype","cluster"))) 86 | 87 | left_nodes <- nodes %>% 88 | filter(group == "genotype") %>% 89 | mutate(name = paste0(name, " (n = ",n,")")) 90 | right_nodes <- nodes %>% 91 | filter(group == "cluster") %>% 92 | mutate(name = paste0(name, " (n = ",n,")")) 93 | 94 | 95 | river <- build_river_plot(anno, 96 | c("genotype","cluster"), 97 | fill_group = "cluster") + 98 | geom_text(data = right_nodes, 99 | aes(x = xmax + 0.01, 100 | y = (ymin + ymax)/2, 101 | label = name, 102 | color = color), 103 | hjust = 0) + 104 | geom_text(data = left_nodes, 105 | aes(x = xmin - 0.01, 106 | y = (ymin + ymax)/2, 107 | label = name, 108 | color = color), 109 | hjust = 1) + 110 | scale_color_identity() 111 | 112 | river 113 | 114 | ggsave("SIMULATION_vipr2_pvalb_river.pdf", river, height = 5, width = 5) 115 | -------------------------------------------------------------------------------- /Figures/Ext_Data_Fig_18/cluster_violin_plot.R: -------------------------------------------------------------------------------- 1 | library(dplyr) 2 | library(ggplot2) 3 | library(feather) 4 | library(scrattch) 5 | options(stringsAsFactors = F) 6 | 7 | genes <- split_cst("Rspo1, Scnn1a, Hsd11b1, Chrna6, Slc17a8, Batf3, Colq, Fam84b, Osr1, Foxp2, Slc17a7") 8 | 9 | p <- group_violin_plot(data_source = "//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20170913/", 10 | genes = genes, 11 | group_by = "dendcluster", 12 | clusters = 1:128, 13 | labelheight = 10) 14 | 15 | ggsave("marker_violins.pdf", width = 10, height = 3, useDingbats = F) 16 | 17 | -------------------------------------------------------------------------------- /Figures/Ext_Data_Fig_18/coexpression_functions.R: -------------------------------------------------------------------------------- 1 | condition_to_summarise_call <- function(pos_condition, neg_condition, fpkm_cutoff) { 2 | call <- paste0("sum(") 3 | pos_condition_comparisons <- paste(paste0(pos_condition," > ",fpkm_cutoff), collapse = " & ") 4 | if(!is.null(neg_condition)) { 5 | neg_condition_comparisons <- paste(paste0(neg_condition," > ",fpkm_cutoff), collapse = " & ") 6 | call <- paste0(call, pos_condition_comparisons, " & !(", neg_condition_comparisons,")") 7 | } else { 8 | call <- paste0(call, pos_condition_comparisons) 9 | } 10 | call <- paste0(call,")/n()") 11 | call 12 | } 13 | 14 | 15 | coexpression_barplot <- function(condition_1, condition_2, condition_3, 16 | frac_anno, fpkm_cutoff = 1, group_by = "dendcluster", 17 | groups = 1:128, 18 | anno, keep_frac = frac_anno$frac_label, data) { 19 | 20 | 21 | group_id <- paste0(group_by,"_id") 22 | group_label <- paste0(group_by,"_label") 23 | 24 | group_filter <- paste0(group_id," %in% c(",paste0(groups,collapse = ","),")") 25 | 26 | anno <- anno %>% 27 | filter_(group_filter) 28 | 29 | # recompute filtered positions 30 | group_order <- data.frame(group = groups) %>% 31 | mutate(group_order = 1:n()) 32 | names(group_order)[1] <- group_id 33 | 34 | group_anno <- anno %>% 35 | ungroup() %>% 36 | select(one_of(paste0(group_by,c("_id","_label","_color")))) %>% 37 | unique() %>% 38 | left_join(group_order) %>% 39 | arrange(group_order) %>% 40 | mutate(group_xpos = 1:n()) 41 | 42 | anno <- left_join(anno,group_anno) 43 | 44 | # pull data for all genes used in conditions 45 | 46 | cond_data <- data[,c("sample_id",condition_1,condition_2,condition_3)] 47 | 48 | # join gene values to the annotations 49 | anno_data <- anno %>% 50 | left_join(cond_data, by = "sample_id") 51 | 52 | # for each group, calculate the fraction of cells that match the conditions 53 | 54 | if(!is.null(condition_3)) { 55 | frac_data <- anno_data %>% 56 | group_by(group_xpos) %>% 57 | summarise_(frac_1 = condition_to_summarise_call(condition_1, c(condition_2, condition_3), fpkm_cutoff), 58 | frac_2 = condition_to_summarise_call(condition_2, c(condition_1, condition_3), fpkm_cutoff), 59 | frac_3 = condition_to_summarise_call(condition_3, c(condition_1, condition_2), fpkm_cutoff), 60 | frac_12 = condition_to_summarise_call(c(condition_1, condition_2), condition_3, fpkm_cutoff), 61 | frac_23 = condition_to_summarise_call(c(condition_2, condition_3), condition_1, fpkm_cutoff), 62 | frac_13 = condition_to_summarise_call(c(condition_1, condition_3), condition_2, fpkm_cutoff), 63 | frac_123 = condition_to_summarise_call(c(condition_1, condition_2, condition_3), NULL, fpkm_cutoff)) 64 | } else { 65 | frac_data <- anno_data %>% 66 | group_by(group_xpos) %>% 67 | summarise_(frac_1 = condition_to_summarise_call(condition_1, condition_2, fpkm_cutoff), 68 | frac_2 = condition_to_summarise_call(condition_2, condition_1, fpkm_cutoff), 69 | frac_12 = condition_to_summarise_call(c(condition_1, condition_2), NULL, fpkm_cutoff)) 70 | 71 | frac_anno <- frac_anno %>% 72 | filter(!grepl("3",frac_label)) 73 | } 74 | 75 | frac_anno <- frac_anno %>% 76 | filter(grepl(paste(paste0(keep_frac, "$"), collapse="|"), frac_label)) 77 | 78 | plot_data <- melt(frac_data, "group_xpos") %>% 79 | filter(value > 0) %>% 80 | rename_("frac_label" = "variable") %>% 81 | left_join(frac_anno) %>% 82 | arrange(group_xpos, frac_id) %>% 83 | group_by(group_xpos) %>% 84 | mutate(xmin = group_xpos - 0.4, 85 | xmax = group_xpos + 0.4) %>% 86 | mutate(csum = cumsum(value), 87 | ymin = lag(csum, default = 0), 88 | ymax = csum) 89 | 90 | ggplot() + 91 | geom_rect(data = plot_data, 92 | aes(xmin = xmin, xmax = xmax, 93 | ymin = ymin, ymax = ymax, 94 | fill = frac_color)) + 95 | scale_fill_identity(breaks = frac_anno$frac_color, 96 | labels = frac_anno$frac_label, 97 | guide = "legend") + 98 | scale_x_continuous(breaks = group_anno$group_xpos, 99 | labels = group_anno[[group_label]], 100 | limits = c(0.5, max(group_anno$group_xpos) + 0.5)) + 101 | scale_y_continuous("Fraction of Cells in Cluster", expand = c(0,0)) + 102 | theme_classic() + 103 | theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.3, colour=group_anno$cluster_color)) 104 | } 105 | -------------------------------------------------------------------------------- /Figures/Ext_Data_Fig_18/color_functions.R: -------------------------------------------------------------------------------- 1 | hsv_palette <- function(n_colors, 2 | hue_start = 0, 3 | hue_end = max(1, n-1)/n, 4 | sat_start = 0.55, 5 | sat_end = 1, 6 | sat_steps = 4, 7 | val_start = 1, 8 | val_end = 0.8, 9 | val_steps = 3) { 10 | sats <- rep_len(seq(sat_start, sat_end, length.out = sat_steps),length.out = n_colors) 11 | vals <- rep_len(seq(val_start, val_end, length.out = val_steps),length.out = n_colors) 12 | if(hue_end < hue_start) { 13 | rev(sub("FF$","",rainbow(n_colors, s = sats, v = vals, start = hue_end, end = hue_start))) 14 | } else { 15 | sub("FF$","",rainbow(n_colors, s = sats, v = vals, start = hue_start, end = hue_end)) 16 | } 17 | } 18 | 19 | color_mean <- function(x) { 20 | library(grDevices) 21 | 22 | rgb_x <- col2rgb(x) 23 | rgb_mean <- rowMeans(rgb_x) 24 | new_hex <- rgb(rgb_mean["red"]/255, 25 | rgb_mean["green"]/255, 26 | rgb_mean["blue"]/255) 27 | 28 | new_hex 29 | 30 | } 31 | 32 | check_s <- function(x, min_sat = 0) { 33 | library(grDevices) 34 | 35 | hsv_x <- rgb2hsv(col2rgb(x)) 36 | 37 | hsv_x["s",] < min_sat 38 | 39 | } 40 | 41 | check_v <- function(x, min_val = 0) { 42 | library(grDevices) 43 | 44 | hsv_x <- rgb2hsv(col2rgb(x)) 45 | 46 | hsv_x["v",] < min_val 47 | 48 | } 49 | 50 | adjust_s <- function(x, shift_sat = 0.3) { 51 | library(grDevices) 52 | 53 | hsv_x <- rgb2hsv(col2rgb(x)) 54 | 55 | hsv_x["s", ] <- hsv_x["s", ] + shift_sat 56 | if(hsv_x["s", ] > 1) { 57 | hsv_x["s", ] <- 1 58 | } else if (hsv_x["s",] <- 0) { 59 | hsv_x["s", ] <- 0 60 | } 61 | 62 | hsv(hsv_x[1,], hsv_x[2,], hsv_x[3,]) 63 | } 64 | 65 | adjust_v <- function(x, shift_val = 0.3) { 66 | library(grDevices) 67 | 68 | hsv_x <- rgb2hsv(col2rgb(x)) 69 | 70 | hsv_x["v", ] <- hsv_x["v", ] + shift_val 71 | if(hsv_x["v", ] > 1) { 72 | hsv_x["v", ] <- 1 73 | } else if (hsv_x["v",] <- 0) { 74 | hsv_x["v", ] <- 0 75 | } 76 | 77 | hsv(hsv_x[1,], hsv_x[2,], hsv_x[3,]) 78 | } 79 | -------------------------------------------------------------------------------- /Figures/Ext_Data_Fig_18/prune_leaf_custom.R: -------------------------------------------------------------------------------- 1 | prune.dendrogram <- function(dend, leaves, reindex_dend = TRUE, ...) { 2 | leaves <- as.character(leaves) 3 | 4 | for(i in seq_along(leaves)) 5 | { 6 | # this function is probably not the fastest - but it works... 7 | dend <- prune_leaf(dend, leaves[i]) # move step by stem to remove all of these leaves... 8 | } 9 | 10 | if(reindex_dend) dend <- reindex_dend(dend) 11 | 12 | return(dend) 13 | } 14 | 15 | stats_midcache.dendrogram <- function (x, type = "hclust", quiet = FALSE) 16 | { 17 | type <- match.arg(type) 18 | stopifnot(inherits(x, "dendrogram")) 19 | setmid <- function(d, type) { 20 | if (is.leaf(d)) 21 | return(d) 22 | k <- length(d) 23 | if (k < 1) 24 | stop("dendrogram node with non-positive #{branches}") 25 | r <- d 26 | midS <- 0 27 | for (j in 1L:k) { 28 | r[[j]] <- unclass(setmid(d[[j]], type)) 29 | midS <- midS + .midDend(r[[j]]) 30 | } 31 | if (!quiet && type == "hclust" && k != 2) 32 | warning("midcache() of non-binary dendrograms only partly implemented") 33 | attr(r, "midpoint") <- (.memberDend(d[[1L]]) + midS)/2 34 | r 35 | } 36 | setmid(x, type = type) 37 | } 38 | 39 | stats_.midDend <- function (x) { 40 | if (is.null(mp <- attr(x, "midpoint"))) 0 else mp 41 | } 42 | .midDend <- stats_.midDend # copied so that they would work inside the various functions here... 43 | 44 | stats_.memberDend <- function (x) 45 | { 46 | r <- attr(x, "x.member") 47 | if (is.null(r)) { 48 | r <- attr(x, "members") 49 | if (is.null(r)) 50 | r <- 1L 51 | } 52 | r 53 | } 54 | .memberDend <- stats_.memberDend 55 | 56 | 57 | prune_leaf <- function(dend, leaf_name,...) 58 | { 59 | labels_dend <- labels(dend) 60 | 61 | if(length(labels_dend) != length(unique(labels_dend))) warning("Found dubplicate labels in the tree (this might indicate a problem in the tree you supplied)") 62 | 63 | if(!(leaf_name %in% labels_dend)) { # what to do if there is no such leaf inside the tree 64 | warning(paste("There is no leaf with the label", leaf_name , "in the tree you supplied", "\n" , "Returning original tree", "\n" )) 65 | return(dend) 66 | } 67 | 68 | if(sum(labels_dend %in% leaf_name) > 1) { # what to do if there is no such leaf inside the tree 69 | warning(paste("There are multiple leaves by the name of '", leaf_name , "' in the tree you supplied. Their locations is:", 70 | paste(which(labels_dend %in% leaf_name), collapse = ","),"\n" , "Returning original tree", "\n" )) 71 | return(dend) 72 | } 73 | 74 | is.father.of.leaf.to.remove <- function(dend, leaf_name) 75 | { 76 | # this function checks if the leaf we wish to remove is the direct child of the current branch (dend) we entered the function 77 | is.father <- FALSE 78 | for(i in seq_len(length(dend))) 79 | { 80 | if(is.leaf(dend[[i]]) == TRUE && labels(dend[[i]]) == leaf_name) is.father <- TRUE 81 | } 82 | return(is.father) 83 | } 84 | 85 | 86 | remove_leaf_if_child <- function(dend, leaf_name) 87 | { 88 | # print(labels(dend)) 89 | if(all(labels(dend) != leaf_name)) 90 | { # if the leaf we want to remove is not in this branch, simply return the branch without going deeper intoit. 91 | return(dend) 92 | } else { # but if the leaf we want to remove is here somewhere, go on searching 93 | attr(dend, "members") <- attr(dend, "members") - 1 94 | 95 | if(!is.father.of.leaf.to.remove(dend, leaf_name)) # if you are not the father, then go on and make this function work on each child 96 | { 97 | for(i in seq_len(length(dend))) 98 | { 99 | dend[[i]] <- remove_leaf_if_child(dend[[i]], leaf_name) 100 | } 101 | } else { # we'll merge 102 | if(length(dend) == 2) { 103 | leaf_location <- 1 104 | # if leaf location is 1, then move branch in leaf 2 to be the new x 105 | if(is.leaf(dend[[leaf_location]]) == T && labels(dend[[leaf_location]]) == leaf_name) { 106 | 107 | branch_to_bumpup <- 2 108 | dend <- dend[[branch_to_bumpup]] 109 | } else { # else - the leaf location must be located in position "2" 110 | 111 | branch_to_bumpup <- 1 112 | dend <- dend[[branch_to_bumpup]] 113 | } 114 | } else if(length(dend) > 2) { 115 | # If more than 2 branches, check if any are leaves 116 | dend_leaves <- unlist(lapply(dend, is.leaf)) 117 | if(sum(dend_leaves) > 0) { 118 | # If so, check for matching labels to the leaf to prune 119 | dend_labels <- unlist(lapply(dend, function(x) attr(x, "label"))) 120 | dend_matches <- dend_labels == leaf_name 121 | # Return a list containing the non-matching branches 122 | dend <- dend[!(dend_leaves & dend_matches)] 123 | } 124 | } 125 | } 126 | } 127 | return(dend) 128 | } 129 | 130 | 131 | new_dend <- remove_leaf_if_child(dend, leaf_name) 132 | new_dend <- suppressWarnings(stats_midcache.dendrogram(new_dend)) # fixes the attributes 133 | # new_x <- fix_members_attr.dendrogram(new_x) # fix the number of memebers attr for each node 134 | return(new_dend) 135 | } -------------------------------------------------------------------------------- /Figures/Ext_Data_Fig_18/unified_layer_distributions_and_markers.R: -------------------------------------------------------------------------------- 1 | library(dplyr) 2 | library(ggplot2) 3 | library(cowplot) 4 | library(feather) 5 | library(dendextend) 6 | library(scrattch.vis) 7 | library(scrattch.io) 8 | options(stringsAsFactors = F) 9 | 10 | source("color_functions.R") 11 | source("prune_leaf_custom.R") 12 | source("layer_and_violin_functions.R") 13 | 14 | anno <- read_feather("//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180520/anno.feather") 15 | 16 | 17 | plot_anno <- data.frame(plot_id = c(1:4), 18 | cluster_id = c(35,35,46,46), 19 | region_id = c(1,2,1,2), 20 | plot_color = rainbow(4), 21 | plot_label = c("ALM Calb2 Necab1","VISp Calb2 Necab1", 22 | "ALM Esm1","VISp Esm1")) 23 | 24 | sub_anno <- anno %>% 25 | filter(cluster_id %in% c(35,46)) %>% 26 | left_join(plot_anno) %>% 27 | mutate(dendcluster_id = plot_id) %>% 28 | mutate(cluster_color = plot_color) 29 | 30 | 31 | layer_jitters <- build_layer_plot(sub_anno, 32 | dendcluster_ids = 1:4) 33 | 34 | ## Violin plots 35 | 36 | fdir <- "//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180520/" 37 | 38 | sst_pvalb_genes <- unique(split_cst("Sst Crh Calb2")) 39 | 40 | data <- feather("//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180520/data.feather") 41 | gene_data <- data[,c("sample_id",sst_pvalb_genes)] %>% 42 | filter(sample_id %in% sub_anno$sample_id) %>% 43 | left_join(sub_anno) %>% 44 | mutate(xpos = plot_id) 45 | 46 | sst_pvalb_markers <- group_violin_plot2(data = gene_data, 47 | group_by = "plot", 48 | clusters = 1:4, 49 | genes = sst_pvalb_genes, 50 | logscale = TRUE, 51 | labelheight = 2, 52 | max_width = 10, 53 | fontsize = 5, 54 | showcounts = FALSE) 55 | 56 | all_plots <- plot_grid(layer_jitters, 57 | sst_pvalb_markers, 58 | align = "v", 59 | nrow = 2, 60 | rel_widths = 1, 61 | rel_heights = 1) 62 | 63 | save_plot("layers_violins.pdf", 64 | all_plots, 65 | ncol = 2, 66 | nrow = 2, 67 | base_width = 1.5/2, 68 | base_height = 4/2) 69 | 70 | count_summary <- sub_anno %>% 71 | group_by(plot_id) %>% 72 | summarise(n_cells = n()) 73 | 74 | max_summary <- data.frame(sst_max = max(gene_data$Sst), 75 | crh_max = max(gene_data$Crh), 76 | calb2_max = max(gene_data$Calb2)) 77 | -------------------------------------------------------------------------------- /Figures/Ext_Data_Fig_20/dend_plots.R: -------------------------------------------------------------------------------- 1 | library(dendextend) 2 | library(dplyr) 3 | library(feather) 4 | options(stringsAsFactors = F) 5 | 6 | load("//allen/programs/celltypes/workgroups/rnaseqanalysis/V1_ALM/process_new/dend.collapse.rda") 7 | load("//allen/programs/celltypes/workgroups/rnaseqanalysis/V1_ALM/process_new/dend.bak.rda") 8 | 9 | anno <- read_feather("//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180520/anno.feather") 10 | 11 | cl_anno <- anno %>% 12 | select(cl, cluster_id, cluster_label, cluster_color) %>% 13 | unique() 14 | 15 | cl_to_cluster <- cl_anno %>% 16 | select(cl, cluster_id) 17 | 18 | format_dend <- function(dend, cl_anno, cl_to_cluster) { 19 | # Rotate so that Glutamatergic samples are first: 20 | original_labels <- as.numeric(labels(dend)) 21 | gaba_labels <- cl_to_cluster$cl[cl_to_cluster$cluster_id %in% 1:60] 22 | gluta_labels <- cl_to_cluster$cl[cl_to_cluster$cluster_id %in% 61:115] 23 | nn_labels <- cl_to_cluster$cl[cl_to_cluster$cluster_id %in% 116:133] 24 | 25 | dend <- dend %>% 26 | rotate(as.character(c(gluta_labels, gaba_labels, nn_labels))) 27 | 28 | labels_colors(dend) <- cl_anno$cluster_color[match(labels(dend), cl_anno$cl)] 29 | labels(dend) <- as.character(cl_anno$cluster_label[match(as.numeric(labels(dend)), cl_anno$cl)]) 30 | 31 | labelDend <- function(dend,n=1) 32 | { 33 | if(is.null(attr(dend,"label"))){ 34 | attr(dend, "label") =paste0("n",n) 35 | n= n +1 36 | } 37 | if(length(dend)>1){ 38 | for(i in 1:length(dend)){ 39 | tmp = labelDend(dend[[i]], n) 40 | dend[[i]] = tmp[[1]] 41 | n = tmp[[2]] 42 | } 43 | } 44 | return(list(dend, n)) 45 | } 46 | 47 | 48 | dend <- labelDend(dend)[[1]] 49 | dend 50 | } 51 | 52 | # Original, uncollapsed dendrogram 53 | dend_original <- format_dend(dend, 54 | cl_anno, cl_to_cluster) 55 | 56 | pdf("dend_0.pdf", 57 | width = 7.5, height = 4, 58 | useDingbats = F) 59 | plot(dend_original) 60 | dev.off() 61 | 62 | # version used for figures 63 | dend_0.4 <- readRDS("//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180520/dend.RData") 64 | pdf("dend_0.4.pdf", 65 | width = 7.5, height = 4, 66 | useDingbats = F) 67 | plot(dend_0.4) 68 | dev.off() 69 | 70 | # Collapsed further 71 | dend_0.8 <- format_dend(dend.collapse[[7]], 72 | cl_anno, cl_to_cluster) 73 | pdf("dend_0.8.pdf", 74 | width = 7.5, height = 4, 75 | useDingbats = F) 76 | plot(dend_0.8) 77 | dev.off() 78 | 79 | # legend 80 | dend_dfs <- as.ggdend(dend) 81 | 82 | library(scrattch.vis) 83 | 84 | colorset <- c("#FFFFFF","#C5C5C5","#921C1C","#000000") 85 | 86 | legend <- heatmap_legend_plot(minval = 0, maxval = 1, 87 | scale_name = "Bootstrapped Confidence", 88 | colorset = colorset) 89 | 90 | ggsave("legend.pdf", 91 | legend, 92 | width = 1, 93 | height = 1) 94 | -------------------------------------------------------------------------------- /Figures/Ext_Data_Fig_3/pairwise_cluster_heatmaps.R: -------------------------------------------------------------------------------- 1 | library(ggplot2) 2 | library(dplyr) 3 | library(feather) 4 | library(scrattch.vis) 5 | library(reshape2) 6 | library(pals) 7 | options(stringsAsFactors = F) 8 | 9 | dend <- readRDS("//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180520/dend.RData") 10 | 11 | anno <- read_feather("//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180520/anno.feather") 12 | 13 | cl_anno <- anno %>% 14 | select(cl, dendcluster_id, cluster_id) %>% 15 | unique() %>% 16 | filter(cluster_id %in% 1:133) %>% 17 | arrange(dendcluster_id) 18 | 19 | 20 | # Pairwise Pearson Correlation 21 | load("//allen/programs/celltypes/workgroups/rnaseqanalysis/V1_ALM/process_new/cl.cor.rda") 22 | cl.cor2 <- cl.cor[as.character(cl_anno$cl),as.character(cl_anno$cl)] 23 | rownames(cl.cor2) <- cl_anno$dendcluster_id 24 | colnames(cl.cor2) <- cl_anno$dendcluster_id 25 | 26 | #colorset <- sub("FF$","",plasma(20)) 27 | colorset <- jet(20) 28 | 29 | cl.cor3 <- melt(cl.cor2) %>% 30 | mutate(color = values_to_colors(value, colorset = colorset)) 31 | 32 | cl.cor_plot <- ggplot() + 33 | geom_tile(data = cl.cor3, 34 | aes(x = Var1, 35 | y = Var2, 36 | fill = color)) + 37 | scale_fill_identity() + 38 | scale_y_reverse() + 39 | theme_void() 40 | 41 | ggsave("cl.cor_plot.pdf", 42 | cl.cor_plot, 43 | width = 2.25, 44 | height = 2.25) 45 | 46 | # Pairwise DEGene expression 47 | load("//allen/programs/celltypes/workgroups/rnaseqanalysis/V1_ALM/process_new/de.summary.rda") 48 | de.summary <- de.summary %>% 49 | mutate(cl1 = as.numeric(as.character(cl1)), 50 | cl2 = as.numeric(as.character(cl2))) 51 | 52 | cl_anno1 <- cl_anno 53 | names(cl_anno1) <- paste0(names(cl_anno), "1") 54 | cl_anno2 <- cl_anno 55 | names(cl_anno2) <- paste0(names(cl_anno), "2") 56 | 57 | de.summary2 <- de.summary %>% 58 | left_join(cl_anno1) %>% 59 | left_join(cl_anno2) %>% 60 | mutate(color = values_to_colors(log10(de.num+1), colorset = colorset)) 61 | 62 | diag_tiles <- data.frame(pos = 1:125, 63 | color = colorset[1]) 64 | 65 | de.summary_plot <- ggplot() + 66 | geom_tile(data = de.summary2, 67 | aes(x = dendcluster_id1, 68 | y = dendcluster_id2, 69 | fill = color)) + 70 | geom_tile(data = de.summary2, 71 | aes(x = dendcluster_id2, 72 | y = dendcluster_id1, 73 | fill = color)) + 74 | geom_tile(data = diag_tiles, 75 | aes(x = pos, 76 | y = pos, 77 | fill = color))+ 78 | scale_fill_identity() + 79 | scale_y_reverse() + 80 | theme_void() 81 | 82 | ggsave("de.summary_plot.pdf", 83 | de.summary_plot, 84 | width = 2.25, 85 | height = 2.25) 86 | 87 | # Pairwise coclustering 88 | load("//allen/programs/celltypes/workgroups/rnaseqanalysis/V1_ALM/process_new/co.stats.rda") 89 | 90 | cl.co.ratio <- co.stats$cl.co.ratio 91 | 92 | cl.co.ratio2 <- cl.co.ratio[as.character(cl_anno$cl),as.character(cl_anno$cl)] 93 | rownames(cl.co.ratio2) <- cl_anno$dendcluster_id 94 | colnames(cl.co.ratio2) <- cl_anno$dendcluster_id 95 | 96 | cl.co.ratio3 <- melt(cl.co.ratio2) %>% 97 | mutate(color = values_to_colors(value, colorset = colorset)) 98 | 99 | cl.co.ratio_plot <- ggplot() + 100 | geom_tile(data = cl.co.ratio3, 101 | aes(x = Var1, 102 | y = Var2, 103 | fill = color)) + 104 | scale_fill_identity() + 105 | scale_y_reverse() + 106 | theme_void() 107 | 108 | ggsave("cl.co.ratio_plot.pdf", 109 | cl.co.ratio_plot, 110 | width = 2.25, 111 | height = 2.25) 112 | 113 | legend_colors <- heatmap_legend_plot(0, 1, colorset = colorset) + 114 | theme_void() 115 | 116 | ggsave("legend_colors.pdf", 117 | legend_colors, 118 | width = 1, height = 0.5) 119 | -------------------------------------------------------------------------------- /Figures/Ext_Data_Fig_3/qc_jitter_plots.R: -------------------------------------------------------------------------------- 1 | library(dplyr) 2 | library(ggplot2) 3 | library(ggbeeswarm) 4 | 5 | summarySE <- function(data=NULL, measurevar, groupvars=NULL, na.rm=FALSE, 6 | conf.interval=.95, .drop=TRUE, roundall = F) { 7 | require(dplyr) 8 | # This does the summary. For each group return a vector with 9 | # N, mean, and sd 10 | 11 | names(data)[names(data) == measurevar] <- "measurevar" 12 | 13 | datac <- data %>% 14 | select(one_of(groupvars,"measurevar")) %>% 15 | filter(ifelse(na.rm == T, !is.na(measurevar), T)) %>% 16 | mutate(measurevar = as.numeric(measurevar)) %>% 17 | group_by_(c(groupvars)) %>% 18 | summarise(N = n(), 19 | median = median(measurevar), 20 | mean = mean(measurevar), 21 | max = max(measurevar), 22 | sd = ifelse(N == 1, 0, sd(measurevar)), 23 | q25 = as.numeric(quantile(measurevar, 0.25)), 24 | q75 = as.numeric(quantile(measurevar, 0.75))) %>% 25 | mutate(se = sd/sqrt(N)) 26 | #%>% 27 | # mutate(ci = se * qt(conf.interval/2 + 0.5, N-1)) 28 | 29 | 30 | if(roundall) { 31 | roundcols <- c("median","mean","max","sd","q25","q75","se","ci") 32 | datac[roundcols] <- round(datac[roundcols],3) 33 | } 34 | 35 | # datac <- datac %>% 36 | # mutate(xpos = 1:n()) 37 | 38 | return(datac) 39 | } 40 | 41 | anno <- read_feather("//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180520/anno.feather") 42 | 43 | anno <- anno %>% 44 | filter(cluster_id %in% 1:133) 45 | 46 | # dendcluster_id is the annotation for cluster ordering based on the current, bootstrapped dendrogram 47 | genes_stats <- summarySE(data = anno, 48 | measurevar = "confusion_label", 49 | groupvars = "dendcluster_id") 50 | 51 | dendcluster_anno <- anno %>% 52 | select(dendcluster_id, dendcluster_label, dendcluster_color) %>% 53 | unique() 54 | 55 | confusion_plot <- ggplot() + 56 | # geom_quasirandom from the ggbeeswarm package 57 | # makes violin-shaped jittered point plots 58 | geom_quasirandom(data = anno, 59 | aes(x = dendcluster_id, 60 | y = confusion_label), 61 | color = "skyblue", 62 | # Need to set position_jitter height = 0 to prevent 63 | # jitter on the y-axis, which changes data representation 64 | position = position_jitter(width = .3,height = 0), 65 | size = 0.1) + 66 | # Errorbars built using genes_stats values 67 | geom_errorbar(data = genes_stats, 68 | aes(x = dendcluster_id, 69 | ymin = q25, 70 | ymax = q75), 71 | size = 0.2) + 72 | # Median points from genes_stats 73 | geom_point(data = genes_stats, 74 | aes(x = dendcluster_id, 75 | y = median), 76 | color = "red", 77 | size = 0.5) + 78 | # Cluster labels as text objects 79 | geom_text(data = dendcluster_anno, 80 | aes(x = dendcluster_id, 81 | y = -0.1, 82 | label = dendcluster_label, 83 | color = dendcluster_color), 84 | angle = 90, 85 | hjust = 1, 86 | vjust = 0.3, 87 | size = 2) + 88 | # Median values next to cluster labels, since there's space there. 89 | geom_text(data = genes_stats, 90 | aes(x = dendcluster_id, 91 | y = 0, 92 | label = round(median,2)), 93 | angle = 90, 94 | hjust = 1, 95 | vjust = 0.3, 96 | size = 2) + 97 | scale_color_identity() + 98 | # Expand the y scale so that the labels are visible 99 | scale_y_continuous("Confusion Score", 100 | limits = c(-0.5, 1.2), 101 | breaks = seq(0, 1.2, 0.2)) + 102 | # Remove X-axis title 103 | scale_x_continuous("") + 104 | theme_bw() + 105 | # Theme tuning 106 | theme(axis.text.x = element_blank(), 107 | axis.ticks = element_blank(), 108 | panel.border = element_blank(), 109 | panel.grid.major.x = element_blank(), 110 | panel.grid.minor.x = element_blank()) 111 | 112 | ggsave("confusion_plot.pdf", confusion_plot, width = 12, height = 5, useDingbats = F) 113 | -------------------------------------------------------------------------------- /Figures/Ext_Data_Fig_4/intermediate_fraction_plot.R: -------------------------------------------------------------------------------- 1 | library(dplyr) 2 | library(ggplot2) 3 | options(stringsAsFactors = F) 4 | 5 | anno <- read_feather("//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180520/anno.feather") 6 | 7 | anno <- anno %>% 8 | filter(cluster_id %in% 1:133) %>% 9 | mutate(core_int_label = ifelse(core_int_label == "core", 10 | "core", 11 | "int")) 12 | 13 | plot_data <- anno %>% 14 | group_by(dendcluster_id, dendcluster_label, dendcluster_color) %>% 15 | summarise(n_core = sum(core_int_label == "core"), 16 | n_int = sum(core_int_label == "int"), 17 | n_cells = n()) %>% 18 | mutate(frac_core = n_core/n_cells, 19 | frac_int = n_int/n_cells) 20 | 21 | hlines <- data.frame(yintercept = seq(0,1,by=0.2)) 22 | 23 | ggplot() + 24 | geom_rect(data = plot_data, 25 | aes(xmin = dendcluster_id - 0.5, 26 | xmax = dendcluster_id + 0.5, 27 | ymin = 0, 28 | ymax = frac_core, 29 | fill = dendcluster_color)) + 30 | geom_rect(data = plot_data, 31 | aes(xmin = dendcluster_id - 0.5, 32 | xmax = dendcluster_id + 0.5, 33 | ymin = frac_core, 34 | ymax = 1, 35 | fill = "#000000")) + 36 | geom_text(data = plot_data, 37 | aes(x = dendcluster_id, 38 | y = -0.02, 39 | color = dendcluster_color, 40 | label = dendcluster_label), 41 | angle = 90, 42 | vjust = 0.3, 43 | hjust = 1, 44 | size = 2) + 45 | geom_hline(data = hlines, 46 | aes(yintercept = yintercept), 47 | linetype = "dashed", 48 | size = 0.1) + 49 | scale_fill_identity() + 50 | scale_color_identity() + 51 | theme_void() + 52 | scale_y_continuous(limits = c(-1, 1)) 53 | 54 | ggsave("intermediate_fraction.pdf", width = 7.5, height = 2) 55 | -------------------------------------------------------------------------------- /Figures/Ext_Data_Fig_5/all_markers_heatmap.R: -------------------------------------------------------------------------------- 1 | library(dplyr) 2 | library(scrattch) 3 | library(ggplot2) 4 | library(feather) 5 | library(reshape2) 6 | options(stringsAsFactors = F) 7 | 8 | load("//allen/programs/celltypes/workgroups/rnaseqanalysis/V1_ALM/process_new/cell.marker.dat.rda") 9 | 10 | anno_file <- "//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180520/anno.feather" 11 | 12 | anno <- read_feather(anno_file) 13 | 14 | cluster_anno <- anno %>% 15 | select(sample_id, dendcluster_id, dendcluster_color) 16 | 17 | n_cells <- ncol(cell.marker.dat) 18 | n_genes <- nrow(cell.marker.dat) 19 | 20 | # remove combinatorial genes. ~ rows > 1110 21 | #cell.marker.dat <- cell.marker.dat[c(966:1200,1:965),] 22 | # shift pan markers. ~rows > 1040 23 | 24 | gene_pos <- data.frame(gene = rownames(cell.marker.dat)) %>% 25 | mutate(ypos = n():1) 26 | 27 | sample_pos <- data.frame(sample_id = colnames(cell.marker.dat)) %>% 28 | mutate(xpos = 1:n()) 29 | 30 | data_melt <- melt(cell.marker.dat) 31 | names(data_melt)[1:2] <- c("gene","sample_id") 32 | 33 | cluster_rects <- data.frame(sample_id = colnames(cell.marker.dat)) %>% 34 | mutate(xpos = 1:n()) %>% 35 | mutate(ymin = nrow(cell.marker.dat) + 5, ymax = nrow(cell.marker.dat) + 20) %>% 36 | left_join(cluster_anno) 37 | 38 | plot_data <- data_melt %>% 39 | left_join(gene_pos) %>% 40 | left_join(sample_pos) %>% 41 | mutate(value = ifelse(is.na(value), 0, value)) %>% 42 | mutate(fill = values_to_colors(value, colorset = c("white","black"))) 43 | 44 | all_tile_plot <- ggplot() + 45 | geom_tile(data = plot_data, 46 | aes(x = xpos, y = ypos, 47 | fill = fill)) + 48 | geom_rect(data = cluster_rects, 49 | aes(xmin = xpos - 0.5, xmax = xpos + 0.5, 50 | ymin = ymin, ymax = ymax, 51 | fill = dendcluster_color)) + 52 | scale_fill_identity() + 53 | scale_y_continuous(expand = c(0,0)) + 54 | scale_x_continuous(expand = c(0,0)) + 55 | theme_void() 56 | 57 | ggsave("zy_100sampled_truth_bw_rect.png", all_tile_plot, height = (n_genes*3)/600 + 20/600 + 0.0365, width = n_cells/600 + 0.0365, dpi = 600) 58 | 59 | # Rearrange to match dendrogram 60 | cell.marker.dat2 <- cell.marker.dat[c(1:240, 61 | 436:780, 62 | 241:435, 63 | 781:1000),] 64 | 65 | 66 | gene_pos <- data.frame(gene = rownames(cell.marker.dat2)) %>% 67 | mutate(ypos = n():1) 68 | 69 | sample_pos <- data.frame(sample_id = colnames(cell.marker.dat2)) %>% 70 | left_join(cluster_anno) %>% 71 | arrange(dendcluster_id) %>% 72 | mutate(xpos = 1:n()) 73 | 74 | cell.marker.dat2 <- cell.marker.dat2[,sample_pos$sample_id] 75 | 76 | data_melt <- melt(cell.marker.dat2) 77 | names(data_melt)[1:2] <- c("gene","sample_id") 78 | 79 | cluster_rects <- data.frame(sample_id = colnames(cell.marker.dat2)) %>% 80 | mutate(xpos = 1:n()) %>% 81 | mutate(ymin = nrow(cell.marker.dat2) + 5, ymax = nrow(cell.marker.dat2) + 20) %>% 82 | left_join(cluster_anno) 83 | 84 | plot_data <- data_melt %>% 85 | left_join(gene_pos) %>% 86 | left_join(sample_pos) %>% 87 | mutate(value = ifelse(is.na(value), 0, value)) %>% 88 | mutate(fill = values_to_colors(value, colorset = c("white","black"))) 89 | 90 | all_tile_plot <- ggplot() + 91 | geom_tile(data = plot_data, 92 | aes(x = xpos, y = ypos, 93 | fill = fill)) + 94 | geom_rect(data = cluster_rects, 95 | aes(xmin = xpos - 0.5, xmax = xpos + 0.5, 96 | ymin = ymin, ymax = ymax, 97 | fill = dendcluster_color)) + 98 | scale_fill_identity() + 99 | scale_y_continuous(expand = c(0,0)) + 100 | scale_x_continuous(expand = c(0,0)) + 101 | theme_void() 102 | 103 | ggsave("dend_order_zy_100sampled_truth_bw_rect.png", all_tile_plot, height = (n_genes*3)/600 + 20/600 + 0.0365, width = n_cells/600 + 0.0365, dpi = 600) 104 | -------------------------------------------------------------------------------- /Figures/Ext_Data_Fig_6/cluster_marker_heatmaps.R: -------------------------------------------------------------------------------- 1 | library(dplyr) 2 | library(ggplot2) 3 | library(feather) 4 | library(scrattch) 5 | library(dendextend) 6 | options(stringsAsFactors = F) 7 | 8 | fdir <- "//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180520/" 9 | 10 | anno <- read_feather(file.path(fdir,"anno.feather")) 11 | 12 | markers <- read.csv("markers_2018-05-25.csv") 13 | 14 | Global.markers <- unique(unlist(markers$Global[markers$Global != ""])) 15 | Inh.markers <- unique(unlist(markers$Inh[markers$Inh != ""])) 16 | Ex.markers <- unique(unlist(markers$Ex[markers$Ex != ""])) 17 | 18 | all_clusters <- anno %>% 19 | filter(cluster_id %in% 1:133) %>% 20 | arrange(dendcluster_id) %>% 21 | select(dendcluster_id) %>% 22 | unique() %>% unlist() 23 | 24 | inh_clusters <- anno %>% 25 | filter(class_label == "GABAergic") %>% 26 | arrange(dendcluster_id) %>% 27 | select(dendcluster_id) %>% 28 | unique() %>% unlist() 29 | 30 | exc_clusters <- anno %>% 31 | filter(class_label == "Glutamatergic") %>% 32 | arrange(dendcluster_id) %>% 33 | select(dendcluster_id) %>% 34 | unique() %>% unlist() 35 | 36 | all_plot <- group_heatmap_plot(data_source = fdir, 37 | genes = Global.markers, 38 | group_by = "dendcluster", 39 | clusters = all_clusters, 40 | calculation = "trimmed_mean", 41 | labelheight = 13, 42 | showcounts = F) 43 | 44 | inh_plot <- group_heatmap_plot(data_source = fdir, 45 | genes = Inh.markers, 46 | group_by = "dendcluster", 47 | clusters = inh_clusters, 48 | calculation = "trimmed_mean", 49 | labelheight = 13, 50 | showcounts = F) 51 | 52 | exc_plot <- group_heatmap_plot(data_source = fdir, 53 | genes = Ex.markers, 54 | group_by = "dendcluster", 55 | clusters = exc_clusters, 56 | calculation = "trimmed_mean", 57 | labelheight = 13, 58 | showcounts = F) 59 | 60 | ggsave("broad_glia_markers.pdf",all_plot,height = 8, width = 15) 61 | ggsave("inh_markers.pdf",inh_plot,height = 8, width = 7.5) 62 | ggsave("exc_markers.pdf",exc_plot,height = 10, width = 7.5) 63 | -------------------------------------------------------------------------------- /Figures/Ext_Data_Fig_6/markers.csv: -------------------------------------------------------------------------------- 1 | Global,Ex,Inh 2 | Fez1,Slc17a7,Gad2 3 | Phyhipl,Rtn4rl2,Slc32a1 4 | Aplp1,Slc30a3,Adarb2 5 | Gnao1,Cux2,Nfix 6 | Caly,Stard8,Nfib 7 | Snap25,Otof,Cxcl14 8 | Atp1a3,Rrad,Tnfaip8l3 9 | Camk2b,Penk,Cplx3 10 | Syt1,Col23a1,Lamp5 11 | Gabrg2,Agmat,Pax6 12 | Fabp3,Emx2,Krt73 13 | Stmn2,Hpgd,Sncg 14 | Kif5c,Macc1,Prox1 15 | Slc17a7,Olfr78,Ndnf 16 | Nrn1,Tcap,Ntn1 17 | Neurod2,Rorb,Pde11a 18 | Sv2b,Fgf17,Pdlim5 19 | Satb2,Rspo1,Lsp1 20 | Tbr1,Whrn,Slc35d3 21 | Vsig2,Scnn1a,Cxcl5 22 | Slc32a1,Endou,Nkx2-1 23 | Gad2,Fezf2,Pdlim3 24 | Dlx1,Hsd11b1,Vip 25 | Dlx5,Batf3,Serpinf1 26 | Dlx2,Colq,Slc10a4 27 | Dlx6os1,Pld5,Cldn10 28 | Slc6a1,Wfdc18,C1ql1 29 | Sox2,Htr2c,Etv1 30 | Adamts19,Wfdc17,Ptprk 31 | Trp73,Aldh1a7,C1ql3 32 | Lhx5,Rxfp2,C1ql1 33 | Lhx1,Cpa6,Crabp1 34 | S100a13,Gkn1,Crispld2 35 | S100a16,Tgfb1,Slc17a8 36 | Cmtm5,Prss35,Cyb5r2 37 | Kcnj10,Ctsc,Nr1h4 38 | Gja1,Osr1,Prss12 39 | Gjb6,Oprk1,Igfbp6 40 | Aqp4,Cd52,Calb2 41 | Lcat,Col18a1,Gpc3 42 | Acsbg1,Car3,Pthlh 43 | Olig1,Ctxn3,Sostdc1 44 | Sox10,Ctgf,Mab21l1 45 | Neu4,Fam84b,Tpbg 46 | Gpr17,Chrna6,Slc18a3 47 | Sapcd2,Fn1,Rspo1 48 | Ccnb1,Erg,Lmo1 49 | Brca1,Tac1,Myl1 50 | Plp1,Pvalb,Rspo4 51 | Cldn11,Dppa1,Chat 52 | Mag,Bmp5,Cbln4 53 | Mog,Depdc7,Cbln4 54 | Mog,Stac,Gsx2 55 | Opalin,C1ql2,Itih5 56 | Gjb1,Ptgfr,Mybpc1 57 | Hapln2,Slco2a1,Lhx6 58 | Cyba,Lrrc9,Sox6 59 | Ctsh,Pappa2,Sst 60 | Ifitm3,Foxp2,Calb1 61 | S100a11,Sla2,Chodl 62 | Vtn,Slc17a8,Gabrg1 63 | Igf2,Trh,Lgr6 64 | Slc6a13,Syt6,Tacr3 65 | Dcn,H60b,Etv1 66 | Col1a1,Gpr139,Il1rapl2 67 | Slc13a3,Nxph4,Edn1 68 | H2-Q7,Mup5,Hpse 69 | Six2,Fam150a,Igsf9 70 | Cxcl9,Fbxl7,Myh8 71 | Slc22a6,F2r,Myh13 72 | Col15a1,Olfr111,Chrna2 73 | Slc38a11,Olfr110,Ptgdr 74 | Atp13a5,Serpinb11,Crygs 75 | Ace2,P2ry12,Crhr2 76 | Kcnj8,Kynu,Col6a1 77 | Acta2,Crh,Tacstd2 78 | Myh11,Hsd17b2,Th 79 | Slco1a4,Mup3,Nts 80 | Cldn5,Lhx5,Il7 81 | Ctss,Trp73,Bche 82 | C1qa,Reln,Tac1 83 | C1qb,Cdh13,Pvalb 84 | C1qc,Cpne7,Syt2 85 | F13a1,Marcksl1,Ntf3 86 | Mrc1,Alcam,Ostn 87 | Mpeg1,Efr3a,Gpr149 88 | Siglech,,Vipr2 89 | ,,Meis2 90 | ,,Adamts19 91 | ,,Reln 92 | ,,Nr2f2 93 | ,,Cck 94 | ,,Npy 95 | ,,Crh 96 | ,,Tac2 97 | -------------------------------------------------------------------------------- /Figures/Ext_Data_Fig_8/saturation_plot.R: -------------------------------------------------------------------------------- 1 | library(dplyr) 2 | library(ggplot2) 3 | library(feather) 4 | #library(scrattch) 5 | 6 | color_mean <- function(x) { 7 | library(grDevices) 8 | 9 | rgb_x <- col2rgb(x) 10 | rgb_mean <- rowMeans(rgb_x) 11 | new_hex <- rgb(rgb_mean["red"]/255, 12 | rgb_mean["green"]/255, 13 | rgb_mean["blue"]/255) 14 | 15 | new_hex 16 | 17 | } 18 | 19 | ramp_colors <- function (x, minval = NULL, maxval = NULL, colorset = c("darkblue", 20 | "dodgerblue", "gray80", "orangered", "red")) 21 | { 22 | heat_colors <- colorRampPalette(colorset)(1001) 23 | if (is.null(maxval)) { 24 | maxval <- max(x) 25 | } else { 26 | x[x > maxval] <- maxval 27 | } 28 | if (is.null(minval)) { 29 | minval <- min(x) 30 | } else { 31 | x[x < minval] <- minval 32 | } 33 | heat_positions <- unlist(round((x - minval)/(maxval - minval) * 34 | 1000 + 1, 0)) 35 | colors <- heat_colors[heat_positions] 36 | colors 37 | } 38 | 39 | load("//allen/programs/celltypes/workgroups/rnaseqanalysis/V1_ALM/process_new/saturation.df.rda") 40 | 41 | anno <- read_feather("//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180520/anno.feather") 42 | 43 | anno <- anno %>% 44 | filter(dendcluster_id < 134) 45 | 46 | sat_xpos <- saturation.df %>% 47 | group_by(sample_size) %>% 48 | summarise(n_cl = sum(merge.cl == "nochange") + length(unique(merge.cl)) - 1) %>% 49 | mutate(sample_size = as.numeric(as.character(sample_size))) %>% 50 | arrange(desc(sample_size)) %>% 51 | mutate(xpos = 1:n() + 1) %>% 52 | mutate(xmin = xpos - 0.3, 53 | xmax = xpos + 0.3) 54 | 55 | dendcluster_ypos <- anno %>% 56 | group_by(cl, dendcluster_id, dendcluster_label, dendcluster_color) %>% 57 | summarise(cl_size = n()) %>% 58 | ungroup() %>% 59 | arrange(-dendcluster_id) %>% 60 | mutate(ypos = 1:n()) %>% 61 | mutate(ymin = ypos - 0.5, 62 | ymax = ypos + 0.5) 63 | 64 | full_pos <- dendcluster_ypos %>% 65 | mutate(xpos = 1, 66 | xmin = xpos - 0.3, 67 | xmax = xpos + 0.3) %>% 68 | mutate(cl_size = ifelse(is.na(cl_size), 0, cl_size)) %>% 69 | mutate(fill = ifelse(is.na(cl_size), "#FFFFFF", ramp_colors(cl_size, 0, 20, colorset = c("orange","white","skyblue")))) 70 | 71 | sat_pos <- saturation.df %>% 72 | mutate(sample_size = as.numeric(as.character(sample_size))) %>% 73 | mutate(org.cl = as.numeric(as.character(org.cl))) %>% 74 | left_join(dendcluster_ypos, by = c("org.cl" = "cl")) %>% 75 | left_join(sat_xpos, by = "sample_size") %>% 76 | mutate(dendcluster_color = ifelse(merge.cl == "absent",NA,dendcluster_color)) %>% 77 | mutate(cl_size.x = ifelse(is.na(cl_size.x), 0, cl_size.x)) %>% 78 | mutate(outline = ifelse(merge.cl %in% c("absent","nochange"), NA, "#000000"), 79 | fill = ifelse(is.na(cl_size.x), "#FFFFFF", ramp_colors(cl_size.x, 0, 20, colorset = c("orange","white","skyblue")))) 80 | 81 | sat_unmerged <- sat_pos %>% 82 | filter(merge.cl %in% c("absent","nochange")) 83 | 84 | sat_merged <- sat_pos %>% 85 | filter(!merge.cl %in% c("absent","nochange")) %>% 86 | group_by(sample_size, merge.cl) %>% 87 | mutate(dendcluster_color = color_mean(dendcluster_color)) 88 | 89 | sat_curves <- sat_merged %>% 90 | group_by(sample_size, merge.cl) %>% 91 | arrange(ypos) %>% 92 | mutate(x = xmax, xend = xmax, 93 | y = ypos, yend = lead(ypos)) %>% 94 | 95 | ungroup() %>% 96 | filter(complete.cases(.)) 97 | 98 | merge_plot <- ggplot() + 99 | geom_rect(data = full_pos, 100 | aes(xmin = xmin, xmax = xmax, 101 | ymin = ymin, ymax = ymax, 102 | fill = fill, 103 | color = "white"), 104 | size = 0.1) + 105 | geom_text(data = full_pos, 106 | aes(x = xmax, y = ymax, 107 | label = cl_size), 108 | hjust = 0, vjust = 1, 109 | size = 2) + 110 | geom_text(data = dendcluster_ypos, 111 | aes(x = 0.5, y = ypos, 112 | label = dendcluster_label, 113 | color = dendcluster_color), 114 | hjust = 0, vjust = 0.3, 115 | size = 2*5/6) + 116 | geom_rect(data = sat_unmerged, 117 | aes(xmin = xmin, xmax = xmax, 118 | ymin = ymin, ymax = ymax, 119 | fill = fill, 120 | color = "white"), 121 | size = 0.1) + 122 | geom_text(data = sat_unmerged, 123 | aes(x = xmax, y = ymax, 124 | label = cl_size.x), 125 | hjust = 0, vjust = 1, 126 | size = 2*5/6) + 127 | geom_rect(data = sat_merged, 128 | aes(xmin = xmin + 0.2, xmax = xmax + 0.2, 129 | ymin = ymin, ymax = ymax, 130 | fill = fill, 131 | color = outline)) + 132 | geom_text(data = sat_merged, 133 | aes(x = xmax + 0.2, y = ymax, 134 | label = cl_size.x), 135 | hjust = 0, vjust = 1, 136 | size = 2*5/6) + 137 | geom_curve(data = sat_curves, 138 | aes(x = x + 0.2, xend = xend + 0.2, 139 | y = y, yend = yend), 140 | curvature = -1) + 141 | scale_fill_identity() + 142 | scale_color_identity() + 143 | scale_x_reverse(limits = c(12, 0)) + 144 | theme_void() 145 | 146 | merge_plot 147 | 148 | ggsave("merge_plot_numbers.pdf",merge_plot,width = 7.5, height = 10) 149 | 150 | cluster_counts <- saturation.df %>% 151 | group_by(sample_size) %>% 152 | summarise(n_clusters = length(unique(as.character(merge.cl))) - 1 + sum(merge.cl == "nochange")) 153 | 154 | cluster_counts 155 | -------------------------------------------------------------------------------- /Figures/Ext_Data_Fig_9/build_dotplot.R: -------------------------------------------------------------------------------- 1 | library(dplyr) 2 | library(feather) 3 | library(ggplot2) 4 | options(stringsAsFactors = F) 5 | 6 | anno <- read_feather("//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180520/anno.feather") 7 | 8 | anno <- anno %>% 9 | filter(cluster_id %in% 1:133) 10 | 11 | cluster_anno <- anno %>% 12 | select(cluster_id, cluster_label, cluster_color) %>% 13 | unique() 14 | 15 | cre_anno <- anno %>% 16 | select(cre_id, cre_label, cre_color) %>% 17 | unique() 18 | 19 | plot_anno <- anno %>% 20 | filter(inj_type_label == "No Injection") %>% 21 | filter(facs_label != "RFP-negative") 22 | 23 | plot_data <- plot_anno %>% 24 | group_by(cre_id, cre_label, cre_color, 25 | cluster_id, cluster_label, cluster_color) %>% 26 | summarise(n_cells = n()) 27 | 28 | max_cre <- max(anno$cre_id) 29 | max_cluster <- max(anno$cluster_id) 30 | 31 | bg_rects <- data.frame(xmin = 1:(max_cre/2)*2 - 0.5, 32 | xmax = (1:(max_cre/2) + 1)*2 - 1.5, 33 | ymin = 0.5, 34 | ymax = max_cluster + 0.5) 35 | 36 | class_breaks <- anno %>% 37 | group_by(subclass_id) %>% 38 | summarise(yintercept = max(cluster_id) + 0.5) 39 | 40 | ggplot() + 41 | geom_rect(data = bg_rects, 42 | aes(xmin = xmin, xmax = xmax, 43 | ymin = ymin, ymax = ymax), 44 | fill = "#CAD7D7") + 45 | geom_hline(data = class_breaks, 46 | aes(yintercept = yintercept), 47 | size = 0.2) + 48 | geom_point(data = plot_data, 49 | aes(x = cre_id, 50 | y = cluster_id, 51 | size = n_cells, 52 | fill = cluster_color, 53 | color = "#FFFFFF"), 54 | pch = 21) + 55 | geom_text(data = cluster_anno, 56 | aes(x = 1, 57 | y = cluster_id, 58 | label = cluster_label, 59 | color = cluster_color), 60 | hjust = 1, 61 | vjust = 0.3, 62 | size = 2*5/6) + 63 | scale_color_identity() + 64 | scale_fill_identity() + 65 | scale_y_reverse("", 66 | limits = c(133.5, -0.5), 67 | expand = c(0,0)) + 68 | scale_x_continuous("", 69 | expand = c(0,0), 70 | limits = c(-5, 57), 71 | breaks = cre_anno$cre_id, 72 | labels = cre_anno$cre_label, 73 | position = "top") + 74 | scale_size_area(max_size = 5, 75 | breaks = c(1,10,50,100,200,500)) + 76 | theme_classic(base_size = 7) + 77 | theme(axis.text.y = element_blank(), 78 | axis.ticks = element_blank(), 79 | axis.line = element_blank(), 80 | axis.text.x = element_text(angle = 90, 81 | hjust = 0)) 82 | 83 | ggsave("cre_dotplot.pdf",width = 8.25, height = 10.5, useDingbats = F) 84 | 85 | 86 | # Pan-broad_specific ordering 87 | pan_cre <- c("Gad2-IRES-Cre","Snap25-IRES2-Cre","Slc32a1-IRES-Cre","Slc17a7-IRES2-Cre") 88 | broad_cre <- c("Rbp4-Cre_KL100","Pvalb-IRES-Cre","Vip-IRES-Cre","Sst-IRES-Cre","Ctgf-T2A-dgCre","Htr3a-Cre_NO152","Trib2-F2A-CreERT2","Ntsr1-Cre_GN220","Tlx3-Cre_PL56","Ndnf-IRES2-dgCre","Rorb-IRES2-Cre") 89 | 90 | plot_anno2 <- plot_anno %>% 91 | mutate(cre_class_id = ifelse(cre_label %in% pan_cre, 1, 92 | ifelse(cre_label %in% broad_cre,2,3))) 93 | 94 | 95 | cre_anno2 <- plot_anno2 %>% 96 | select(cre_id, cre_label, cre_color, cre_class_id) %>% 97 | unique() %>% 98 | arrange(cre_class_id, cre_id) %>% 99 | mutate(new_cre_id = 2:(n()+1)) 100 | 101 | plot_anno2 <- plot_anno2 %>% 102 | left_join(cre_anno2) 103 | 104 | plot_data2 <- plot_anno2 %>% 105 | group_by(new_cre_id, cre_label, cre_color, 106 | cluster_id, cluster_label, cluster_color) %>% 107 | summarise(n_cells = n()) 108 | 109 | ggplot() + 110 | geom_rect(data = bg_rects, 111 | aes(xmin = xmin, xmax = xmax, 112 | ymin = ymin, ymax = ymax), 113 | fill = "#CAD7D7") + 114 | geom_point(data = plot_data2, 115 | aes(x = new_cre_id, 116 | y = cluster_id, 117 | size = n_cells, 118 | color = cluster_color)) + 119 | geom_text(data = cluster_anno, 120 | aes(x = 1, 121 | y = cluster_id, 122 | label = cluster_label, 123 | color = cluster_color), 124 | hjust = 1, 125 | vjust = 0.3, 126 | size = 2*5/6) + 127 | geom_hline(data = class_breaks, 128 | aes(yintercept = yintercept), 129 | size = 0.2) + 130 | scale_color_identity() + 131 | scale_y_reverse("", 132 | limits = c(116.5, -0.5), 133 | expand = c(0,0)) + 134 | scale_x_continuous("", 135 | expand = c(0,0), 136 | limits = c(-5, 37), 137 | breaks = cre_anno2$new_cre_id, 138 | labels = cre_anno2$cre_label, 139 | position = "top") + 140 | scale_size_area(max_size = 3, 141 | breaks = c(10,50,100,200,500)) + 142 | theme_classic(base_size = 7) + 143 | theme(axis.text.y = element_blank(), 144 | axis.ticks = element_blank(), 145 | axis.line = element_blank(), 146 | axis.text.x = element_text(angle = 90, 147 | hjust = 0)) 148 | 149 | ggsave("cre_grouped_dotplot.pdf",width = 8.25, height = 10.5, useDingbats = F) 150 | -------------------------------------------------------------------------------- /Figures/Figure_2/class_comparison_colors_simplified.csv: -------------------------------------------------------------------------------- 1 | class_label1,class_label2,class_type,class_color 2 | Glutamatergic,Glutamatergic,both_gluta,#00A809 3 | Glutamatergic,GABAergic,gluta_gaba,#B03FAB 4 | GABAergic,Glutamatergic,gluta_gaba,#B03FAB 5 | GABAergic,GABAergic,both_gaba,#DA1C5C 6 | Glutamatergic,Endothelial,neu_non,#8B5E3C 7 | Endothelial,Glutamatergic,neu_non,#8B5E3C 8 | GABAergic,Endothelial,neu_non,#8B5E3C 9 | Endothelial,GABAergic,neu_non,#8B5E3C 10 | Non-Neuronal,Glutamatergic,neu_non,#8B5E3C 11 | Glutamatergic,Non-Neuronal,neu_non,#8B5E3C 12 | Non-Neuronal,GABAergic,neu_non,#8B5E3C 13 | GABAergic,Non-Neuronal,neu_non,#8B5E3C 14 | Non-Neuronal,Non-Neuronal,non_non,#726658 15 | Non-Neuronal,Endothelial,non_non,#726658 16 | Endothelial,Non-Neuronal,non_non,#726658 17 | -------------------------------------------------------------------------------- /Figures/Figure_2/color_functions.R: -------------------------------------------------------------------------------- 1 | hsv_palette <- function(n_colors, 2 | hue_start = 0, 3 | hue_end = max(1, n-1)/n, 4 | sat_start = 0.55, 5 | sat_end = 1, 6 | sat_steps = 4, 7 | val_start = 1, 8 | val_end = 0.8, 9 | val_steps = 3) { 10 | sats <- rep_len(seq(sat_start, sat_end, length.out = sat_steps),length.out = n_colors) 11 | vals <- rep_len(seq(val_start, val_end, length.out = val_steps),length.out = n_colors) 12 | if(hue_end < hue_start) { 13 | rev(sub("FF$","",rainbow(n_colors, s = sats, v = vals, start = hue_end, end = hue_start))) 14 | } else { 15 | sub("FF$","",rainbow(n_colors, s = sats, v = vals, start = hue_start, end = hue_end)) 16 | } 17 | } 18 | 19 | color_mean <- function(x) { 20 | library(grDevices) 21 | 22 | rgb_x <- col2rgb(x) 23 | rgb_mean <- rowMeans(rgb_x) 24 | new_hex <- rgb(rgb_mean["red"]/255, 25 | rgb_mean["green"]/255, 26 | rgb_mean["blue"]/255) 27 | 28 | new_hex 29 | 30 | } 31 | 32 | check_s <- function(x, min_sat = 0) { 33 | library(grDevices) 34 | 35 | hsv_x <- rgb2hsv(col2rgb(x)) 36 | 37 | hsv_x["s",] < min_sat 38 | 39 | } 40 | 41 | check_v <- function(x, min_val = 0) { 42 | library(grDevices) 43 | 44 | hsv_x <- rgb2hsv(col2rgb(x)) 45 | 46 | hsv_x["v",] < min_val 47 | 48 | } 49 | 50 | adjust_s <- function(x, shift_sat = 0.3) { 51 | library(grDevices) 52 | 53 | hsv_x <- rgb2hsv(col2rgb(x)) 54 | 55 | hsv_x["s", ] <- hsv_x["s", ] + shift_sat 56 | if(hsv_x["s", ] > 1) { 57 | hsv_x["s", ] <- 1 58 | } else if (hsv_x["s",] < 0) { 59 | hsv_x["s", ] <- 0 60 | } 61 | 62 | hsv(hsv_x[1,], hsv_x[2,], hsv_x[3,]) 63 | } 64 | 65 | adjust_v <- function(x, shift_val = 0.3) { 66 | library(grDevices) 67 | 68 | hsv_x <- rgb2hsv(col2rgb(x)) 69 | 70 | hsv_x["v", ] <- hsv_x["v", ] + shift_val 71 | if(hsv_x["v", ] > 1) { 72 | hsv_x["v", ] <- 1 73 | } else if (hsv_x["v",] < 0) { 74 | hsv_x["v", ] <- 0 75 | } 76 | 77 | hsv(hsv_x[1,], hsv_x[2,], hsv_x[3,]) 78 | } 79 | -------------------------------------------------------------------------------- /Figures/Figure_2/region_marker_genes_panel.R: -------------------------------------------------------------------------------- 1 | library(dplyr) 2 | library(feather) 3 | library(ggplot2) 4 | library(ggrepel) 5 | options(stringsAsFactors = F) 6 | 7 | load("//allen/programs/celltypes/workgroups/rnaseqanalysis/V1_ALM/process_new/V1.ALM.diff.gene.rda") 8 | exc.gene.df <- gene.df %>% 9 | mutate(class = "exc") 10 | 11 | load("//allen/programs/celltypes/workgroups/rnaseqanalysis/V1_ALM/process_new/V1.ALM.diff.inh.gene.rda") 12 | inh.gene.df <- gene.df %>% 13 | mutate(class = "inh") 14 | 15 | # Find DE genes for Inhibitory types 16 | load("//allen/programs/celltypes/workgroups/rnaseqanalysis/V1_ALM/process_new/region.de.df.rda") 17 | region.de.df[] <- lapply(region.de.df, function(x) ifelse(is.factor(x), as.character(x), x)) 18 | 19 | anno <- read_feather("//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180520/anno.feather") 20 | 21 | gene.df <- rbind(exc.gene.df, inh.gene.df) %>% 22 | filter(!grepl("^LOC",gene), 23 | !grepl("Rik$",gene)) 24 | 25 | alm_exc_label <- gene.df %>% 26 | filter(class == "exc") %>% 27 | arrange(-prop.diff) %>% 28 | head(25) %>% 29 | mutate(color = ifelse(class == "exc", "#212021", "#BE1E2D"), 30 | fill = "#E5E1E5") 31 | 32 | visp_exc_label <- gene.df %>% 33 | filter(class == "exc") %>% 34 | arrange(prop.diff) %>% 35 | head(25) %>% 36 | mutate(color = ifelse(class == "exc", "#848EBC", "#BE1E2D"), 37 | fill = "#D0D7EF") 38 | 39 | alm_inh_label <- gene.df %>% 40 | filter(class == "inh") %>% 41 | arrange(-prop.diff) %>% 42 | head(10) %>% 43 | mutate(color = ifelse(class == "exc", "#212021", "#DD6091"), 44 | fill = "#E5E1E5") 45 | 46 | visp_inh_label <- gene.df %>% 47 | filter(class == "inh") %>% 48 | arrange(prop.diff) %>% 49 | head(10) %>% 50 | mutate(color = ifelse(class == "exc", "#848EBC", "#DD6091"), 51 | fill = "#D0D7EF") 52 | 53 | 54 | all_labels <- rbind(alm_exc_label, visp_exc_label, alm_inh_label, visp_inh_label) 55 | #all_labels <- rbind(alm_label, visp_label, inh_label) 56 | 57 | no_label <- gene.df %>% 58 | filter(!gene %in% all_labels$gene) %>% 59 | mutate(color = ifelse(class == "exc", "#808080", "#EA9BA1")) 60 | 61 | 62 | marker_plot <- ggplot() + 63 | geom_point(data = no_label, 64 | aes(x = -prop.diff, 65 | y = log10(prop.max), 66 | color = color), 67 | #alpha = 1, 68 | size = 0.2) + 69 | geom_point(data = all_labels, 70 | aes(x = -prop.diff, 71 | y = log10(prop.max), 72 | color = color), 73 | size = 1)+ 74 | geom_label_repel(data = all_labels, 75 | aes(x = -prop.diff, 76 | y = log10(prop.max), 77 | label = gene, 78 | fill = fill, 79 | color = color), 80 | size = 2, 81 | min.segment.length = 0) + 82 | scale_color_identity() + 83 | scale_fill_identity() + 84 | scale_x_continuous(limits = c(-1.5, 1.2), 85 | breaks = seq(-1,1,by=0.25), 86 | labels = c(1,0.75,0.5,0.25,0,0.25,0.5,0.75,1)) + 87 | scale_y_continuous(limits = c(-3,0.5), 88 | breaks = log10(c(0.001,0.01,0.1,0.25,0.5,0.75,1)), 89 | labels = c(0.001,0.01, 0.1, 0.25, 0.5, 0.75, 1)) + 90 | theme_bw(7) + 91 | theme(panel.grid.minor = element_blank()) 92 | 93 | marker_plot 94 | 95 | ggsave("marker_plot.pdf",marker_plot, width = 7.5, height = 3.5, useDingbats = FALSE) 96 | 97 | # One layer, with marginals 98 | library(ggExtra) 99 | all_points <- rbind(all_labels, no_label %>% mutate(fill = color)) 100 | 101 | margin_points <- all_points %>% 102 | mutate(color = ifelse(class == "exc", "#808080", "#EA9BA1")) 103 | 104 | marker_plot2 <- ggplot() + 105 | geom_point(data = margin_points, 106 | aes(x = -prop.diff, 107 | y = log10(prop.max), 108 | color = color), 109 | size = 0.5) + 110 | geom_point(data = all_labels, 111 | aes(x = -prop.diff, 112 | y = log10(prop.max), 113 | color = color), 114 | size = 0.5) + 115 | geom_label_repel(data = all_labels, 116 | aes(x = -prop.diff, 117 | y = log10(prop.max), 118 | label = gene, 119 | fill = fill, 120 | color = color), 121 | size = 2) + 122 | scale_color_identity() + 123 | scale_fill_identity() + 124 | scale_x_continuous(limits = c(-1.5, 1.2), 125 | breaks = seq(-1,1,by=0.25), 126 | labels = c(1,0.75,0.5,0.25,0,0.25,0.5,0.75,1)) + 127 | scale_y_continuous(limits = c(-3,0.5), 128 | breaks = log10(c(0.001,0.01,0.1,0.25,0.5,0.75,1)), 129 | labels = c(0.001,0.01, 0.1, 0.25, 0.5, 0.75, 1)) + 130 | theme_bw(7) + 131 | theme(panel.grid.minor = element_blank()) 132 | 133 | marker_plot2 <- ggMarginal(marker_plot2, 134 | margins = "x", 135 | size = 10, 136 | groupColour = TRUE, 137 | groupFill = TRUE, 138 | bw = 0.05) 139 | 140 | ggsave("marker_plot_marginals.pdf",marker_plot2, width = 7.5, height = 3.5, useDingbats = FALSE) 141 | -------------------------------------------------------------------------------- /Figures/Figure_3/prune_leaf_custom.R: -------------------------------------------------------------------------------- 1 | prune.dendrogram <- function(dend, leaves, reindex_dend = TRUE, ...) { 2 | leaves <- as.character(leaves) 3 | 4 | for(i in seq_along(leaves)) 5 | { 6 | # this function is probably not the fastest - but it works... 7 | dend <- prune_leaf(dend, leaves[i]) # move step by stem to remove all of these leaves... 8 | } 9 | 10 | if(reindex_dend) dend <- reindex_dend(dend) 11 | 12 | return(dend) 13 | } 14 | 15 | stats_midcache.dendrogram <- function (x, type = "hclust", quiet = FALSE) 16 | { 17 | type <- match.arg(type) 18 | stopifnot(inherits(x, "dendrogram")) 19 | setmid <- function(d, type) { 20 | if (is.leaf(d)) 21 | return(d) 22 | k <- length(d) 23 | if (k < 1) 24 | stop("dendrogram node with non-positive #{branches}") 25 | r <- d 26 | midS <- 0 27 | for (j in 1L:k) { 28 | r[[j]] <- unclass(setmid(d[[j]], type)) 29 | midS <- midS + .midDend(r[[j]]) 30 | } 31 | if (!quiet && type == "hclust" && k != 2) 32 | warning("midcache() of non-binary dendrograms only partly implemented") 33 | attr(r, "midpoint") <- (.memberDend(d[[1L]]) + midS)/2 34 | r 35 | } 36 | setmid(x, type = type) 37 | } 38 | 39 | stats_.midDend <- function (x) { 40 | if (is.null(mp <- attr(x, "midpoint"))) 0 else mp 41 | } 42 | .midDend <- stats_.midDend # copied so that they would work inside the various functions here... 43 | 44 | stats_.memberDend <- function (x) 45 | { 46 | r <- attr(x, "x.member") 47 | if (is.null(r)) { 48 | r <- attr(x, "members") 49 | if (is.null(r)) 50 | r <- 1L 51 | } 52 | r 53 | } 54 | .memberDend <- stats_.memberDend 55 | 56 | 57 | prune_leaf <- function(dend, leaf_name,...) 58 | { 59 | labels_dend <- labels(dend) 60 | 61 | if(length(labels_dend) != length(unique(labels_dend))) warning("Found dubplicate labels in the tree (this might indicate a problem in the tree you supplied)") 62 | 63 | if(!(leaf_name %in% labels_dend)) { # what to do if there is no such leaf inside the tree 64 | warning(paste("There is no leaf with the label", leaf_name , "in the tree you supplied", "\n" , "Returning original tree", "\n" )) 65 | return(dend) 66 | } 67 | 68 | if(sum(labels_dend %in% leaf_name) > 1) { # what to do if there is no such leaf inside the tree 69 | warning(paste("There are multiple leaves by the name of '", leaf_name , "' in the tree you supplied. Their locations is:", 70 | paste(which(labels_dend %in% leaf_name), collapse = ","),"\n" , "Returning original tree", "\n" )) 71 | return(dend) 72 | } 73 | 74 | is.father.of.leaf.to.remove <- function(dend, leaf_name) 75 | { 76 | # this function checks if the leaf we wish to remove is the direct child of the current branch (dend) we entered the function 77 | is.father <- FALSE 78 | for(i in seq_len(length(dend))) 79 | { 80 | if(is.leaf(dend[[i]]) == TRUE && labels(dend[[i]]) == leaf_name) is.father <- TRUE 81 | } 82 | return(is.father) 83 | } 84 | 85 | 86 | remove_leaf_if_child <- function(dend, leaf_name) 87 | { 88 | # print(labels(dend)) 89 | if(all(labels(dend) != leaf_name)) 90 | { # if the leaf we want to remove is not in this branch, simply return the branch without going deeper intoit. 91 | return(dend) 92 | } else { # but if the leaf we want to remove is here somewhere, go on searching 93 | attr(dend, "members") <- attr(dend, "members") - 1 94 | 95 | if(!is.father.of.leaf.to.remove(dend, leaf_name)) # if you are not the father, then go on and make this function work on each child 96 | { 97 | for(i in seq_len(length(dend))) 98 | { 99 | dend[[i]] <- remove_leaf_if_child(dend[[i]], leaf_name) 100 | } 101 | } else { # we'll merge 102 | if(length(dend) == 2) { 103 | leaf_location <- 1 104 | # if leaf location is 1, then move branch in leaf 2 to be the new x 105 | if(is.leaf(dend[[leaf_location]]) == T && labels(dend[[leaf_location]]) == leaf_name) { 106 | 107 | branch_to_bumpup <- 2 108 | dend <- dend[[branch_to_bumpup]] 109 | } else { # else - the leaf location must be located in position "2" 110 | 111 | branch_to_bumpup <- 1 112 | dend <- dend[[branch_to_bumpup]] 113 | } 114 | } else if(length(dend) > 2) { 115 | # If more than 2 branches, check if any are leaves 116 | dend_leaves <- unlist(lapply(dend, is.leaf)) 117 | if(sum(dend_leaves) > 0) { 118 | # If so, check for matching labels to the leaf to prune 119 | dend_labels <- unlist(lapply(dend, function(x) attr(x, "label"))) 120 | dend_matches <- dend_labels == leaf_name 121 | # Return a list containing the non-matching branches 122 | dend[dend_leaves & dend_matches] <- NULL 123 | # Note that in some cases, the following DOES NOT yield a correct result: 124 | # dend <- dend[!(dend_leaves & dend_matches)] 125 | 126 | # If the length is now 1, it can be bumped up 127 | if(length(dend) == 1) { 128 | dend <- dend[[1]] 129 | } 130 | 131 | } 132 | } 133 | } 134 | } 135 | return(dend) 136 | } 137 | 138 | 139 | new_dend <- remove_leaf_if_child(dend, leaf_name) 140 | new_dend <- suppressWarnings(stats_midcache.dendrogram(new_dend)) # fixes the attributes 141 | # new_x <- fix_members_attr.dendrogram(new_x) # fix the number of memebers attr for each node 142 | return(new_dend) 143 | } -------------------------------------------------------------------------------- /Figures/Figure_4/color_functions.R: -------------------------------------------------------------------------------- 1 | hsv_palette <- function(n_colors, 2 | hue_start = 0, 3 | hue_end = max(1, n-1)/n, 4 | sat_start = 0.55, 5 | sat_end = 1, 6 | sat_steps = 4, 7 | val_start = 1, 8 | val_end = 0.8, 9 | val_steps = 3) { 10 | sats <- rep_len(seq(sat_start, sat_end, length.out = sat_steps),length.out = n_colors) 11 | vals <- rep_len(seq(val_start, val_end, length.out = val_steps),length.out = n_colors) 12 | if(hue_end < hue_start) { 13 | rev(sub("FF$","",rainbow(n_colors, s = sats, v = vals, start = hue_end, end = hue_start))) 14 | } else { 15 | sub("FF$","",rainbow(n_colors, s = sats, v = vals, start = hue_start, end = hue_end)) 16 | } 17 | } 18 | 19 | color_mean <- function(x) { 20 | library(grDevices) 21 | 22 | rgb_x <- col2rgb(x) 23 | rgb_mean <- rowMeans(rgb_x) 24 | new_hex <- rgb(rgb_mean["red"]/255, 25 | rgb_mean["green"]/255, 26 | rgb_mean["blue"]/255) 27 | 28 | new_hex 29 | 30 | } 31 | 32 | check_s <- function(x, min_sat = 0) { 33 | library(grDevices) 34 | 35 | hsv_x <- rgb2hsv(col2rgb(x)) 36 | 37 | hsv_x["s",] < min_sat 38 | 39 | } 40 | 41 | check_v <- function(x, min_val = 0) { 42 | library(grDevices) 43 | 44 | hsv_x <- rgb2hsv(col2rgb(x)) 45 | 46 | hsv_x["v",] < min_val 47 | 48 | } 49 | 50 | adjust_s <- function(x, shift_sat = 0.3) { 51 | library(grDevices) 52 | 53 | hsv_x <- rgb2hsv(col2rgb(x)) 54 | 55 | hsv_x["s", ] <- hsv_x["s", ] + shift_sat 56 | if(hsv_x["s", ] > 1) { 57 | hsv_x["s", ] <- 1 58 | } else if (hsv_x["s",] <- 0) { 59 | hsv_x["s", ] <- 0 60 | } 61 | 62 | hsv(hsv_x[1,], hsv_x[2,], hsv_x[3,]) 63 | } 64 | 65 | adjust_v <- function(x, shift_val = 0.3) { 66 | library(grDevices) 67 | 68 | hsv_x <- rgb2hsv(col2rgb(x)) 69 | 70 | hsv_x["v", ] <- hsv_x["v", ] + shift_val 71 | if(hsv_x["v", ] > 1) { 72 | hsv_x["v", ] <- 1 73 | } else if (hsv_x["v",] <- 0) { 74 | hsv_x["v", ] <- 0 75 | } 76 | 77 | hsv(hsv_x[1,], hsv_x[2,], hsv_x[3,]) 78 | } 79 | -------------------------------------------------------------------------------- /Figures/Figure_4/prune_leaf_custom.R: -------------------------------------------------------------------------------- 1 | prune.dendrogram <- function(dend, leaves, reindex_dend = TRUE, ...) { 2 | leaves <- as.character(leaves) 3 | 4 | for(i in seq_along(leaves)) 5 | { 6 | # this function is probably not the fastest - but it works... 7 | dend <- prune_leaf(dend, leaves[i]) # move step by stem to remove all of these leaves... 8 | } 9 | 10 | if(reindex_dend) dend <- reindex_dend(dend) 11 | 12 | return(dend) 13 | } 14 | 15 | stats_midcache.dendrogram <- function (x, type = "hclust", quiet = FALSE) 16 | { 17 | type <- match.arg(type) 18 | stopifnot(inherits(x, "dendrogram")) 19 | setmid <- function(d, type) { 20 | if (is.leaf(d)) 21 | return(d) 22 | k <- length(d) 23 | if (k < 1) 24 | stop("dendrogram node with non-positive #{branches}") 25 | r <- d 26 | midS <- 0 27 | for (j in 1L:k) { 28 | r[[j]] <- unclass(setmid(d[[j]], type)) 29 | midS <- midS + .midDend(r[[j]]) 30 | } 31 | if (!quiet && type == "hclust" && k != 2) 32 | warning("midcache() of non-binary dendrograms only partly implemented") 33 | attr(r, "midpoint") <- (.memberDend(d[[1L]]) + midS)/2 34 | r 35 | } 36 | setmid(x, type = type) 37 | } 38 | 39 | stats_.midDend <- function (x) { 40 | if (is.null(mp <- attr(x, "midpoint"))) 0 else mp 41 | } 42 | .midDend <- stats_.midDend # copied so that they would work inside the various functions here... 43 | 44 | stats_.memberDend <- function (x) 45 | { 46 | r <- attr(x, "x.member") 47 | if (is.null(r)) { 48 | r <- attr(x, "members") 49 | if (is.null(r)) 50 | r <- 1L 51 | } 52 | r 53 | } 54 | .memberDend <- stats_.memberDend 55 | 56 | 57 | prune_leaf <- function(dend, leaf_name,...) 58 | { 59 | labels_dend <- labels(dend) 60 | 61 | if(length(labels_dend) != length(unique(labels_dend))) warning("Found dubplicate labels in the tree (this might indicate a problem in the tree you supplied)") 62 | 63 | if(!(leaf_name %in% labels_dend)) { # what to do if there is no such leaf inside the tree 64 | warning(paste("There is no leaf with the label", leaf_name , "in the tree you supplied", "\n" , "Returning original tree", "\n" )) 65 | return(dend) 66 | } 67 | 68 | if(sum(labels_dend %in% leaf_name) > 1) { # what to do if there is no such leaf inside the tree 69 | warning(paste("There are multiple leaves by the name of '", leaf_name , "' in the tree you supplied. Their locations is:", 70 | paste(which(labels_dend %in% leaf_name), collapse = ","),"\n" , "Returning original tree", "\n" )) 71 | return(dend) 72 | } 73 | 74 | is.father.of.leaf.to.remove <- function(dend, leaf_name) 75 | { 76 | # this function checks if the leaf we wish to remove is the direct child of the current branch (dend) we entered the function 77 | is.father <- FALSE 78 | for(i in seq_len(length(dend))) 79 | { 80 | if(is.leaf(dend[[i]]) == TRUE && labels(dend[[i]]) == leaf_name) is.father <- TRUE 81 | } 82 | return(is.father) 83 | } 84 | 85 | 86 | remove_leaf_if_child <- function(dend, leaf_name) 87 | { 88 | # print(labels(dend)) 89 | if(all(labels(dend) != leaf_name)) 90 | { # if the leaf we want to remove is not in this branch, simply return the branch without going deeper intoit. 91 | return(dend) 92 | } else { # but if the leaf we want to remove is here somewhere, go on searching 93 | attr(dend, "members") <- attr(dend, "members") - 1 94 | 95 | if(!is.father.of.leaf.to.remove(dend, leaf_name)) # if you are not the father, then go on and make this function work on each child 96 | { 97 | for(i in seq_len(length(dend))) 98 | { 99 | dend[[i]] <- remove_leaf_if_child(dend[[i]], leaf_name) 100 | } 101 | } else { # we'll merge 102 | if(length(dend) == 2) { 103 | leaf_location <- 1 104 | # if leaf location is 1, then move branch in leaf 2 to be the new x 105 | if(is.leaf(dend[[leaf_location]]) == T && labels(dend[[leaf_location]]) == leaf_name) { 106 | 107 | branch_to_bumpup <- 2 108 | dend <- dend[[branch_to_bumpup]] 109 | } else { # else - the leaf location must be located in position "2" 110 | 111 | branch_to_bumpup <- 1 112 | dend <- dend[[branch_to_bumpup]] 113 | } 114 | } else if(length(dend) > 2) { 115 | # If more than 2 branches, check if any are leaves 116 | dend_leaves <- unlist(lapply(dend, is.leaf)) 117 | if(sum(dend_leaves) > 0) { 118 | # If so, check for matching labels to the leaf to prune 119 | dend_labels <- unlist(lapply(dend, function(x) attr(x, "label"))) 120 | dend_matches <- dend_labels == leaf_name 121 | # Return a list containing the non-matching branches 122 | dend <- dend[!(dend_leaves & dend_matches)] 123 | } 124 | } 125 | } 126 | } 127 | return(dend) 128 | } 129 | 130 | 131 | new_dend <- remove_leaf_if_child(dend, leaf_name) 132 | new_dend <- suppressWarnings(stats_midcache.dendrogram(new_dend)) # fixes the attributes 133 | # new_x <- fix_members_attr.dendrogram(new_x) # fix the number of memebers attr for each node 134 | return(new_dend) 135 | } -------------------------------------------------------------------------------- /Figures/Figure_4/unified_layer_distributions_and_markers.R: -------------------------------------------------------------------------------- 1 | library(dplyr) 2 | library(ggplot2) 3 | library(cowplot) 4 | library(feather) 5 | library(dendextend) 6 | library(scrattch.vis) 7 | library(scrattch.io) 8 | options(stringsAsFactors = F) 9 | 10 | source("color_functions.R") 11 | source("prune_leaf_custom.R") 12 | source("layer_and_violin_functions.R") 13 | 14 | anno <- read_feather("//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180520/anno.feather") 15 | 16 | cluster_anno <- anno %>% 17 | select(cl, dendcluster_id, cluster_id, cluster_label, cluster_color) %>% 18 | unique() 19 | 20 | cocl_in <- read.csv("//allen/programs/celltypes/workgroups/rnaseqanalysis/V1_ALM/process_new/co.stats.csv") 21 | cocl_in$cluster_id.x <- cluster_anno$cluster_id[match(cocl_in$cl.x,cluster_anno$cl)] 22 | cocl_in$cluster_id.y <- cluster_anno$cluster_id[match(cocl_in$cl.y,cluster_anno$cl)] 23 | 24 | cocl <- cocl_in %>% 25 | select(cluster_id.x, cluster_id.y, co.ratio) %>% 26 | filter(cluster_id.x != cluster_id.y) 27 | 28 | dend <- readRDS("//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180520/dend.RData") 29 | 30 | gluta_layers <- build_layer_plot(anno, 31 | dend, 32 | cocl, 33 | dendcluster_ids = 1:55) 34 | 35 | ## Violin plots 36 | 37 | fdir <- "//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180520/" 38 | 39 | #gluta_genes <- split_cst("Slc30a3 Cux2 Stard8 Rorb Deptor Scnn1a Rspo1 Hsd11b1 Batf3 Colq Postn Wnt7b Lemd1 Rxfp2 Oprk1 Tunar Osr1 Car3 Fam84b Chrna6 Pvalb Stac Ctxn3 Pappa2 Foxp2 Slc17a8 Trhr Tshz2 Rapgef3 Trh Gpr139 Mup5 Nxph4 Efr3a Rprm Crym") 40 | gluta_genes <- split_cst("Slc30a3 Cux2 Stard8 Rorb Deptor Scnn1a Rspo1 Hsd11b1 Batf3 Colq Postn Wnt7b Lemd1 Rxfp2 Oprk1 Tunar Osr1 Car3 Fam84b Chrna6 Pvalb Stac Ctxn3 Pappa2 Foxp2 Slc17a8 Trhr Tshz2 Rapgef3 Trh Gpr139 Mup5 Nxph4 Efr3a Rprm Crym") 41 | remove_genes <- c("Efr3a","Stard8","Colq","Postn","Wnt7b","Lemd1","Rxfp2","Tunar","Stac","Ctxn3","Mup5") 42 | gluta_genes <- setdiff(gluta_genes, remove_genes) 43 | gluta_markers <- group_violin_plot2(data_source = fdir, 44 | group_by = "dendcluster", 45 | clusters = 1:55, 46 | genes = gluta_genes, 47 | logscale = TRUE, 48 | labelheight = 2, 49 | max_width = 10, 50 | fontsize = 5, 51 | showcounts = FALSE) 52 | 53 | all_plots <- plot_grid(gluta_layers, 54 | gluta_markers, 55 | align = "v", 56 | nrow = 2, 57 | rel_widths = 1, 58 | rel_heights = 1, 59 | labels = c("b","c")) 60 | 61 | save_plot("panels_bc.pdf", 62 | all_plots, 63 | ncol = 2, 64 | nrow = 2, 65 | base_width = 7.5/2, 66 | base_height = 6/2) 67 | -------------------------------------------------------------------------------- /Figures/Figure_5/color_functions.R: -------------------------------------------------------------------------------- 1 | hsv_palette <- function(n_colors, 2 | hue_start = 0, 3 | hue_end = max(1, n-1)/n, 4 | sat_start = 0.55, 5 | sat_end = 1, 6 | sat_steps = 4, 7 | val_start = 1, 8 | val_end = 0.8, 9 | val_steps = 3) { 10 | sats <- rep_len(seq(sat_start, sat_end, length.out = sat_steps),length.out = n_colors) 11 | vals <- rep_len(seq(val_start, val_end, length.out = val_steps),length.out = n_colors) 12 | if(hue_end < hue_start) { 13 | rev(sub("FF$","",rainbow(n_colors, s = sats, v = vals, start = hue_end, end = hue_start))) 14 | } else { 15 | sub("FF$","",rainbow(n_colors, s = sats, v = vals, start = hue_start, end = hue_end)) 16 | } 17 | } 18 | 19 | color_mean <- function(x) { 20 | library(grDevices) 21 | 22 | rgb_x <- col2rgb(x) 23 | rgb_mean <- rowMeans(rgb_x) 24 | new_hex <- rgb(rgb_mean["red"]/255, 25 | rgb_mean["green"]/255, 26 | rgb_mean["blue"]/255) 27 | 28 | new_hex 29 | 30 | } 31 | 32 | check_s <- function(x, min_sat = 0) { 33 | library(grDevices) 34 | 35 | hsv_x <- rgb2hsv(col2rgb(x)) 36 | 37 | hsv_x["s",] < min_sat 38 | 39 | } 40 | 41 | check_v <- function(x, min_val = 0) { 42 | library(grDevices) 43 | 44 | hsv_x <- rgb2hsv(col2rgb(x)) 45 | 46 | hsv_x["v",] < min_val 47 | 48 | } 49 | 50 | adjust_s <- function(x, shift_sat = 0.3) { 51 | library(grDevices) 52 | 53 | hsv_x <- rgb2hsv(col2rgb(x)) 54 | 55 | hsv_x["s", ] <- hsv_x["s", ] + shift_sat 56 | if(hsv_x["s", ] > 1) { 57 | hsv_x["s", ] <- 1 58 | } else if (hsv_x["s",] <- 0) { 59 | hsv_x["s", ] <- 0 60 | } 61 | 62 | hsv(hsv_x[1,], hsv_x[2,], hsv_x[3,]) 63 | } 64 | 65 | adjust_v <- function(x, shift_val = 0.3) { 66 | library(grDevices) 67 | 68 | hsv_x <- rgb2hsv(col2rgb(x)) 69 | 70 | hsv_x["v", ] <- hsv_x["v", ] + shift_val 71 | if(hsv_x["v", ] > 1) { 72 | hsv_x["v", ] <- 1 73 | } else if (hsv_x["v",] <- 0) { 74 | hsv_x["v", ] <- 0 75 | } 76 | 77 | hsv(hsv_x[1,], hsv_x[2,], hsv_x[3,]) 78 | } 79 | -------------------------------------------------------------------------------- /Figures/Figure_5/prune_leaf_custom.R: -------------------------------------------------------------------------------- 1 | prune.dendrogram <- function(dend, leaves, reindex_dend = TRUE, ...) { 2 | leaves <- as.character(leaves) 3 | 4 | for(i in seq_along(leaves)) 5 | { 6 | # this function is probably not the fastest - but it works... 7 | dend <- prune_leaf(dend, leaves[i]) # move step by stem to remove all of these leaves... 8 | } 9 | 10 | if(reindex_dend) dend <- reindex_dend(dend) 11 | 12 | return(dend) 13 | } 14 | 15 | stats_midcache.dendrogram <- function (x, type = "hclust", quiet = FALSE) 16 | { 17 | type <- match.arg(type) 18 | stopifnot(inherits(x, "dendrogram")) 19 | setmid <- function(d, type) { 20 | if (is.leaf(d)) 21 | return(d) 22 | k <- length(d) 23 | if (k < 1) 24 | stop("dendrogram node with non-positive #{branches}") 25 | r <- d 26 | midS <- 0 27 | for (j in 1L:k) { 28 | r[[j]] <- unclass(setmid(d[[j]], type)) 29 | midS <- midS + .midDend(r[[j]]) 30 | } 31 | if (!quiet && type == "hclust" && k != 2) 32 | warning("midcache() of non-binary dendrograms only partly implemented") 33 | attr(r, "midpoint") <- (.memberDend(d[[1L]]) + midS)/2 34 | r 35 | } 36 | setmid(x, type = type) 37 | } 38 | 39 | stats_.midDend <- function (x) { 40 | if (is.null(mp <- attr(x, "midpoint"))) 0 else mp 41 | } 42 | .midDend <- stats_.midDend # copied so that they would work inside the various functions here... 43 | 44 | stats_.memberDend <- function (x) 45 | { 46 | r <- attr(x, "x.member") 47 | if (is.null(r)) { 48 | r <- attr(x, "members") 49 | if (is.null(r)) 50 | r <- 1L 51 | } 52 | r 53 | } 54 | .memberDend <- stats_.memberDend 55 | 56 | 57 | prune_leaf <- function(dend, leaf_name,...) 58 | { 59 | labels_dend <- labels(dend) 60 | 61 | if(length(labels_dend) != length(unique(labels_dend))) warning("Found dubplicate labels in the tree (this might indicate a problem in the tree you supplied)") 62 | 63 | if(!(leaf_name %in% labels_dend)) { # what to do if there is no such leaf inside the tree 64 | warning(paste("There is no leaf with the label", leaf_name , "in the tree you supplied", "\n" , "Returning original tree", "\n" )) 65 | return(dend) 66 | } 67 | 68 | if(sum(labels_dend %in% leaf_name) > 1) { # what to do if there is no such leaf inside the tree 69 | warning(paste("There are multiple leaves by the name of '", leaf_name , "' in the tree you supplied. Their locations is:", 70 | paste(which(labels_dend %in% leaf_name), collapse = ","),"\n" , "Returning original tree", "\n" )) 71 | return(dend) 72 | } 73 | 74 | is.father.of.leaf.to.remove <- function(dend, leaf_name) 75 | { 76 | # this function checks if the leaf we wish to remove is the direct child of the current branch (dend) we entered the function 77 | is.father <- FALSE 78 | for(i in seq_len(length(dend))) 79 | { 80 | if(is.leaf(dend[[i]]) == TRUE && labels(dend[[i]]) == leaf_name) is.father <- TRUE 81 | } 82 | return(is.father) 83 | } 84 | 85 | 86 | remove_leaf_if_child <- function(dend, leaf_name) 87 | { 88 | # print(labels(dend)) 89 | if(all(labels(dend) != leaf_name)) 90 | { # if the leaf we want to remove is not in this branch, simply return the branch without going deeper intoit. 91 | return(dend) 92 | } else { # but if the leaf we want to remove is here somewhere, go on searching 93 | attr(dend, "members") <- attr(dend, "members") - 1 94 | 95 | if(!is.father.of.leaf.to.remove(dend, leaf_name)) # if you are not the father, then go on and make this function work on each child 96 | { 97 | for(i in seq_len(length(dend))) 98 | { 99 | dend[[i]] <- remove_leaf_if_child(dend[[i]], leaf_name) 100 | } 101 | } else { # we'll merge 102 | if(length(dend) == 2) { 103 | leaf_location <- 1 104 | # if leaf location is 1, then move branch in leaf 2 to be the new x 105 | if(is.leaf(dend[[leaf_location]]) == T && labels(dend[[leaf_location]]) == leaf_name) { 106 | 107 | branch_to_bumpup <- 2 108 | dend <- dend[[branch_to_bumpup]] 109 | } else { # else - the leaf location must be located in position "2" 110 | 111 | branch_to_bumpup <- 1 112 | dend <- dend[[branch_to_bumpup]] 113 | } 114 | } else if(length(dend) > 2) { 115 | # If more than 2 branches, check if any are leaves 116 | dend_leaves <- unlist(lapply(dend, is.leaf)) 117 | if(sum(dend_leaves) > 0) { 118 | # If so, check for matching labels to the leaf to prune 119 | dend_labels <- unlist(lapply(dend, function(x) attr(x, "label"))) 120 | dend_matches <- dend_labels == leaf_name 121 | # Return a list containing the non-matching branches 122 | dend <- dend[!(dend_leaves & dend_matches)] 123 | } 124 | } 125 | } 126 | } 127 | return(dend) 128 | } 129 | 130 | 131 | new_dend <- remove_leaf_if_child(dend, leaf_name) 132 | new_dend <- suppressWarnings(stats_midcache.dendrogram(new_dend)) # fixes the attributes 133 | # new_x <- fix_members_attr.dendrogram(new_x) # fix the number of memebers attr for each node 134 | return(new_dend) 135 | } -------------------------------------------------------------------------------- /Figures/Figure_5/unified_layer_distributions_and_markers.R: -------------------------------------------------------------------------------- 1 | library(dplyr) 2 | library(ggplot2) 3 | library(cowplot) 4 | library(feather) 5 | library(dendextend) 6 | library(scrattch.vis) 7 | library(scrattch.io) 8 | options(stringsAsFactors = F) 9 | 10 | source("color_functions.R") 11 | source("prune_leaf_custom.R") 12 | source("layer_and_violin_functions.R") 13 | 14 | anno <- read_feather("//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180520/anno.feather") 15 | 16 | cluster_anno <- anno %>% 17 | select(cl, dendcluster_id, cluster_id, cluster_label, cluster_color) %>% 18 | unique() 19 | 20 | cocl_in <- read.csv("//allen/programs/celltypes/workgroups/rnaseqanalysis/V1_ALM/process_new/co.stats.csv") 21 | cocl_in$cluster_id.x <- cluster_anno$cluster_id[match(cocl_in$cl.x,cluster_anno$cl)] 22 | cocl_in$cluster_id.y <- cluster_anno$cluster_id[match(cocl_in$cl.y,cluster_anno$cl)] 23 | 24 | cocl <- cocl_in %>% 25 | select(cluster_id.x, cluster_id.y, co.ratio) %>% 26 | filter(cluster_id.x != cluster_id.y) 27 | 28 | dend <- readRDS("//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180520/dend.RData") 29 | 30 | sst_pvalb_layers <- build_layer_plot(anno, 31 | dend, 32 | cocl, 33 | dendcluster_ids = 85:115) 34 | 35 | lamp5_sncg_vip_layers <- build_layer_plot(anno, 36 | dend, 37 | cocl, 38 | dendcluster_ids = 56:84) 39 | 40 | ## Violin plots 41 | 42 | fdir <- "//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180520/" 43 | 44 | sst_pvalb_genes <- c("Sst","Chodl","Nos1","Mme","Tac1","Tacr3","Calb2","Nr2f2","Myh8","Tac2","Hpse","Crhr2","Crh","Esm1","Rxfp1","Nts","Pvalb","Gabrg1","Th","Calb1","Akr1c18","Sema3e","Gpr149","Reln","Tpbg","Cpne5","Vipr2","Nkx2-1") 45 | #sst_pvalb_genes <- unique(split_cst("Sst Chodl Nos1 Etv1 Il1rapl2 Myh8 Chrna2 Tac2 Crhr2 Etv1 Calb2 Hpse C1ql3 Crh Nts Pvalb Gabrg1 Th Prdm8 Calb1 Reln Gpr149 Cpne5 Vipr2 Nkx2-1")) 46 | sst_pvalb_markers <- group_violin_plot2(data_source = fdir, 47 | group_by = "dendcluster", 48 | clusters = 85:115, 49 | genes = sst_pvalb_genes, 50 | logscale = TRUE, 51 | labelheight = 2, 52 | max_width = 10, 53 | fontsize = 5, 54 | showcounts = FALSE) 55 | 56 | lamp5_sncg_vip_genes <- c("Lamp5","Ndnf","Krt73","Fam19a1","Pax6","Ntn1","Plch2","Lsp1","Lhx6","Nkx2-1","Vip","Sncg","Slc17a8","Nptx2","Gpr50","Itih5","Serpinf1","Igfbp6","Gpc3","Lmo1","Ptprt","Rspo4","Chat","Crispld2","Col15a1","Pde1a") 57 | #lamp5_sncg_vip_genes <- split_cst("Lamp5 Pax6 Ndnf Egln3 Pdlim5 Slc35d3 Vax1 Lhx6 Nkx2-1 Vip Calb2 Serpinf1 Col14a1 Sncg Ptprk Crispld2 Slc17a8 Igfbp6 Reln Gpc3 Lmo1 Cck Rspo4 Cbln4 Htr1f C1ql1 Itih5 ") 58 | lamp5_sncg_vip_markers <- group_violin_plot2(data_source = fdir, 59 | group_by = "dendcluster", 60 | clusters = 56:84, 61 | genes = lamp5_sncg_vip_genes, 62 | logscale = TRUE, 63 | labelheight = 2, 64 | max_width = 10, 65 | fontsize = 5, 66 | showcounts = FALSE) 67 | 68 | all_plots <- plot_grid(sst_pvalb_layers, 69 | lamp5_sncg_vip_layers, 70 | sst_pvalb_markers, 71 | lamp5_sncg_vip_markers, 72 | align = "v", 73 | nrow = 2, 74 | rel_widths = 1, 75 | rel_heights = 1, 76 | labels = c("c","d","e","f")) 77 | 78 | save_plot("panels_cdef.pdf", 79 | all_plots, 80 | ncol = 2, 81 | nrow = 2, 82 | base_width = 7.5/2, 83 | base_height = 6/2) 84 | -------------------------------------------------------------------------------- /Figures/Figure_6/L4 IT VISp Rspo1 L5 IT VISp Batf3.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenInstitute/tasic2018analysis/ae7857c520c675ca560b57ce152689df7bb21829/Figures/Figure_6/L4 IT VISp Rspo1 L5 IT VISp Batf3.rda -------------------------------------------------------------------------------- /Figures/Figure_6/L4 IT VISp Rspo1 L5 IT VISp Hsd11b1 Endou.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenInstitute/tasic2018analysis/ae7857c520c675ca560b57ce152689df7bb21829/Figures/Figure_6/L4 IT VISp Rspo1 L5 IT VISp Hsd11b1 Endou.rda -------------------------------------------------------------------------------- /Figures/Figure_6/L4.2016.dat.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenInstitute/tasic2018analysis/ae7857c520c675ca560b57ce152689df7bb21829/Figures/Figure_6/L4.2016.dat.rda -------------------------------------------------------------------------------- /Figures/Figure_6/L4.dat.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenInstitute/tasic2018analysis/ae7857c520c675ca560b57ce152689df7bb21829/Figures/Figure_6/L4.dat.rda -------------------------------------------------------------------------------- /Figures/Figure_6/L4.df.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenInstitute/tasic2018analysis/ae7857c520c675ca560b57ce152689df7bb21829/Figures/Figure_6/L4.df.rda -------------------------------------------------------------------------------- /Figures/Figure_6/L4.sampled.df.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenInstitute/tasic2018analysis/ae7857c520c675ca560b57ce152689df7bb21829/Figures/Figure_6/L4.sampled.df.rda -------------------------------------------------------------------------------- /Figures/Figure_6/L4_comparison_plots.R: -------------------------------------------------------------------------------- 1 | library(dplyr) 2 | library(ggplot2) 3 | library(cowplot) 4 | options(stringsAsFactors = F) 5 | 6 | load("gradient.df.list.rda") 7 | 8 | load("L4 IT VISp Rspo1 L5 IT VISp Batf3.rda") 9 | L4_vs_Batf3 <- df %>% 10 | mutate(sample_id = rownames(.)) 11 | load("L4 IT VISp Rspo1 L5 IT VISp Hsd11b1 Endou.rda") 12 | L4_vs_Hsd11b1 <- df %>% 13 | mutate(sample_id = rownames(.)) %>% 14 | mutate(prob = 1-prob) %>% 15 | mutate(eigen = -1 * eigen) 16 | load("L4.df.rda") 17 | L4_vs_L4 <- L4.df %>% 18 | mutate(sample_id = rownames(.)) %>% 19 | mutate(color = "#000000", 20 | color = ifelse(prob < 0.2, "#1241FF" , color), 21 | color = ifelse(prob > 0.4 & prob < 0.6, "#6808FF" , color), 22 | color = ifelse(prob > 0.8, "#C80052", color)) %>% 23 | mutate(eigen = -1 * eigen) 24 | 25 | this_theme <- theme_bw(4) + 26 | theme(panel.grid.minor = element_blank(), 27 | panel.grid.major = element_line(color = "#EBEBEB", size = 0.25), 28 | panel.border = element_blank(), 29 | axis.ticks = element_blank()) 30 | 31 | L4_vs_Batf3_points <- ggplot() + 32 | geom_point(data = L4_vs_Batf3, 33 | aes(x = prob, 34 | y = eigen), 35 | size = 0.2) + 36 | ylab("") + 37 | scale_x_continuous("", 38 | limits = c(-0.05, 1.05), 39 | expand = c(0,0)) + 40 | this_theme 41 | 42 | L4_vs_Batf3_ks_pval <- 0e0 43 | 44 | L4_vs_Batf3_sampled <- L4_vs_Batf3 %>% 45 | filter(sample_id %in% rownames(df.list[["69_72"]])) 46 | 47 | L4_vs_Batf3_bars <- ggplot(L4_vs_Batf3_sampled, aes(prob)) + 48 | geom_histogram(aes(y=..count../sum(..count..)), 49 | breaks = seq(0, 1 , by = 0.05), 50 | binwidth = 0.5, 51 | color = NA, 52 | size = 0.25) + 53 | geom_hline(yintercept = 0.05, 54 | size = 0.5) + 55 | ylim(0, 0.3) + 56 | ylab("") + 57 | scale_x_continuous("Assignment probability", 58 | limits = c(-0.05, 1.05), 59 | expand = c(0,0)) + 60 | this_theme 61 | 62 | L4_vs_Hsd11b1_points <- ggplot() + 63 | geom_point(data = L4_vs_Hsd11b1, 64 | aes(x = prob, 65 | y = eigen), 66 | size = 0.2) + 67 | ylab("") + 68 | scale_x_continuous("", 69 | limits = c(-0.05, 1.05), 70 | expand = c(0,0)) + 71 | this_theme 72 | 73 | L4_vs_Hsd11b1_ks_pval <- 4.84e-13 74 | 75 | L4_vs_Hsd11b1_sampled <- L4_vs_Hsd11b1 %>% 76 | filter(sample_id %in% rownames(df.list[["69_70"]])) 77 | 78 | L4_vs_Hsd11b1_bars <- ggplot(L4_vs_Hsd11b1_sampled, aes(prob)) + 79 | geom_histogram(aes(y=..count../sum(..count..)), 80 | breaks = seq(0, 1 , by = 0.05), 81 | binwidth = 0.5, 82 | color = NA, 83 | size = 0.25) + 84 | geom_hline(yintercept = 0.05, 85 | size = 0.5) + 86 | ylim(0, 0.3) + 87 | ylab("") + 88 | scale_x_continuous("Assignment probability", 89 | limits = c(-0.05, 1.05), 90 | expand = c(0,0)) + 91 | this_theme 92 | 93 | L4_vs_L4_points <- ggplot() + 94 | geom_point(data = L4_vs_L4, 95 | aes(x = prob, 96 | y = eigen, 97 | color = color), 98 | size = 0.2) + 99 | scale_color_identity() + 100 | ylab("Eigengenvector position") + 101 | xlab("") + 102 | this_theme 103 | 104 | L4_vs_L4_ks_pval <- 4.08e-3 105 | 106 | L4_vs_L4_sampled <- L4_vs_L4 %>% 107 | filter(sample_id %in% rownames(df.list[["58"]])) 108 | 109 | L4_vs_L4_bars <- ggplot(L4_vs_L4_sampled, aes(prob)) + 110 | geom_histogram(aes(y=..count../sum(..count..)), 111 | breaks = seq(0, 1 , by = 0.05), 112 | binwidth = 0.5, 113 | color = NA, 114 | size = 0.25) + 115 | geom_hline(yintercept = 0.05, 116 | size = 0.5) + 117 | ylim(0, 0.3) + 118 | ylab("Fraction of Cells") + 119 | scale_x_continuous("Assignment probability", 120 | limits = c(-0.05, 1.05), 121 | expand = c(0,0)) + 122 | this_theme 123 | 124 | all_plots <- plot_grid(L4_vs_L4_points, 125 | L4_vs_Hsd11b1_points, 126 | L4_vs_Batf3_points, 127 | L4_vs_L4_bars, 128 | L4_vs_Hsd11b1_bars, 129 | L4_vs_Batf3_bars, 130 | align = "v", 131 | nrow = 2, 132 | rel_widths = 1, 133 | rel_heights = c(1, 0.5)) 134 | 135 | save_plot("gradient_panels.pdf", 136 | all_plots, 137 | ncol = 3, 138 | nrow = 2, 139 | base_width = 3.5/3, 140 | base_height = 1, 141 | useDingbats = F) 142 | -------------------------------------------------------------------------------- /Figures/Figure_6/custom_annotate_cat.R: -------------------------------------------------------------------------------- 1 | annotate_cat <- function(df, 2 | col = NULL, base = NULL, 3 | sort_label = T, na_val = "ZZ_Missing", 4 | colorset = "varibow", color_order = "sort") { 5 | 6 | library(dplyr) 7 | library(viridis) 8 | 9 | if(class(try(is.character(col), silent = T)) == "try-error") { 10 | col <- lazyeval::expr_text(col) 11 | } else if(class(col) == "NULL") { 12 | stop("Specify a column (col) to annotate.") 13 | } 14 | 15 | if(class(try(is.character(base), silent = T)) == "try-error") { 16 | base <- lazyeval::expr_text(base) 17 | } else if(class(base) == "NULL") { 18 | base <- col 19 | } 20 | 21 | if(!is.character(df[[col]])) { 22 | df[[col]] <- as.character(df[[col]]) 23 | } 24 | 25 | df[[col]][is.na(df[[col]])] <- na_val 26 | 27 | x <- df[[col]] 28 | 29 | annotations <- data.frame(label = unique(x), stringsAsFactors = F) 30 | 31 | if(sort_label == T) { 32 | annotations <- annotations %>% arrange(label) 33 | } 34 | 35 | annotations <- annotations %>% 36 | mutate(id = 1:n()) 37 | 38 | if(colorset == "varibow") { 39 | colors <- varibow(nrow(annotations)) 40 | } else if(colorset == "rainbow") { 41 | colors <- sub("FF$","",rainbow(nrow(annotations))) 42 | } else if(colorset == "viridis") { 43 | colors <- sub("FF$","",viridis(nrow(annotations))) 44 | } else if(colorset == "magma") { 45 | colors <- sub("FF$","",magma(nrow(annotations))) 46 | } else if(colorset == "inferno") { 47 | colors <- sub("FF$","",inferno(nrow(annotations))) 48 | } else if(colorset == "plasma") { 49 | colors <- sub("FF$","",plasma(nrow(annotations))) 50 | } else if(colorset == "terrain") { 51 | colors <- sub("FF$","",terrain.colors(nrow(annotations))) 52 | } else if(is.character(colorset)) { 53 | colors <- colorRampPalette(colorset)(nrow(annotations)) 54 | } 55 | 56 | if(color_order == "random") { 57 | 58 | colors <- sample(colors, length(colors)) 59 | 60 | } 61 | 62 | annotations <- mutate(annotations, color = colors) 63 | 64 | names(annotations) <- paste0(base, c("_label","_id","_color")) 65 | 66 | names(df)[names(df) == col] <- paste0(base,"_label") 67 | 68 | df <- left_join(df, annotations, by = paste0(base, "_label")) 69 | 70 | df 71 | } 72 | 73 | color_mean <- function(x) { 74 | library(grDevices) 75 | 76 | rgb_x <- col2rgb(x) 77 | rgb_mean <- rowMeans(rgb_x) 78 | new_hex <- rgb(rgb_mean["red"]/255, 79 | rgb_mean["green"]/255, 80 | rgb_mean["blue"]/255) 81 | 82 | new_hex 83 | 84 | } -------------------------------------------------------------------------------- /Figures/Figure_6/gradient.df.list.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenInstitute/tasic2018analysis/ae7857c520c675ca560b57ce152689df7bb21829/Figures/Figure_6/gradient.df.list.rda -------------------------------------------------------------------------------- /Figures/Figure_6/l4_gradient_heatmap.R: -------------------------------------------------------------------------------- 1 | library(feather) 2 | library(ggplot2) 3 | library(reshape2) 4 | library(scrattch.vis) 5 | library(viridisLite) 6 | options(stringsAsFactors = F) 7 | 8 | load("L4.df.rda") 9 | 10 | genes <- c("Shisa3","Tmem215","Gpr88","Slc35g2","Rab3b","Scnn1a","Thsd7a","Nrsn2","Endou","Ccdc3", 11 | "Col8a1","Rassf2","Nog","Fmn1","Phactr2","Cnn3","Rreb1","Tshz2", 12 | "9530059O14Rik","Kitl","Cnr1","Pde1a","LOC105245781","Clec18a","St6galnac5","Galnt14","Doc2a","Xkr6","Lmo3","Stac2","Adam33","Chrm3","Ctxn3") 13 | 14 | gene_data <- read_feather("//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180520/data.feather", 15 | columns = c("sample_id",genes)) 16 | 17 | l4_data <- L4.df %>% 18 | mutate(sample_id = rownames(.)) %>% 19 | left_join(gene_data) %>% 20 | arrange(prob,eigen) %>% 21 | mutate(xpos = 1:n()) %>% 22 | mutate(color = values_to_colors(prob, colorset = viridis(10))) 23 | 24 | first_instance <- function(x, vals) { 25 | y <- numeric() 26 | for(val in vals) { 27 | y <- c(y, min(which(x >= val))) 28 | } 29 | y 30 | } 31 | 32 | x_breaks <- data.frame(val = seq(0, 1, by = 0.2), 33 | xpos = first_instance(l4_data$prob, seq(0,1, by = 0.2))) 34 | 35 | gene_ypos <- data.frame(gene = genes, 36 | ypos = 1:length(genes)) 37 | 38 | l4_plot_data <- melt(l4_data[,c("xpos",genes)], "xpos") %>% 39 | mutate(color = values_to_colors(log10(value + 1))) %>% 40 | rename(gene = variable) %>% 41 | left_join(gene_ypos) 42 | 43 | l4_heatmap <- ggplot() + 44 | geom_tile(data = l4_plot_data, 45 | aes(x = xpos, 46 | y = ypos, 47 | fill = color)) + 48 | geom_tile(data = l4_data, 49 | aes(x = xpos, 50 | y = -1, 51 | fill = color)) + 52 | scale_fill_identity() + 53 | scale_y_continuous("", expand = c(0,0), 54 | breaks = 1:length(genes), 55 | labels = genes) + 56 | scale_x_continuous("", expand = c(0,0), 57 | breaks = x_breaks$xpos, 58 | labels = x_breaks$val) + 59 | theme_bw(4) + 60 | theme(axis.ticks.y = element_blank(), 61 | panel.border = element_blank(), 62 | panel.grid = element_blank()) 63 | 64 | ggsave("l4_heatmap.pdf", 65 | l4_heatmap, 66 | width = 2, 67 | height = 2) 68 | -------------------------------------------------------------------------------- /Figures/Figure_6/sst150_coords.tsv: -------------------------------------------------------------------------------- 1 | cl sst150_cl sst150_cluster_id sst150_cluster_label sst150_cluster_color const_x const_y 2 | 31 1 30 Sst Chodl #FFFF00 1.25 8 3 | 36 2 31 Sst Mme Fam114a1 #FFBB33 5 4.5 4 | 35 3 32 Sst Tac1 Htr1d #804811 5 2.5 5 | 32 4 33 Sst Tac1 Tacr3 #B06411 5 1.5 6 | 38 5 34 Sst Calb2 Necab1 #BF480D 3 1.5 7 | 37 6 35 Sst Calb2 Pdlim5 #CC6D3D 1 3.75 8 | 33 7 36 Sst Nr2f2 Necab1 #FFDF11 1 4.5 9 | 34 8 37 Sst Myh8 Etv1 #D6C300 1 6 10 | 41 9 38 Sst Myh8 Fibin #FF8011 2 7.5 11 | 42 10 39 Sst Chrna2 Glra3 #FF9F2C 1 7 12 | 43 11 40 Sst Chrna2 Ptgdr #FFB307 2 6 13 | 45 12 41 Sst Tac2 Myh4 #D9C566 3 6.75 14 | 39 13 42 Sst Hpse Sema3c #BF9F00 2 4.5 15 | 40 14 43 Sst Hpse Cbln4 #806B19 3 3.75 16 | 47 15 44 Sst Crhr2 Efemp1 #B95541 4 4.5 17 | 48 16 45 Sst Crh 4930553C11Rik #C77767 5 5.25 18 | 46 17 46 Sst Esm1 #C11331 5 6.75 19 | 44 18 47 Sst Tac2 Tacstd2 #BF8219 3 5 20 | 50 19 48 Sst Rxfp1 Eya1 #994C00 4 5 21 | 49 20 49 Sst Rxfp1 Prdm8 #802600 4 6 22 | 51 21 50 Sst Nts #A81111 4.5 7.5 23 | -------------------------------------------------------------------------------- /Figures/Figure_6/sst300_cl_anno.csv: -------------------------------------------------------------------------------- 1 | "","ref.cl","ref.cl","cluster_id","cluster_color","zy_cluster_label","cluster_label","class_id","class_label","class_color","subclass_id","subclass_label","subclass_color","size","ALM","VISp","gene.counts","class_region_label","region_label","subclass_region_label","specific.markers","within.class.markers","within.class.node.markers","sibling.markers","cl","Notes","X","X.1","sst300_cl" 2 | "1","31",27,30,"#FFFF00","Sst Chodl","Sst Chodl",1,"GABAergic","#EF4136",4,"Sst","#F15A29",180,0.53,0.47,10182,"GABAergic","","Sst","Sit1,Chodl,Krt18,P2rx2","Chodl,Bmp3,Ntn1,Gpr126",NA,NA,31,"","","","1" 3 | "2","35",30,32,"#804811","Sst Tac1","Sst Tac1 Htr1d",1,"GABAergic","#EF4136",4,"Sst","#F15A29",289,0.47,0.53,9278,"GABAergic","","Sst",NA,"Tac1,Htr1d,Edn1,Mmd2","Tac1,Thbs2,Necab1,Fstl4","Hsdl2,Chrna4,Zfp536,Plxdc2",35,"","","","2" 4 | "3","36",31,31,"#FFBB33","Sst Etv1 Cdk15","Sst Mme Fam114a1",1,"GABAergic","#EF4136",4,"Sst","#F15A29",58,0.37,0.63,10218,"GABAergic","","Sst","Cryaa,Cdk15","Cdk15,Sox5,Fgf18,Akr1c18","Necab1,Ajap1","Adora1,Hsdl2,Zfp536,Plxdc2",36,"Need second opinion","","","3" 5 | "4","37",32,35,"#BF480D","Sst Calb2 Slc9a2_1","Sst Calb2 Pdlim5",1,"GABAergic","#EF4136",4,"Sst","#F15A29",268,0.5,0.5,9935,"GABAergic","","Sst",NA,"Slc9a2,Pcsk5,Daam2,Calb2","Daam2,Pcsk5,Calb2,Cbln4","Pcsk5,Ptprt",37,"","","","4" 6 | "5","33",29,36,"#FFDF11","Sst Etv1 Nr2f2_1","Sst Nr2f2 Necab1",1,"GABAergic","#EF4136",4,"Sst","#F15A29",101,0.35,0.65,10110,"GABAergic","","Sst","Rassf10,Pik3cg,Kl,Mstn","Pik3cg,Kl,LOC102633724,Mstn","Necab1,Fstl4,Ajap1","Adora1,Hsdl2",33,"","","","5" 7 | "6","39",33,42,"#BF9F00","Sst Hpse Sema3c","Sst Hpse Sema3c",1,"GABAergic","#EF4136",4,"Sst","#F15A29",268,0.15,0.85,8621,"GABAergic","","Sst","Prdm1,C1qtnf7,2610028E06Rik,Hpse","2610028E06Rik,Prdm1,Hpse,C1qtnf7","Hpse,Prdm1,Ackr3","Pnoc,C1qtnf7",39,"","","","6" 8 | "7","41",35,38,"#FF8011","Sst Myh8 Fibin","Sst Myh8 Fibin",1,"GABAergic","#EF4136",4,"Sst","#F15A29",82,0.2,0.8,8319,"GABAergic","","Sst",NA,"LOC105245003,Fibin,Hrh1,Igfbp5","Myh8,Kit,Ppapdc1a,Il1rapl2","Fam46a,Hs6st2",41,"","","","7" 9 | "8","42",36,39,"#FF9F2C","Sst Chrna2 Fam46a","Sst Chrna2 Kit",1,"GABAergic","#EF4136",4,"Sst","#F15A29",654,0.56,0.44,8738,"GABAergic","","Sst","Chrna2,Myh8,Myh4,Myh13","Chrna2,Myh13,Ppapdc1a,Myh8","Myh8,Kit,Ppapdc1a,Chrna2","Fam46a,Hs6st2,Me3,Elmo1",42,"Need second opinion","","","8" 10 | "9","43",37,40,"#FFB307","Sst Myh8 Ptgdr","Sst Myh13 Ptgdr",1,"GABAergic","#EF4136",4,"Sst","#F15A29",213,0.34,0.66,8950,"GABAergic","","Sst","LOC101056159,Myh4,Ptgdr,Myh13","LOC101056159,Efemp1,Ptgdr,Gulp1","Efemp1,Ptgdr","Sema3c,Efemp1",43,"","","","9" 11 | "10","45",39,41,"#D9C566","Sst Tac2 Myh4","Sst Tac2 Myh4",1,"GABAergic","#EF4136",4,"Sst","#F15A29",39,0.7,0.3,9688,"GABAergic","","Sst","Crygs","Fam212b,Drd2,Bmp2,Tac2",NA,"Htr1a,Eya4",45,"","","","10" 12 | "11","44",38,47,"#BF8219","Sst Tac2 Slc24a4","Sst Tac2 Tacstd2",1,"GABAergic","#EF4136",4,"Sst","#F15A29",123,0.3,0.7,9640,"GABAergic","","Sst","Tacstd2","Slc24a4,Tac2,Ndnf,Lmo1","Tacstd2","Sema3c,Tmem196",44,"","","","11" 13 | "12","46",40,46,"#C11331","Sst Crhr2 Nr2f2","Sst Esm1",1,"GABAergic","#EF4136",4,"Sst","#F15A29",152,0.47,0.53,10370,"GABAergic","","Sst","Crygs,Crhr2,Esm1","Esm1,Grm8,Crhr2,Stat4","Grm8,Nmbr,Crhr2,3110035E14Rik","Man1a,Crhr2",46,"","","","12" 14 | "13","47",41,44,"#B95541","Sst Crhr2 Twist2_1","Sst Crhr2 Efemp1",1,"GABAergic","#EF4136",4,"Sst","#F15A29",227,0.35,0.65,10052,"GABAergic","","Sst","Crygs,Hpse","Twist2,Agtr2,Cckbr,Adra1d","Grm8,Nmbr,Crhr2,3110035E14Rik","Man1a,Crhr2,Cdh7,Zfp536",47,"","","","13" 15 | "14","49",42,49,"#802600","Sst Rxfp1 Prdm8","Sst Rxfp1 Prdm8",1,"GABAergic","#EF4136",4,"Sst","#F15A29",183,0.13,0.87,9828,"GABAergic","","Sst","Tacstd2,Pard3b,Th","Prdm8,Rxfp1,Pla2g4a,Chrnb3","Rxfp1,Ptgs2,Tacstd2,C1ql3","Rxfp1,Ptprk,Zmat4,Itm2a",49,"","","","14" 16 | "15","51",44,50,"#A81111","Sst Nts","Sst Nts",1,"GABAergic","#EF4136",4,"Sst","#F15A29",43,0.17,0.83,9383,"GABAergic","","Sst","Nts,Th,Tacstd2,Gabrq","Nts,Th,LOC105245834,Diap3",NA,NA,51,"","","","15" 17 | -------------------------------------------------------------------------------- /Figures/Figure_6/sst300_coords.tsv: -------------------------------------------------------------------------------- 1 | sst300_cl sst300_cluster_id sst300_cluster_label sst300_cluster_color const_x const_y 2 | 1 1 Sst Chodl #FFFF00 1.25 8 3 | 2 2 Sst Tac1 Htr1d #804811 5 2.5 4 | 3 3 Sst Mme Fam114a1 #FFBB33 5 4.5 5 | 4 4 Sst Calb2 Pdlim5 #BF480D 1 3.75 6 | 5 5 Sst Nr2f2 Necab1 #FFDF11 1 4.5 7 | 6 6 Sst Hpse Sema3c #BF9F00 2 4.5 8 | 7 7 Sst Myh8 Fibin #FF8011 2 7.5 9 | 8 8 Sst Chrna2 Glra3 #FF9F2C 1 7 10 | 9 9 Sst Chrna2 Ptgdr #FFB307 2 6 11 | 10 10 Sst Tac2 Myh4 #D9C566 3 6.75 12 | 11 11 Sst Tac2 Tacstd2 #BF8219 3 5 13 | 12 12 Sst Esm1 #C11331 5 6.75 14 | 13 13 Sst Crhr2 Efemp1 #B95541 4 4.5 15 | 14 14 Sst Rxfp1 Prdm8 #802600 4 6 16 | 15 15 Sst Nts #A81111 4.5 7.5 17 | -------------------------------------------------------------------------------- /Figures/Figure_6/sst80_coords.tsv: -------------------------------------------------------------------------------- 1 | sst80_cl sst80_cluster_id sst80_cluster_label sst80_cluster_color const_x const_y 2 | 1 1 Sst Chodl_1 #FFFF00 1.25 8 3 | 2 2 Sst Chodl_2 #FFFF20 1.75 8 4 | 3 3 Sst Tac1 Tacr3 #B06411 5 1.5 5 | 4 4 Sst Tac1 Htr1d #804811 5 2.5 6 | 5 5 Sst Mme Fam114a1 #FFBB33 5 4.5 7 | 6 6 Sst Calb2 Pdlim5_1 #BF480D 1 2.5 8 | 7 7 Sst Calb2 Pdlim5_2 #BF482D 1.5 3.75 9 | 8 8 Sst Calb2 Pdlim5_3 #BF684D 1.5 3.25 10 | 9 9 Sst Calb2 Pdlim5_4 #BF680D 1 3.25 11 | 10 10 Sst Calb2 Necab1 #BF282D 3 1.5 12 | 11 11 Sst Nr2f2 Necab1 #FFDF11 1 4.5 13 | 12 12 Sst Myh8 Etv1 #FFDF31 1 6 14 | 13 13 Sst Hpse Sema3c_1 #BF9F00 2 4.75 15 | 14 14 Sst Hpse Sema3c_2 #BF9F40 2 4.25 16 | 15 15 Sst Hpse Sema3c_3 #BF9F60 2.5 5.5 17 | 16 16 Sst Hpse Cbln4 #806B19 3 3.75 18 | 17 17 Sst Myh8 Fibin #FF8031 2 7.5 19 | 18 18 Sst Chrna2 Glra3_1 #FF9F2C 0.5 7 20 | 19 19 Sst Chrna2 Glra3_2 #FF9F4C 1 6.75 21 | 20 20 Sst Chrna2 Glra3_3 #FF9F6C 1 7.5 22 | 21 21 Sst Chrna2 Ptgdr #FFB307 2 6 23 | 22 22 Sst Tac2 Myh4 #D9C566 3 6.75 24 | 23 23 Sst Tac2 Tacstd2 #BF8219 3 5 25 | 24 24 Sst Esm1 #C11331 5 6.75 26 | 25 25 Sst Crh 4930553C11Rik_1 #B95541 4.5 5.5 27 | 26 26 Sst Crh 4930553C11Rik_2 #B95561 5 5 28 | 27 27 Sst Crhr2 Efemp1 #B95581 4 4.5 29 | 28 28 Sst Rxfp1 Prdm8 #802600 4 6 30 | 29 29 Sst Rxfp1 Eya1 #994C20 4 5 31 | 30 30 Sst Nts #A81111 4.5 7.5 32 | -------------------------------------------------------------------------------- /Figures/Figure_6/sst_constellation_positions.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AllenInstitute/tasic2018analysis/ae7857c520c675ca560b57ce152689df7bb21829/Figures/Figure_6/sst_constellation_positions.xlsx -------------------------------------------------------------------------------- /Figures/Figure_6/sst_constellations.R: -------------------------------------------------------------------------------- 1 | library(ggplot2) 2 | library(dplyr) 3 | source("plotting_functions.R") 4 | 5 | 6 | 7 | cell.cl.map.df_to_transition.df.comb <- function(cell.cl.map.df, cl.df) { 8 | transition.df = with(cell.cl.map.df, as.data.frame(table(org.cl, transition.cl))) 9 | transition.df = transition.df[transition.df$Freq > 0,] 10 | transition.df$org.cl = as.character(transition.df$org.cl) 11 | transition.df$transition.cl = as.character(transition.df$transition.cl) 12 | 13 | ###combine transitions from both directions 14 | transition.df$cl.min = pmin(transition.df$org.cl, transition.df$transition.cl) 15 | transition.df$cl.max = pmax(transition.df$org.cl, transition.df$transition.cl) 16 | transition.df$cl.pair = paste(transition.df$cl.min, transition.df$cl.max) 17 | transition.df.comb= do.call("rbind",tapply(1:nrow(transition.df),transition.df$cl.pair, function(x){ 18 | tmp = transition.df[x,][1,] 19 | tmp$Freq = sum(transition.df[x,"Freq"]) 20 | tmp[,c(4,5,3)] 21 | })) 22 | cl.size = table(cell.cl.map.df$org.cl) 23 | transition.df.comb$cl.min.size = cl.size[transition.df.comb$cl.min] 24 | transition.df.comb$cl.max.size = cl.size[transition.df.comb$cl.max] 25 | transition.df.comb$ratio = with(transition.df.comb,Freq/pmin(cl.min.size,cl.max.size)) 26 | transition.df.comb$cl1_label = cl.df[as.character(transition.df.comb$cl.min),"cluster_label"] 27 | transition.df.comb$cl2_label = cl.df[as.character(transition.df.comb$cl.max),"cluster_label"] 28 | colnames(transition.df.comb)[c(1:3,6)] = c("cl.x","cl.y","trans_n","trans_ratio") 29 | 30 | transition.df.comb 31 | } 32 | load("//allen/programs/celltypes/workgroups/rnaseqanalysis/V1_ALM/process_new/Sst.cl.80.rda") 33 | cl.df$sst80_cl <- rownames(cl.df) 34 | 35 | write.csv(cl.df,"sst80_cl_anno.csv") 36 | 37 | load("//allen/programs/celltypes/workgroups/rnaseqanalysis/V1_ALM/process_new/Sst.cl.300.rda") 38 | cl.df$sst300_cl <- rownames(cl.df) 39 | 40 | write.csv(cl.df,"sst300_cl_anno.csv") 41 | 42 | load("//allen/programs/celltypes/workgroups/rnaseqanalysis/V1_ALM/process_new/Sst.cell.cl.map.80.rda") 43 | samples <- rownames(cell.cl.map.df) 44 | sst80_df <- cell.cl.map.df %>% 45 | mutate(org.cl = as.numeric(org.cl)) 46 | 47 | sst80_core_int <- sst80_df %>% 48 | group_by(org.cl) %>% 49 | summarise(n_core = sum(best.cl == org.cl)) 50 | names(sst80_core_int)[1] <- "cl" 51 | 52 | sst80_cl_anno <- read.delim("sst80_coords.tsv", header = TRUE, sep = "\t") 53 | rownames(sst80_cl_anno) <- sst80_cl_anno$sst80_cl 54 | names(sst80_cl_anno)[1:4] <- c("cl","cluster_id","cluster_label","cluster_color") 55 | 56 | sst80_cl_anno <- sst80_cl_anno %>% 57 | left_join(sst80_core_int) 58 | 59 | sst80_transitions <- cell.cl.map.df_to_transition.df.comb(sst80_df, sst80_cl_anno) %>% 60 | mutate(cl.x = as.numeric(cl.x), 61 | cl.y = as.numeric(cl.y)) %>% 62 | filter(trans_ratio > 0.05) %>% 63 | select(cl.x, cl.y, trans_n) 64 | 65 | sst80_interm <- net_coord_plot(sst80_cl_anno, sst80_transitions, sst80_cl_anno, cluster_ids = 1:30, split = F, 66 | "trans_n", val_min = 0, link_color = "dodgerblue") + 67 | scale_y_reverse() 68 | 69 | ggsave("sst80_constellation.pdf", 70 | sst80_interm, 71 | width = 4, height = 3, 72 | useDingbats = F) 73 | 74 | load("//allen/programs/celltypes/workgroups/rnaseqanalysis/V1_ALM/process_new/Sst.cell.cl.map.300.rda") 75 | sst300_df <- cell.cl.map.df %>% 76 | mutate(org.cl = as.numeric(org.cl)) 77 | 78 | sst300_core_int <- sst300_df %>% 79 | group_by(org.cl) %>% 80 | summarise(n_core = sum(best.cl == org.cl)) 81 | names(sst300_core_int)[1] <- "cl" 82 | 83 | sst300_cl_anno <- read.delim("sst300_coords.tsv", header = TRUE, sep = "\t") 84 | rownames(sst300_cl_anno) <- sst300_cl_anno$sst300_cl 85 | names(sst300_cl_anno)[1:4] <- c("cl","cluster_id","cluster_label","cluster_color") 86 | 87 | sst300_cl_anno <- sst300_cl_anno %>% 88 | left_join(sst300_core_int) 89 | 90 | sst300_transitions <- cell.cl.map.df_to_transition.df.comb(sst300_df, sst300_cl_anno) %>% 91 | mutate(cl.x = as.numeric(cl.x), 92 | cl.y = as.numeric(cl.y)) %>% 93 | filter(trans_ratio > 0.05) 94 | 95 | sst300_interm <- net_coord_plot(sst300_cl_anno, sst300_transitions, sst300_cl_anno, cluster_ids = 1:13, split = F, 96 | "trans_n", val_min = 0, link_color = "dodgerblue") + 97 | scale_y_reverse() 98 | 99 | ggsave("sst300_constellation.pdf", 100 | sst300_interm, 101 | width = 4, height = 3, 102 | useDingbats = F) 103 | 104 | 105 | load("//allen/programs/celltypes/workgroups/rnaseqanalysis/V1_ALM/process_new/cell.cl.map.df.rda") 106 | anno <- read_feather("//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180520/anno.feather") 107 | sample_to_cl <- anno %>% select(sample_id, cl) 108 | 109 | sst150_df <- cell.cl.map.df[samples,] %>% 110 | mutate(sample_id = rownames(.)) %>% 111 | mutate(org.cl = as.numeric(as.character(org.cl))) 112 | 113 | sst150_core_int <- sst150_df %>% 114 | group_by(org.cl) %>% 115 | summarise(n_core = sum(best.cl == org.cl)) 116 | 117 | sst150_cl_anno <- read.delim("sst150_coords.tsv", header = TRUE, sep = "\t") 118 | rownames(sst150_cl_anno) <- sst150_cl_anno$sst150_cl 119 | names(sst150_cl_anno)[1:5] <- c("cl","sst150_cl","cluster_id","cluster_label","cluster_color") 120 | 121 | sst150_cl_anno <- sst150_cl_anno %>% 122 | left_join(sst150_core_int, by = c("cl"="org.cl")) 123 | 124 | sst150_transitions <- cell.cl.map.df_to_transition.df.comb(sst150_df, sst150_cl_anno) %>% 125 | mutate(cl.x = as.numeric(cl.x), 126 | cl.y = as.numeric(cl.y)) %>% 127 | filter(trans_ratio > 0.05) 128 | 129 | sst150_interm <- net_coord_plot(sst150_cl_anno, sst150_transitions, sst150_cl_anno, cluster_ids = 30:50, split = F, 130 | "trans_n", val_min = 0, link_color = "dodgerblue") + 131 | scale_y_reverse() 132 | 133 | ggsave("sst150_constellation.pdf", 134 | sst150_interm, 135 | width = 4, height = 3, 136 | useDingbats = F) 137 | -------------------------------------------------------------------------------- /Figures/Figure_6/sst_river_plot.R: -------------------------------------------------------------------------------- 1 | library(ggplot2) 2 | library(dplyr) 3 | library(scrattch.io) 4 | library(feather) 5 | options(stringsAsFactors = F) 6 | 7 | source("custom_annotate_cat.R") 8 | source("sankey_functions.R") 9 | 10 | anno <- read_feather("//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180520/anno.feather") 11 | 12 | sst_cluster_anno <- anno %>% 13 | filter(subclass_label == "Sst") %>% 14 | select(cl, cluster_id, cluster_label, cluster_color) %>% 15 | unique() %>% 16 | arrange(cluster_id) %>% 17 | mutate(cluster_id = 1:n()) 18 | 19 | load("//allen/programs/celltypes/workgroups/rnaseqanalysis/V1_ALM/process_new/Sst.cell.cl.map.80.rda") 20 | sst80 <- cell.cl.map.df %>% 21 | mutate(sample_id = rownames(.)) 22 | 23 | load("//allen/programs/celltypes/workgroups/rnaseqanalysis/V1_ALM/process_new/Sst.cell.cl.map.300.rda") 24 | sst300 <- cell.cl.map.df %>% 25 | mutate(sample_id = rownames(.)) 26 | 27 | load("//allen/programs/celltypes/workgroups/rnaseqanalysis/V1_ALM/process_new/cell.cl.map.df.rda") 28 | sst150 <- cell.cl.map.df[sst80$sample_id,] %>% 29 | mutate(sample_id = rownames(.)) %>% 30 | mutate(org.cl = as.numeric(as.character(org.cl))) %>% 31 | left_join(sst_cluster_anno, by = c("org.cl" = "cl")) 32 | 33 | sst80_cl <- sst80 %>% 34 | select(sample_id, org.cl) 35 | names(sst80_cl)[2] <- "sst80_cl" 36 | sst150_cl <- sst150 %>% 37 | select(sample_id, org.cl, cluster_id, cluster_label, cluster_color) 38 | names(sst150_cl)[2] <- "sst150_cl" 39 | sst300_cl <- sst300 %>% 40 | select(sample_id, org.cl) 41 | names(sst300_cl)[2] <- "sst300_cl" 42 | 43 | 44 | river_data <- sst80_cl %>% 45 | left_join(sst150_cl) %>% 46 | left_join(sst300_cl) 47 | 48 | # Filter transitions of fewer than 3 cells 49 | river_data2 <- river_data %>% 50 | group_by(sst80_cl, sst150_cl) %>% 51 | mutate(n_80_150 = n()) %>% 52 | group_by(sst150_cl, sst300_cl) %>% 53 | mutate(n_150_300 = n()) %>% 54 | ungroup() %>% 55 | filter(n_80_150 > 2 & n_150_300 > 2) 56 | 57 | max_freq_name <- function(x) { 58 | names(table(x))[which(table(x) == max(table(x)))] 59 | } 60 | 61 | # Ordering: 62 | # First, order 300 based on clusters 63 | sst300_cl_anno <- river_data2 %>% 64 | group_by(sst300_cl) %>% 65 | summarise(sst300_cluster_weight = mean(cluster_id), 66 | sst300_cluster_color = color_mean(cluster_color), 67 | sst300_cluster_label = max_freq_name(cluster_label)) %>% 68 | arrange(sst300_cluster_weight) %>% 69 | mutate(sst300_cluster_id = 1:n()) 70 | 71 | river_data2 <- river_data2 %>% 72 | left_join(sst300_cl_anno) 73 | 74 | # Then, order standard clustering based on 300 75 | std_cl_anno <- river_data2 %>% 76 | group_by(cluster_id) %>% 77 | summarise(std_cluster_weight = mean(sst300_cluster_id), 78 | std_cluster_color = color_mean(sst300_cluster_color), 79 | std_cluster_label = cluster_label[1]) %>% 80 | arrange(std_cluster_weight) %>% 81 | mutate(std_cluster_id = 1:n()) 82 | 83 | river_data2 <- river_data2 %>% 84 | left_join(std_cl_anno) 85 | 86 | sst80_cl_anno <- river_data2 %>% 87 | group_by(sst80_cl) %>% 88 | summarise(sst80_cluster_weight = mean(std_cluster_id), 89 | sst80_cluster_color = color_mean(std_cluster_color), 90 | sst80_cluster_label = max_freq_name(std_cluster_label)) %>% 91 | arrange(sst80_cluster_weight) %>% 92 | mutate(sst80_cluster_id = 1:n()) 93 | 94 | river_data2 <- river_data2 %>% 95 | left_join(sst80_cl_anno) 96 | 97 | plot_nodes <- make_plot_nodes(make_group_nodes(river_data2, c("sst80_cluster","std_cluster","sst300_cluster")),pad = 0.2) 98 | 99 | group_links <- make_group_links(river_data2, 100 | c("sst80_cluster","std_cluster","sst300_cluster"), 101 | plot_nodes) %>% 102 | filter(n > 4) 103 | 104 | plot_links <- make_plot_links(group_links, 105 | fill = "std_cluster") 106 | 107 | sst80_labels <- plot_nodes %>% 108 | filter(group == "sst80_cluster") %>% 109 | mutate(x = 1, 110 | y = (ymin + ymax) / 2) 111 | std_labels <- plot_nodes %>% 112 | filter(group == "std_cluster") %>% 113 | mutate(x = 2, 114 | y = (ymin + ymax) / 2) 115 | sst300_labels <- plot_nodes %>% 116 | filter(group == "sst300_cluster") %>% 117 | mutate(x = 3, 118 | y = (ymin + ymax) / 2) 119 | 120 | river_plot <- build_river_plot_predefined(plot_nodes, plot_links) + 121 | geom_text(data = sst80_labels, 122 | aes(x = x, y = y, 123 | label = name), 124 | size = 2) + 125 | geom_text(data = std_labels, 126 | aes(x = x, y = y, 127 | label = name), 128 | size = 2) + 129 | geom_text(data = sst300_labels, 130 | aes(x = x, y = y, 131 | label = name), 132 | size = 2) 133 | 134 | ggsave("sst_river_plot.pdf", 135 | river_plot, 136 | width = 6, 137 | height = 3.5) 138 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Allen Institute Software License - This software license is the 2-clause BSD license 2 | plus a third clause that prohibits redistribution for commercial purposes without further permission. 3 | 4 | Copyright (c) 2018. Allen Institute. All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the 7 | following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the 10 | following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the 13 | following disclaimer in the documentation and/or other materials provided with the distribution. 14 | 15 | 3. Redistributions for commercial purposes are not permitted without the Allen Institute's written permission. 16 | For purposes of this license, commercial purposes is the incorporation of the Allen Institute's software into 17 | anything for which you will charge fees or other compensation. Contact terms@alleninstitute.org for commercial 18 | licensing opportunities. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, 21 | INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 25 | WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE 26 | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # tasic2018analysis 2 | 3 | Scripts used for Data processing and visualization for the paper Bosiljka Tasic, *et al.*, "Shared and distinct transcriptomic cell types across neocortical areas." (2018) Nature. 4 | 5 | These scripts were not written with portability in mind, and frequently refer to local file structures. They are provided as-is as a record of the analytical processes used in the paper. 6 | 7 | To perform similar analyses on your own data, we recommend utilizing the R packages built around this analysis, available at https://github.com/AllenInstitute/scrattch/ . 8 | 9 | ## Citation 10 | 11 | To cite use of these scripts or parts thereof, please use: 12 | Tasic, et al. "Shared and distinct transcriptomic cell types across neocortical areas." (2018) Nature. 13 | 14 | ## License 15 | 16 | The license for use of these scripts is available on Github at: https://github.com/AllenInstitute/tasic2018analysis/blob/master/LICENSE 17 | 18 | ## Statement of Support 19 | 20 | We are not currently supporting this code, but simply releasing it to the community AS IS but are not able to provide any guarantees of support. The community is welcome to submit issues, but you should not expect an active response. 21 | -------------------------------------------------------------------------------- /RNA-seq Analysis/ALM.ex.analysis.R: -------------------------------------------------------------------------------- 1 | ALM.ex.cl = droplevels(ex.cl[samp.dat[names(ex.cl),"Region"]=="ALM"]) 2 | ALM.ex.cl = ALM.ex.cl[cl.df[as.character(ALM.ex.cl),"region_label"]!="VISp"] 3 | ALM.ex.markers = select_markers(norm.dat, ALM.ex.cl, n.markers=50, de.genes=de.genes)$markers 4 | ALM.ex.tsne.result <- plot_tSNE_cl(norm.dat, ALM.ex.markers, ALM.ex.cl, cl.df, theta=0.05) 5 | ALM.ex.tsne.df = ALM.ex.tsne.result$tsne.df 6 | save(ALM.ex.tsne.df, file="ALM.ex.tsne.df.rda") 7 | ggsave("ALM.ex.tsne.pdf", ALM.ex.tsne.result$g, height=10, width=10) 8 | save(ALM.ex.cl, file="ALM.ex.cl.rda") 9 | save(ALM.ex.markers, file="ALM.ex.markers.rda") 10 | ALM.ex.cell.cl.map.df = get_core_transition(norm.dat, ALM.ex.cl, ALM.ex.markers, n.bin=5, n.iter=100, mc.cores=10) 11 | save(ALM.ex.cell.cl.map.df, file="ALM.ex.cell.cl.map.df.rda") 12 | 13 | de.param$q1.th=0.4 14 | ALM.ex.PT.cl = droplevels(ALM.ex.cl[ALM.ex.cl %in% row.names(cl.df)[cl.df$subclass_label=="L5 PT"]]) 15 | 16 | 17 | 18 | 19 | ALM.ex.cl.present.counts = get_cl_sums(norm.dat>0, droplevels(ALM.ex.cl)) 20 | cl.present.counts = get_cl_sums(norm.dat>0, cl.clean) 21 | 22 | df = within_group_specific_markers(levels(ALM.ex.PT.cl), norm.dat, ALM.ex.PT.cl, de.param = de.param, cl.present.counts=ALM.ex.cl.present.counts) 23 | 24 | 25 | PT = levels(ALM.ex.PT.cl) 26 | all = levels(ALM.ex.cl) 27 | pairs <- list(c1=list(95, PT), c2=list(96:97, PT), c3=list(PT, all), c4=list(95, all), c5=list(96:97,all), c6=list(96,96:97), c7=list(96, all), c8=list(97, 96:97),c9=list(97, all)) 28 | 29 | de.param$q1.th=0.4 30 | ALM.PT.gene.list= sapply(pairs, function(x){ 31 | print(x) 32 | df=group_specific_markers(as.character(x[[1]]), norm.dat, droplevels(cl.clean[cl.clean%in% as.character(x[[2]])]), de.param=de.param, n.markers=10, cl.present.counts = cl.present.counts) 33 | df=df[order(df$pval),] 34 | head(df,20) 35 | },simplify=F) 36 | save(ALM.PT.gene.list, file="ALM.PT.gene.list.top.20.rda") 37 | PT.pairs = read.csv("PT_DE_pairs",row.names=1,header=FALSE) 38 | 39 | names(ALM.PT.gene.list) = PT.pairs[[1]] 40 | 41 | ALM.PT.gene.df = do.call("rbind", ALM.PT.gene.list) 42 | ALM.PT.gene.df$pair = gsub("\\..*","",row.names(ALM.PT.gene.df)) 43 | ALM.PT.gene.df = ALM.PT.gene.df[,c(8,1:7)] 44 | row.names(ALM.PT.gene.df) = NULL 45 | write.csv(ALM.PT.gene.df, file="ALM.PT.gene.df.csv") 46 | 47 | de.param = de_param(q1.th=0.4) 48 | tmp=display_cl(ALM.ex.PT.cl,norm.dat, prefix="ALM.L5.PT", de.param = de.param, n.markers=30) 49 | ALM.ex.PT.markers= tmp$markers 50 | top.genes= unique(unlist(lapply(ALM.PT.gene.list, row.names))) 51 | 52 | setdiff(unique(unlist(lapply(ALM.PT.gene.list, row.names))), ALM.ex.PT.markers) 53 | tmp=display_cl(ALM.ex.PT.cl,norm.dat, prefix="ALM.L5.PT.tmp", de.param = de.param, markers=top.genes) 54 | save(ALM.ex.PT.markers, file="ALM.ex.PT.markers.rda") 55 | 56 | 57 | source("~/zizhen/My_R/map_river_plot.R") 58 | source("~/zizhen/My_R/sankey_functions.R") 59 | 60 | colnames(map.df) = c("map_cluster_id","map_prob","map_cluster_label", "cluster_id","sub_cluster","stim","cor") 61 | map.df$map_cluster_color = cl.df[as.character(map.df$map_cluster_id),"cluster_color"] 62 | 63 | tmp = compare_annotate(setNames(map.df$cluster_id, row.names(map.df)), setNames(map.df$map_cluster_id,row.names(map.df)), cl.df, reorder=FALSE) 64 | tmp.cl.df = tmp$cl.df 65 | map.df$cluster_label = map.df$cluster_id 66 | map.df$cluster_id = as.integer(as.factor(map.df$cluster_label)) 67 | map.df$cluster_color = tmp.cl.df[as.character(map.df$cluster_label),"cluster_color"] 68 | map.df$cluster_color=as.character(map.df$cluster_color) 69 | map.df$map_cluster_color=as.character(map.df$map_cluster_color) 70 | 71 | map.df$maintype = hrvatin.samp.dat$maintype 72 | inh.map.df = droplevels(map.df[which(map.df$maintype=="Interneurons"),]) 73 | g=river_plot(inh.map.df, min.cells=4, min.frac=0.1) 74 | ggsave(g, file="Hrvatin.inh.map.pdf") 75 | 76 | ex.map.df = droplevels(map.df[which(map.df$maintype=="Excitatory" & !map.df$cluster_label %in% c("RSP","Hip","Sub")),]) 77 | g=river_plot(ex.map.df, min.cells=10, min.frac=0.1) 78 | ggsave(g, file="Hrvatin.ex.map.pdf") 79 | 80 | tb=with(ex.map.df, table(map_cluster_label, stim)) 81 | tb.df = as.data.frame(tb) 82 | table(map.df$map_cluster_label) 83 | stim.pval = sapply(names(de.num), function(p){ 84 | x = de.num[[p]] 85 | n <- total.num - m 86 | k <- full.de.num[[p]] 87 | pval = phyper(x - 1, m, n, k, lower.tail = FALSE) 88 | }) 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | -------------------------------------------------------------------------------- /RNA-seq Analysis/VISp.ex.analysis.R: -------------------------------------------------------------------------------- 1 | VISp.ex.cl = droplevels(ex.cl[samp.dat[names(ex.cl),"Region"]=="VISp"]) 2 | VISp.ex.cl = VISp.ex.cl[cl.df[as.character(VISp.ex.cl),"region_label"]!="ALM"] 3 | VISp.ex.markers = select_markers(norm.dat, VISp.ex.cl, n.markers=50, de.genes=de.genes)$markers 4 | VISp.ex.tsne.result <- plot_tSNE_cl(norm.dat, VISp.ex.markers, VISp.ex.cl, cl.df, theta=0.05) 5 | VISp.ex.tsne.df = VISp.ex.tsne.result$tsne.df 6 | save(VISp.ex.tsne.df, file="VISp.ex.tsne.df.rda") 7 | ggsave("VISp.ex.tsne.pdf", VISp.ex.tsne.result$g, height=10, width=10) 8 | save(VISp.ex.cl, file="VISp.ex.cl.rda") 9 | save(VISp.ex.markers, file="VISp.ex.markers.rda") 10 | VISp.ex.cell.cl.map.df = get_core_transition(norm.dat, VISp.ex.cl, VISp.ex.markers, n.bin=5, n.iter=100, mc.cores=10) 11 | save(VISp.ex.cell.cl.map.df, file="VISp.ex.cell.cl.map.df.rda") 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /RNA-seq Analysis/all.cell.markers.heatmap.R: -------------------------------------------------------------------------------- 1 | load("cl.final.rda") 2 | library(feather) 3 | anno <- read_feather("/data/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180520/anno.feather") 4 | tmp.cl.df = as.data.frame(unique(anno[,c("cl","dendcluster_id")])) 5 | dend.id = with(tmp.cl.df, setNames(dendcluster_id, cl)) 6 | cl.ord = names(dend.id)[order(dend.id)] 7 | cl.ord = cl.ord[cl.ord %in% levels(cl.clean)] 8 | 9 | 10 | load("de.genes.rda") 11 | load("cl.med.rda") 12 | 13 | load("cl.clean.rda") 14 | cl.clean = setNames(factor(as.character(cl.clean), cl.ord), names(cl.clean)) 15 | 16 | 17 | ####Compute heatmap at single cell level that cover all the clusters. 18 | max.cl.size=50 19 | all.markers= select_markers(norm.dat, cl.clean, de.genes=de.genes, n.markers=20)$markers 20 | core.cells=names(cl)[is.na(cell.cl.map.df[names(cl.clean),"transition.cl"])] 21 | tmp.cells = unlist(tapply(core.cells, cl.clean[core.cells], function(x){ 22 | x= sample(x, min(length(x),max.cl.size)) 23 | },simplify=FALSE)) 24 | tmp.cl = setNames(factor(as.character(cl.clean[tmp.cells]), cl.ord), tmp.cells) 25 | tmp.cl = sort(tmp.cl) 26 | 27 | cl.med = cl.med[,levels(cl.clean)] 28 | all.markers= all.markers[rowMaxs(cl.med[all.markers,]) > 2] 29 | gene.min = apply(cl.med[all.markers,],1, function(x)min(which(x>2))) 30 | gene.sd= apply(cl.med[all.markers,],1, function(x)sd(which(x>2))) 31 | gene.sd[is.na(gene.sd)]=0 32 | 33 | select.markers = all.markers[gene.sd < 20 | gene.sd > 20 & rowSums(cl.med[all.markers,] > 2) > 20] 34 | select = gene.sd[select.markers] < 10 | rowSums(cl.med[select.markers,]>2) < 15 35 | set1 = select.markers[select] 36 | set2 = select.markers[!select] 37 | 38 | 39 | tmp=setNames(rep(6, length(de.genes)),names(de.genes)) 40 | set3=unique(unlist(select_markers_pair(norm.dat[set2,], add.genes=tmp, de.genes=de.genes,rm.genes=setdiff(row.names(norm.dat),set2)))) 41 | set3=set3[hclust(dist(cl.med[set3,]),method="average")$order] 42 | 43 | ord.genes = c(names(sort(gene.min[set1])),set3) 44 | tmp.dat = as.matrix(norm.dat[ord.genes,names(tmp.cl)]) 45 | tmp.dat = tmp.dat/rowMaxs(tmp.dat) 46 | cl.col = as.character(cl.df[as.character(cl[colnames(tmp.dat)]),"cluster_color"]) 47 | 48 | gray.red <-colorRampPalette(c("gray", "red")) 49 | 50 | pdf("all.heatmap.pdf",height=15,width=12) 51 | heatmap.3(tmp.dat,Rowv=NULL, Colv=NULL, ColSideColors=cl.col,trace="none",col=gray.red(100),labRow=FALSE, labCol=FALSE) 52 | dev.off() 53 | 54 | darkblue.orange <-colorRampPalette(c("darkblue", "orange")) 55 | pdf("all.heatmap.2.pdf",height=15,width=12) 56 | heatmap.3(tmp.dat,Rowv=NULL, Colv=NULL, ColSideColors=cl.col,trace="none",col=darkblue.orange(100),labRow=FALSE, labCol=FALSE) 57 | dev.off() 58 | 59 | cell.marker.dat = tmp.dat 60 | save(cell.marker.dat, file="cell.marker.dat.rda") 61 | -------------------------------------------------------------------------------- /RNA-seq Analysis/cl.transition.R: -------------------------------------------------------------------------------- 1 | library(iterclust) 2 | library(matrixStats) 3 | 4 | cl.clean = droplevels(cl[cl %in% row.names(cl.df)[cl.df$class_label!="Low Quality"]]) 5 | 6 | test_cv_cor <- function(norm.dat, cl, markers, n.bin=5,g.perc=1){ 7 | bins=unlist(tapply(names(cl), cl, function(x){ 8 | if(length(x) > n.bin){ 9 | tmp=rep_len(1:n.bin, length(x)) 10 | }else{ 11 | tmp = sample(1:n.bin, length(x)) 12 | } 13 | setNames(tmp[sample(length(tmp))], x) 14 | })) 15 | names(bins) = gsub(".*\\.", "", names(bins)) 16 | bins= bins[names(cl)] 17 | pred.cl = setNames(rep(NA, length(cl)), names(cl)) 18 | for(i in 1:n.bin){ 19 | print(i) 20 | train.cells = names(cl)[bins!=i] 21 | test.cells =names(cl)[bins==i] 22 | select.markers=sample(markers, round(length(markers)*g.perc)) 23 | map.result <- map_by_cor(norm.dat[select.markers,], cl[train.cells], norm.dat[select.markers, test.cells])$pred.df 24 | pred.cl[test.cells] = as.character(map.result[test.cells, "pred.cl"]) 25 | } 26 | return(pred.cl) 27 | } 28 | 29 | 30 | get_core_transition <- function(norm.dat, cl, select.markers, n.bin=5, n.iter=100, mc.cores=10) 31 | { 32 | cl.cv <- mclapply(1:n.iter, function(i){ 33 | tmp=test_cv_cor(norm.dat, cl, select.markers, n.bin=n.bin) 34 | }, mc.cores=mc.cores) 35 | 36 | cell.cl.cor.map = do.call("rbind",sapply(cl.cv, function(x){ 37 | df = data.frame(cell=names(x),cl=x) 38 | },simplify=F)) 39 | cell.cl.cor.map = table(cell.cl.cor.map[,1],cell.cl.cor.map[,2]) 40 | cell.cl.cor.map = cell.cl.cor.map / rowSums(cell.cl.cor.map) 41 | 42 | cell.cl.map.df = data.frame(org.cl= as.character(cl[row.names(cell.cl.cor.map)]),best.score=rowMaxs(cell.cl.cor.map), best.cl = colnames(cell.cl.cor.map)[apply(cell.cl.cor.map, 1, which.max)], stringsAsFactors=FALSE) 43 | row.names(cell.cl.map.df) = row.names(cell.cl.cor.map) 44 | tmp=get_pair_matrix_coor(cell.cl.cor.map, row.names(cell.cl.map.df), as.character(cell.cl.map.df$best.cl)) 45 | tmp1 = cell.cl.cor.map 46 | tmp1[tmp]= 0 47 | cell.cl.map.df$second.score = rowMaxs(tmp1) 48 | cell.cl.map.df$second.cl =colnames(tmp1)[apply(tmp1,1, which.max)] 49 | cell.cl.map.df$second.cl[cell.cl.map.df$second.score ==0] = NA 50 | 51 | cell.cl.map.df$transition.cl = NA 52 | tmp = with(cell.cl.map.df, org.cl!=best.cl | best.score < 0.9) 53 | cell.cl.map.df[tmp,"transition.cl"] = as.character(cell.cl.map.df[tmp,"best.cl"]) 54 | tmp = with(cell.cl.map.df, which(org.cl==transition.cl)) 55 | cell.cl.map.df$transition.cl[tmp] = as.character(cell.cl.map.df[tmp,"second.cl"]) 56 | 57 | cl.med <- do.call("cbind",tapply(names(cl), cl, function(x){ 58 | rowMedians(as.matrix(norm.dat[select.markers,x])) 59 | })) 60 | row.names(cl.med) = select.markers 61 | 62 | cell.cl.cor=cor(as.matrix(norm.dat[select.markers, row.names(cell.cl.map.df)]), cl.med[select.markers,]) 63 | cell.cl.map.df$cor = with(cell.cl.map.df, get_pair_matrix(cell.cl.cor, row.names(cell.cl.map.df),as.character(org.cl))) 64 | cell.cl.map.df$core = is.na(cell.cl.map.df$transition.cl) 65 | return(cell.cl.map.df) 66 | } 67 | 68 | load("select.markers.rda") 69 | cell.cl.map.df = get_core_transition(norm.dat, cl.clean, select.markers, n.bin=5, n.iter=100, mc.cores=10) 70 | 71 | corrected.cells = with(cell.cl.map.df, org.cl!=best.cl & best.score > 0.9) 72 | cl.clean.correct = cl.clean 73 | cl.clean.correct[corrected.cells] = cell.cl.map.df[corrected.cells, "best.cl"] 74 | 75 | tmp.cells = intersect(names(cl.clean)[cl.clean=="104"], row.names(samp.dat)[samp.dat$Region=="VISp"]) 76 | 77 | tmp1.cells = intersect(names(cl.clean)[cl.clean=="104"], row.names(samp.dat)[samp.dat$Region=="ALM"]) 78 | 79 | 80 | 81 | save(cell.cl.map.df, file="cell.cl.map.df.rda") 82 | 83 | transition.df = with(cell.cl.map.df, as.data.frame(table(org.cl, transition.cl))) 84 | transition.df = transition.df[transition.df$Freq > 0,] 85 | transition.df$org.cl = as.character(transition.df$org.cl) 86 | transition.df$transition.cl = as.character(transition.df$transition.cl) 87 | save(transition.df, file="transition.df.rda") 88 | 89 | ###combine transitions from both directions 90 | transition.df$cl.min = pmin(transition.df$org.cl, transition.df$transition.cl) 91 | transition.df$cl.max = pmax(transition.df$org.cl, transition.df$transition.cl) 92 | transition.df$cl.pair = paste(transition.df$cl.min, transition.df$cl.max) 93 | transition.df.comb= do.call("rbind",tapply(1:nrow(transition.df),transition.df$cl.pair, function(x){ 94 | tmp = transition.df[x,][1,] 95 | tmp$Freq = sum(transition.df[x,"Freq"]) 96 | tmp[,c(4,5,3)] 97 | })) 98 | cl.size = table(cl.clean) 99 | transition.df.comb$cl.min.size = cl.size[transition.df.comb$cl.min] 100 | transition.df.comb$cl.max.size = cl.size[transition.df.comb$cl.max] 101 | transition.df.comb$ratio = with(transition.df.comb,Freq/pmin(cl.min.size,cl.max.size)) 102 | save(transition.df.comb, file="transition.df.comb.rda") 103 | transition.df.comb$cl1_label = cl.df[as.character(transition.df.comb$cl.min),"cluster_label"] 104 | transition.df.comb$cl2_label = cl.df[as.character(transition.df.comb$cl.max),"cluster_label"] 105 | transition.df.comb[transition.df.comb$ratio > 0.1 & transition.df.comb$Freq > 1,] 106 | colnames(transition.df.comb)[1:2] = c("cl1","cl2") 107 | save(transition.df.comb, file="transition.df.comb.rda") 108 | 109 | 110 | 111 | 112 | 113 | 114 | -------------------------------------------------------------------------------- /RNA-seq Analysis/cluster_hetero.R: -------------------------------------------------------------------------------- 1 | #####internal heterogenity####### 2 | pca_hetero <- function(norm.dat,select.cells, perm.num=10,vg.padj=0.5,rm.gene.mod= NULL, rm.eigen=NULL,rm.th=0.6) 3 | { 4 | vg= findVG(2^norm.dat[,select.cells]-1) 5 | select.genes = row.names(vg)[vg$loess.padj < vg.padj] 6 | if(length(select.genes)==0){ 7 | return(0) 8 | } 9 | if(length(select.genes)==1){ 10 | return(1) 11 | } 12 | pca = prcomp(t(as.matrix(norm.dat[select.genes, select.cells])),tol=0.01) 13 | if(!is.null(rm.gene.mod)|!is.null(rm.eigen)){ 14 | if(is.null(rm.eigen)){ 15 | rm.eigen = get_eigen(rm.gene.mod, norm.dat, select.cells)[[1]] 16 | } 17 | else{ 18 | rm.eigen = rm.eigen[select.cells,] 19 | } 20 | rm.cor=cor(pca$x, rm.eigen) 21 | rm.score =rowMaxs(abs(rm.cor)) 22 | select.pca = which(rm.score < rm.th)[1] 23 | } 24 | pca.var = summary(pca)$importance[2,select.pca] 25 | ###shuffle expression of every gene. 26 | perm.pca.var = sapply(1:10, function(x){ 27 | perm.dat = norm.dat[select.genes, select.cells] 28 | for(i in 1:nrow(perm.dat)){ 29 | perm.dat[i, ] = sample(perm.dat[i, ]) 30 | } 31 | perm.pca = prcomp(t(as.matrix(perm.dat)),tol=0.5) 32 | summary(perm.pca)$importance[2,1] 33 | }) 34 | vg = vg[select.genes,] 35 | vg = vg[order(vg$loess.padj),] 36 | list(pca.ratio=pca.var/mean(perm.pca.var), pca.var = pca.var, vg.num=length(select.genes), vg=vg) 37 | } 38 | 39 | 40 | WGCNA_hetero <- function(norm.dat,select.cells, perm.num=10,vg.padj=0.5,rm.gene.mod=NULL) 41 | { 42 | vg= findVG(2^norm.dat[,select.cells]-1) 43 | select.genes = row.names(vg)[vg$loess.padj < vg.padj] 44 | dat = norm.dat[select.genes,select.cells] 45 | adj=adjacency(t(dat), power = 4,type="unsigned") 46 | adj[is.na(adj)]=0 47 | TOM = TOMsimilarity(adj,TOMType="unsigned") 48 | dissTOM = as.matrix(1-TOM) 49 | row.names(dissTOM)= colnames(dissTOM) = row.names(dat) 50 | geneTree = flashClust(as.dist(dissTOM), method = "average") 51 | dynamicMods = cutreeDynamic(dendro = geneTree, distM = dissTOM, cutHeight=0.99, deepSplit = 2, pamRespectsDendro = FALSE,minClusterSize = 5) 52 | gene.mod = split(row.names(dissTOM), dynamicMods) 53 | gene.mod = gene.mod[setdiff(names(gene.mod),"0")] 54 | rm.eigen=NULL 55 | if(!is.null(rm.gene.mod)){ 56 | rm.eigen = getEigen(rm.gene.mod, norm.dat, select.cells) 57 | } 58 | gm= filterGeneMod(norm.dat, select.cells, gene.mod, minModuleSize=5, rm.eigen=rm.eigen,min.padj = 40, padj.th=0.05,min.cells=3, ...) 59 | 60 | if(length(select.genes)==0){ 61 | return(0) 62 | } 63 | if(length(select.genes)==1){ 64 | return(1) 65 | } 66 | rd.WGCNA(norm.dat, select.genes, select.cells, minModuleSize=5,...) 67 | 68 | pca.var = summary(pca)$importance[2,1] 69 | 70 | ###shuffle expression of every gene. 71 | perm.pca.var = sapply(1:10, function(x){ 72 | perm.dat = norm.dat[select.genes, select.cells] 73 | for(i in 1:nrow(perm.dat)){ 74 | perm.dat[i, ] = sample(perm.dat[i, ]) 75 | } 76 | perm.pca = prcomp(t(as.matrix(perm.dat)),tol=0.5) 77 | summary(perm.pca)$importance[2,1] 78 | }) 79 | vg = vg[select.genes,] 80 | vg = vg[order(vg$loess.padj),] 81 | list(pca.ratio=pca.var/mean(perm.pca.var), pca.var = pca.var, vg.num=length(select.genes), vg=vg) 82 | } 83 | 84 | 85 | qc.index = setNames(samp.dat$percent_reads_aligned_to_genome_only/samp.dat$percent_reads_aligned_to_mrna,row.names(samp.dat)) 86 | 87 | ###Add oligodendrocyte contamination genes 88 | save(rm.gene.mod, file="rm.gene.mod.hetero.rda") 89 | pca.hetero= sapply(levels(clean.cl),function(x){ 90 | print(x) 91 | select.cells = names(clean.cl)[clean.cl==x] 92 | #tmp.norm = lm_normalize(norm.dat[,select.cells], qc.index[select.cells])[[1]] 93 | tmp=pca_hetero(norm.dat,select.cells, perm.num=10,vg.padj=0.5, rm.eigen=rm.eigen) 94 | },simplify=F) 95 | pca.ratio = unlist(sapply(pca.hetero, function(x)x[1])) 96 | 97 | names(pca.ratio) = paste(names(pca.hetero), cl.df[names(pca.hetero),"cluster_label"]) 98 | 99 | 100 | 101 | -------------------------------------------------------------------------------- /RNA-seq Analysis/co_cluster.R: -------------------------------------------------------------------------------- 1 | co.stats = get_cl_co_stats(cl, co.ratio) 2 | save(co.stats, file="co.stats.rda") 3 | 4 | select.cl= levels(cl.clean) 5 | co.stats.df = as.data.frame(as.table(co.stats$cl.co.ratio[select.cl, select.cl])) 6 | colnames(co.stats.df)=c("cl.x","cl.y","co.ratio") 7 | co.stats.df = co.stats.df[co.stats.df[,3]>0.05,] 8 | co.stats.df$cl.x.label = cl.df[as.character(co.stats.df$cl.x), "cluster_label"] 9 | co.stats.df$cl.y.label = cl.df[as.character(co.stats.df$cl.y), "cluster_label"] 10 | co.stats.df = co.stats.df[as.integer(co.stats.df[,1]) <= as.integer(co.stats.df[,2]),] 11 | write.table(co.stats.df, file="co.stats.df.csv",sep=",",row.names=F,quote=F) 12 | 13 | tmp.dat = co.stats$cl.co.ratio[select.cl, select.cl] 14 | row.names(tmp.dat)=colnames(tmp.dat) = cl.df[colnames(tmp.dat),"cluster_label"] 15 | pdf("cl.co.pdf",height=10,width=10) 16 | heatmap.3(tmp.dat, Rowv=as.dendrogram(dend), Colv=as.dendrogram(dend), trace="none",col=blue.red(100),cexRow=0.5, cexCol=0.5) 17 | dev.off() 18 | 19 | select.cells = sample_cells(cl.clean, 50) 20 | 21 | ord=order(match(as.character(cl.clean[select.cells]), labels(dend)), co.stats$cell.cl.confusion[select.cells]) 22 | select.cells= select.cells[ord] 23 | 24 | tmp.dat = co.stats$cell.cl.co.ratio[select.cells,labels(dend)] 25 | colnames(tmp.dat) = cl.df[colnames(tmp.dat),"cluster_label"] 26 | library(gplots) 27 | pdf("cell.cl.pdf") 28 | heatmap.2(tmp.dat, Rowv=NULL, Colv=as.dendrogram(dend), trace="none",col=blue.red(100), labRow=FALSE, labCol=FALSE) 29 | dev.off() 30 | 31 | library(WGCNA) 32 | tom = TOMsimilarity(as.matrix(co.ratio[select.cells, select.cells])) 33 | colnames(tom)=row.names(tom)=select.cells 34 | all.hc = hclust(as.dist(1-tom), method="average") 35 | ord1 = all.hc$labels[all.hc$order] 36 | ord = names(cl.clean)[order(cl.clean,match(names(cl.clean), ord1))] 37 | sep = cl.clean[ord] 38 | sep=which(sep[-1]!=sep[-length(sep)]) 39 | png("all.co.png",height=10,width=10) 40 | heatmap.3(as.matrix(co.ratio[ord,ord]), col = blue.red(100), trace="none", Rowv=NULL, Colv=NULL,colsep=sep,sepcolor="black") 41 | dev.off() 42 | 43 | 44 | select.cells = names(cl.clean)[cl.clean %in% row.names(cl.df)[cl.df$top=="Inh"]] 45 | select.cells = unlist(tapply(select.cells,cl.clean[select.cells],function(x)sample(x, min(length(x),100)))) 46 | ord1 = all.hc$labels[all.hc$order] 47 | ord1 = ord1[ord1%in% select.cells] 48 | ord = select.cells[order(cl.clean[select.cells],match(select.cells, ord1))] 49 | Inh.co.ratio.100 = co.ratio[ord,ord] 50 | sep = cl.clean[ord] 51 | sep=which(sep[-1]!=sep[-length(sep)]) 52 | pdf("Inh.co.100.pdf",height=10,width=10) 53 | #heatmap.3(Inh.co.ratio.100, col = blue.red(100), trace="none", Rowv=NULL, Colv=NULL,colsep=sep,sepcolor="black") 54 | heatmap.3(Inh.co.ratio.100, col = blue.red(100), trace="none", Rowv=NULL, Colv=NULL) 55 | dev.off() 56 | save(Inh.co.ratio.100, file="Inh.co.ratio.100.rda") 57 | 58 | select.cells = names(cl.clean)[cl.clean %in% row.names(cl.df)[cl.df$top=="Ex"]] 59 | select.cells = unlist(tapply(select.cells,cl.clean[select.cells],function(x)sample(x, min(length(x),100)))) 60 | ord1 = all.hc$labels[all.hc$order] 61 | ord1 = ord1[ord1%in% select.cells] 62 | ord = select.cells[order(cl.clean[select.cells],match(select.cells, ord1))] 63 | sep = cl.clean[ord] 64 | sep=which(sep[-1]!=sep[-length(sep)]) 65 | Ex.co.ratio.100 = co.ratio[ord,ord] 66 | pdf("Ex.co.100.pdf",height=10,width=10) 67 | heatmap.3(Ex.co.ratio.100, col = blue.red(100), trace="none", Rowv=NULL, Colv=NULL,colsep=sep,sepcolor="black") 68 | dev.off() 69 | save(Ex.co.ratio.100, file="Ex.co.ratio.100.rda") 70 | 71 | 72 | 73 | select.cells = names(cl.clean)[cl.clean %in% row.names(cl.df)[cl.df$top%in%c("Glia","Endo")]] 74 | select.cells = unlist(tapply(select.cells,cl.clean[select.cells],function(x)sample(x, min(length(x),100)))) 75 | ord1 = all.hc$labels[all.hc$order] 76 | ord1 = ord1[ord1%in% select.cells] 77 | ord = select.cells[order(cl.clean[select.cells],match(select.cells, ord1))] 78 | sep = cl.clean[ord] 79 | sep=which(sep[-1]!=sep[-length(sep)]) 80 | Noneuron.co.ratio.100 = co.ratio[ord,ord] 81 | pdf("Non.neuronal.co.100.pdf",height=10,width=10) 82 | heatmap.3(Noneuron.co.ratio.100, col = blue.red(100), trace="none", Rowv=NULL, Colv=NULL,colsep=sep,sepcolor="black") 83 | dev.off() 84 | save(Noneuron.co.ratio.100, file="Noneuron.co.ratio.100.rda") 85 | 86 | select.cells = c(colnames(Inh.co.ratio.100),colnames(Ex.co.ratio.100),colnames(Noneuron.co.ratio.100)) 87 | tmp.cl = cl.clean[select.cells] 88 | levels(tmp.cl) = cl.df[levels(tmp.cl),"cluster_label"] 89 | tmp.cl = setNames(factor(as.character(tmp.cl), levels=labels(dend)),names(tmp.cl)) 90 | select.cells=names(tmp.cl) 91 | ord1 = all.hc$labels[all.hc$order] 92 | ord1 = ord1[ord1%in% select.cells] 93 | ord = select.cells[order(tmp.cl,match(select.cells, ord1))] 94 | co.ratio.100= co.ratio[ord,ord] 95 | save(co.ratio.100, file="co.ratio.100.rda") 96 | 97 | -------------------------------------------------------------------------------- /RNA-seq Analysis/compare_1679.R: -------------------------------------------------------------------------------- 1 | library(feather) 2 | library(iterclust) 3 | library(dplyr) 4 | library(Matrix) 5 | 6 | 7 | load("cl.final.rda") 8 | load("cl.clean.rda") 9 | load("norm.dat.rda") 10 | load("samp.dat.rda") 11 | load("V1.cl.rda") 12 | 13 | 14 | d = "/allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_VISp_SMV1_1679/" 15 | V1.2016.samp.dat = as.data.frame(read_feather(file.path(d, "anno.feather"))) 16 | row.names(V1.2016.samp.dat)=V1.2016.samp.dat[["sample_id"]] 17 | 18 | V1.2016.counts = as.matrix(round(read.csv("dat_1679/counts.csv",row.names=1,header=T))) 19 | V1.2016.norm.dat = log2(t(t(V1.2016.counts)*10^6/Matrix::colSums(V1.2016.counts))+1) 20 | V1.2016.cl = setNames(V1.2016.samp.dat[["final_label"]],V1.2016.samp.dat[["sample_id"]]) 21 | V1.2016.cl = V1.2016.cl[V1.2016.cl!=""] 22 | tmp = unique(V1.2016.cl) 23 | tmp = tmp[order(as.integer(gsub(" f.*$","",tmp)))] 24 | V1.2016.cl = factor(V1.2016.cl,levels= tmp) 25 | tmp.cl = V1.2016.cl 26 | levels(tmp.cl) = 1:length(levels(tmp.cl)) 27 | tmp = select_markers(V1.2016.norm.dat, tmp.cl, n.markers=50, de.param = de_param(q1.th=0.4, q.diff.th=0.7)) 28 | V1.2016.de.genes = tmp$de.genes 29 | V1.2016.markers=tmp$markers 30 | common.markers=intersect(V1.2016.markers, row.names(norm.dat)) 31 | 32 | 33 | map.result = map_sampling(V1.2016.norm.dat[,names(V1.2016.cl)], V1.2016.cl, norm.dat[, V1.cells], markers = common.markers) 34 | 35 | map.df = map.result$map.df 36 | map.df$org.cl = cl[row.names(map.df)] 37 | map.df$org.cl_label = cl.df[as.character(cl[row.names(map.df)]), "cluster_label"] 38 | tb = as.data.frame(with(map.df%>%filter(prob > 0.9), table(org.cl_label, pred.cl))) %>% filter(Freq > 1) 39 | save(map.df, file="V1.ref.49.df.rda") 40 | map.ref.2016.df = map.df 41 | 42 | 43 | load("de.genes.rda") 44 | tmp = select_markers(norm.dat, droplevels(cl[V1.cells]), de.genes=de.genes, n.markers=50) 45 | common.markers=intersect(tmp$markers, row.names(V1.2016.norm.dat)) 46 | V1.markers= tmp$markers 47 | save(V1.markers, file="V1.markers.rda") 48 | map.result = map_sampling(as.matrix(norm.dat[common.markers,V1.cells]), droplevels(cl[V1.cells]), V1.2016.norm.dat, markers = common.markers) 49 | map.df = map.result$map.df 50 | map.df$org.cl = V1.2016.cl[row.names(map.df)] 51 | map.df$pred.cl_label = cl.df[as.character(map.df$pred.cl), "cluster_label"] 52 | map.df = map.df[!is.na(map.df$org.cl),] 53 | tb = as.data.frame(with(map.df%>%filter(prob > 0.9), table(org.cl, pred.cl_label))) %>% filter(Freq > 1) 54 | 55 | save(map.df, file="V1.ref.101.df.rda") 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | -------------------------------------------------------------------------------- /RNA-seq Analysis/compare_JoshWang.R: -------------------------------------------------------------------------------- 1 | library(feather) 2 | library(iterclust) 3 | 4 | load("cl.clean.rda") 5 | load("norm.dat.rda") 6 | load("samp.dat.rda") 7 | load("cl.med.rda") 8 | 9 | ###load mapping data and preprocessing 10 | d = "/allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/external/mouse_GABA_Paul_2016_20171002" 11 | Wang.samp.dat = as.data.frame(read_feather(file.path(d, "anno.feather"))) 12 | row.names(Wang.samp.dat)=Wang.samp.dat[["sample_id"]] 13 | Wang.counts = as.data.frame(read_feather(file.path(d, "data_t.feather"))) 14 | row.names(Wang.counts)= Wang.counts[,1] 15 | Wang.counts= as.matrix(Wang.counts[,-1]) 16 | 17 | ###compute overlapping genes present in both dataset, and normalize data on overlapping genes 18 | common.genes = intersect(row.names(Wang.counts), row.names(norm.dat)) 19 | 20 | Wang.counts=Wang.counts[common.genes,] 21 | Wang.dat = t(t(Wang.counts)*10^6/colSums(Wang.counts)) 22 | Wang.dat = log2(Wang.dat+1) 23 | 24 | ##Use our clusters as reference, compute markers 25 | load("de.genes.rda") 26 | inh.cl = droplevels(cl[cl %in% row.names(cl.df)[cl.df$class_label=="GABAergic"]]) 27 | inh.cells=names(inh.cl) 28 | inh.markers = select_markers(norm.dat, inh.cl, de.genes = de.genes, n.markers=50)$markers 29 | inh.markers=intersect(inh.markers, common.genes) 30 | common.markers= inh.markers[rowMaxs(Wang.dat[inh.markers,])>0] 31 | 32 | map.result = map_sampling(as.matrix(norm.dat[common.markers, inh.cells]), droplevels(cl[inh.cells]), Wang.dat, markers = common.markers) 33 | map.df = map.result$map.df 34 | map.freq = map.result$map.freq 35 | 36 | tmp.cor = rowMaxs(cor(Wang.dat[common.markers,], cl.med[common.markers, droplevels(cl[inh.cells])])) 37 | tmp.cor[is.na(tmp.cor)]=0 38 | map.df$cor = tmp.cor 39 | 40 | map.df$cluster_label = as.character(cl.df[as.character(map.df$pred.cl),"cluster_label"]) 41 | tmp = is.na(map.df$cluster_label) 42 | map.df$cluster_label[tmp] = map.df$cl[tmp] 43 | map.df$Wang.cl = Wang.samp.dat[match(row.names(map.df),Wang.samp.dat$sample_id),"cell_class_label"] 44 | save(map.df, file="map.Wang.rda") 45 | 46 | 47 | 48 | -------------------------------------------------------------------------------- /RNA-seq Analysis/compare_Linnarsson.R: -------------------------------------------------------------------------------- 1 | library(feather) 2 | library(iterclust) 3 | 4 | load("cl.clean.rda") 5 | load("norm.dat.rda") 6 | load("samp.dat.rda") 7 | load("cl.med.rda") 8 | load("V1.cl.rda") 9 | 10 | ###load mapping data and preprocessing 11 | d="/allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/external/mouse_SS_CA1_Zeisel_2015_20170620" 12 | linnarsson.samp.dat = as.data.frame(read_feather(file.path(d, "anno.feather"))) 13 | row.names(linnarsson.samp.dat)=linnarsson.samp.dat[["sample_id"]] 14 | linnarsson.counts = as.data.frame(read_feather(file.path(d, "data_t.feather"))) 15 | row.names(linnarsson.counts)= linnarsson.counts[,1] 16 | linnarsson.counts= as.matrix(linnarsson.counts[,-1]) 17 | 18 | ###compute overlapping genes present in both dataset, and normalize data on overlapping genes 19 | common.genes = intersect(row.names(linnarsson.counts), row.names(norm.dat)) 20 | cells= names(cl) 21 | linnarsson.counts=linnarsson.counts[common.genes,] 22 | linnarsson.dat = t(t(linnarsson.counts)*10^6/colSums(linnarsson.counts)) 23 | linnarsson.dat = log2(linnarsson.dat+1) 24 | 25 | ##Use our clusters as reference, compute markers 26 | select.markers=intersect(V1.markers, row.names(linnarsson.dat)) 27 | select.markers = select.markers[rowMaxs(linnarsson.dat[select.markers,])>0] 28 | 29 | map.result = map_sampling(norm.dat[,V1.cells], droplevels(cl[V1.cells]), linnarsson.dat, markers=select.markers) 30 | 31 | map.df = map.result$map.df 32 | 33 | map.df$pred_cluster_label = cl.df[as.character(map.df$pred.cl),"cluster_label"] 34 | map.df$cl = linnarsson.samp.dat[match(row.names(map.df),linnarsson.samp.dat$sample_id),"cluster_label"] 35 | map.df$coarse_cl= linnarsson.samp.dat[match(row.names(map.df),linnarsson.samp.dat$sample_id),"coarse_label"] 36 | map.df$group = linnarsson.samp.dat[match(row.names(map.df),linnarsson.samp.dat$sample_id),"group_label"] 37 | map.df$tissue = linnarsson.samp.dat[match(row.names(map.df),linnarsson.samp.dat$sample_id),"tissue_label"] 38 | 39 | tmp.cor = rowMaxs(cor(linnarsson.dat[select.markers,],cl.med[select.markers,])) 40 | tmp.cor[is.na(tmp.cor)] = 0 41 | map.df$cor = tmp.cor 42 | save(map.df, file="map.linnarsson.df.rda") 43 | 44 | 45 | ss.map.df = with(map.df,map.df[tissue == "sscortex" & cor > 0.4,]) 46 | save(ss.map.df, file="map.linnarsson.ss.df.rda") 47 | 48 | 49 | tb = as.data.frame(with(ss.map.df%>%filter(prob > 0.9), table(coarse_cl, pred_cluster_label))) %>% filter(Freq > 1) 50 | 51 | tb = as.data.frame(with(ss.map.df%>%filter(prob > 0.9), table(cl, pred_cluster_label))) %>% filter(Freq > 1) 52 | 53 | -------------------------------------------------------------------------------- /RNA-seq Analysis/compare_Tolias.R: -------------------------------------------------------------------------------- 1 | library(iterclust) 2 | load("norm.dat.rda") 3 | load("cl.final.rda") 4 | tmp=load("V1.cl.rda") 5 | load("cl.med.rda") 6 | 7 | d = "/allen/programs/celltypes/workgroups/rnaseqanalysis/osnat/patchseq_simple_mapping/results/Tolias" 8 | tmp=load(file.path(d, "quick_map.Tolias.rda")) 9 | 10 | shiny.d="/allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/patch_seq/tolias_patchseq_20161102" 11 | Tolias.df = as.data.frame(read_feather(file.path(shiny.d, "anno.feather"))) 12 | row.names(Tolias.df) = Tolias.df$sample_id 13 | 14 | 15 | Tolias.counts = as.matrix(read.csv(file.path(d,"counts.csv"),row.names=1)) 16 | 17 | genes=row.names(Tolias.counts) 18 | genes[genes=="6330527O06Rik"]="Lamp5" 19 | genes[genes=="A930038C07Rik"]="Ndnf" 20 | genes[genes=="Tmem90b"]="Syndig1" 21 | genes[genes=="Cxcr7"]="Ackr3" 22 | row.names(Tolias.counts)=genes 23 | 24 | 25 | common.genes= intersect(genes, row.names(norm.dat)) 26 | Tolias.counts <- as.matrix(Tolias.counts[common.genes,]) 27 | Tolias.dat =log2(t(t(Tolias.counts)*10^6/colSums(Tolias.counts))+1) 28 | 29 | 30 | 31 | common.markers= intersect(V1.markers, common.genes) 32 | map.result = map_sampling(as.matrix(norm.dat[common.markers,V1.cells]), droplevels(cl[V1.cells]), Tolias.dat, markers = common.markers) 33 | map.df = map.result$map.df 34 | map.freq = map.result$map.freq 35 | 36 | tmp.cor = rowMaxs(cor(Tolias.dat[common.markers,], cl.med[common.markers, droplevels(cl[V1.cells])])) 37 | tmp.cor[is.na(tmp.cor)]=0 38 | map.df$cor = tmp.cor 39 | 40 | map.df$cluster_label = as.character(cl.df[as.character(map.df$pred.cl),"cluster_label"]) 41 | tmp = is.na(map.df$cluster_label) 42 | map.df$cluster_label[tmp] = map.df$cl[tmp] 43 | map.df$class_label = Tolias.df[row.names(map.df), "class_label"] 44 | 45 | dend=readRDS("dend.RData") 46 | labels(dend) = row.names(cl.df)[match(labels(dend), cl.df$cluster_label)] 47 | V1.dend = prune_dend(dend, setdiff(labels(dend), levels(V1.cl))) 48 | 49 | 50 | summarize_cl <- function(dend, map.freq,conf.th=0.7) 51 | { 52 | node.height=setNames(get_nodes_attr(dend, "height"),get_nodes_attr(dend, "label")) 53 | dend.list = dend_list(dend) 54 | dend.list = dend.list[sapply(dend.list, length)>1] 55 | memb = sapply(names(dend.list),function(x){ 56 | rowSums(map.freq[,intersect(labels(dend.list[[x]]),colnames(map.freq)),drop=F]) 57 | }) 58 | memb = cbind(memb,map.freq) 59 | memb.th= lapply(row.names(memb),function(cell){ 60 | ###Check all the node with confidence > conf.th 61 | x = memb[cell,] 62 | mapped.node = colnames(memb)[which(x>conf.th)] 63 | 64 | ###mapped nodes not met the minimal gene number/ratio constraints 65 | ###Choose the deepest nodes that pass all the criteria. 66 | mapped.node=mapped.node[order(node.height[mapped.node])] 67 | i=mapped.node[1] 68 | ###Get the markers on every mapped nodes. 69 | c(cl=i, score=x[i]) 70 | }) 71 | memb.th = do.call("rbind",memb.th) 72 | row.names(memb.th) = row.names(memb) 73 | colnames(memb.th)=c("cl","score") 74 | memb.df = as.data.frame(memb.th) 75 | memb.df$resolution.index = 1- (node.height[memb.df$cl]/attr(dend,"height")) 76 | return(memb.df) 77 | } 78 | 79 | 80 | map.tree.df = summarize_cl(V1.dend, map.freq/100, 0.7) 81 | map.tree.df$cluster_label = as.character(cl.df[as.character(map.tree.df$cl),"cluster_label"]) 82 | tmp = is.na(map.tree.df$cluster_label) 83 | map.tree.df$cluster_label[tmp] = as.character(map.tree.df$cl[tmp]) 84 | map.tree.df$class_label = Tolias.df[row.names(map.tree.df), "class_label"] 85 | with(map.tree.df, table(class_label, cl)) 86 | 87 | save(map.tree.df, V1.dend, file="map.tree.df.rda") 88 | 89 | -------------------------------------------------------------------------------- /RNA-seq Analysis/consensus.clust.R: -------------------------------------------------------------------------------- 1 | library(iterclust) 2 | library(matrixStats) 3 | 4 | load("norm.dat.rda") 5 | load("select.cells.rda") 6 | load("top.cl.rda") 7 | load("rm.eigen.rda") 8 | 9 | ###Use previous version of clustering for annotation. 10 | library(feather) 11 | anno= read_feather("/allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20180312/anno.feather") 12 | ref.cl = setNames(anno$cluster_id, anno$sample_id) 13 | ref.cl.df = unique(as.data.frame(anno[,c("cluster_id","cluster_color","cluster_label","class_id","class_label","class_color","subclass_id","subclass_label","subclass_color")])) 14 | ref.cl.df = ref.cl.df[order(ref.cl.df$cluster_id),] 15 | row.names(ref.cl.df) = ref.cl.df$cluster_id 16 | load("../process_22679/select.markers.rda") 17 | ref.markers= select.markers 18 | 19 | 20 | ####Prepare parameters for clustering. 21 | all.cells=select.cells 22 | top.result = list(cl=as.factor(top.cl[select.cells]),markers=NULL) 23 | de.param = de_param(q1.th=0.5, q.diff.th=0.7, de.score.th=150, min.cells=4) 24 | 25 | 26 | 27 | ####Computing PCA-based consensus clustering result 28 | d = "subsample_pca/" 29 | result.files=file.path(d, dir(d, "result.*.rda")) 30 | co.result <- collect_co_matrix_sparseM(norm.dat, result.files, all.cells) 31 | save(co.result, file="pca.co.result.rda") 32 | co.ratio = co.result[[1]] 33 | 34 | consensus.result = iter_consensus_clust(co.ratio, co.result$cl.list, norm.dat, select.cells=all.cells, de.param = de.param) 35 | 36 | save(consensus.result, file="pca.consensus.result.rda") 37 | cl = consensus.result$cl 38 | 39 | 40 | refine.result = refine_cl(consensus.result$cl, co.ratio=co.ratio, tol.th=0.005, confusion.th=0.8) 41 | cl = refine.result$cl 42 | merge.result= merge_cl(norm.dat=norm.dat, cl=cl, rd.dat=t(norm.dat[consensus.result$markers,]), de.param = de.param,return.markers=FALSE) 43 | compare.result = compare_annotate(merge.result$cl, ref.cl, ref.cl.df) 44 | ggsave("map.PCA.pdf", compare.result$g) 45 | cl = compare.result$cl 46 | cl.df = compare.result$cl.df 47 | cl = setNames(factor(as.character(cl),levels=row.names(cl.df)), names(cl)) 48 | save(cl, cl.df, file="pca.cl.final.rda") 49 | 50 | 51 | ####Computing WGCNA-based consensus clustering result 52 | d = "subsample_WGCNA/" 53 | result.files=file.path(d, dir(d, "result.*.rda")) 54 | co.result <- iterclust::collect_co_matrix_sparseM(norm.dat, result.files, all.cells) 55 | save(co.result, file="WGCNA.co.result.rda") 56 | co.ratio = co.result[[1]] 57 | 58 | consensus.result = iter_consensus_clust(co.ratio, co.result$cl.list, norm.dat, select.cells=all.cells, de.param = de.param) 59 | save(consensus.result, file="WGCNA.consensus.result.rda") 60 | cl = consensus.result$cl 61 | refine.result = refine_cl(consensus.result$cl, co.ratio=co.ratio, tol.th=0.005, confusion.th=0.8) 62 | cl = refine.result$cl 63 | 64 | merge.result= merge_cl(norm.dat=norm.dat, cl=cl, rd.dat=t(norm.dat[consensus.result$markers,]), de.param = de.param,return.markers=FALSE,verbose=1) 65 | compare.result = compare_annotate(merge.result$cl, ref.cl, ref.cl.df) 66 | ggsave("map.WGCNA.pdf", compare.result$g,height=10, width=10) 67 | cl = compare.result$cl 68 | cl.df = compare.result$cl.df 69 | save(cl, cl.df, file="WGCNA.cl.final.rda") 70 | 71 | 72 | WGCNA.cl = cl 73 | WGCNA.cl.df = cl.df 74 | 75 | load("pca.cl.final.rda") 76 | write.csv(cl.df, "PCA.cl.df.csv") 77 | PCA.cl = cl 78 | PCA.cl.df = cl.df 79 | 80 | compare.result = compare_annotate(WGCNA.cl, PCA.cl, PCA.cl.df) 81 | ggsave("WGCNA.PCA.compare.pdf", compare.result$g, height=10, width=10) 82 | 83 | 84 | ####Reach consensus clusters 85 | load("pca.co.result.rda") 86 | PCA.co.result = co.result 87 | load("WGCNA.co.result.rda") 88 | WGCNA.co.result = co.result 89 | 90 | 91 | co.ratio.min = pmin(PCA.co.result$co.ratio, WGCNA.co.result$co.ratio) 92 | save(co.ratio.min, file="co.ratio.min.rda") 93 | 94 | cl.list = c(PCA.co.result$cl.list, WGCNA.co.result$cl.list) 95 | cl.mat = cbind(PCA.co.result$cl.mat, WGCNA.co.result$cl.mat) 96 | co.ratio.comb = crossprod(t(cl.mat)) 97 | co.ratio.comb@x = co.ratio.comb@x/length(cl.list) 98 | save(co.ratio.comb, file="co.ratio.comb.rda") 99 | 100 | de.param = de_param(q1.th=0.5, q.diff.th=0.7, de.score.th=150, min.cells=4) 101 | consensus.result = iter_consensus_clust(co.ratio.min, cl.list, norm.dat, select.cells=all.cells, de.param = de.param) 102 | 103 | refine.result = refine_cl(consensus.result$cl, co.ratio=co.ratio.min, tol.th=0.005, confusion.th=0.6) 104 | merge.result= merge_cl(norm.dat=norm.dat, cl=refine.result$cl, rd.dat=t(norm.dat[consensus.result$markers,]), de.param = de.param,return.markers=FALSE) 105 | save(merge.result, file="merge.result.rda") 106 | 107 | -------------------------------------------------------------------------------- /RNA-seq Analysis/copy_bam.R: -------------------------------------------------------------------------------- 1 | select.cl = c(121, 90, 85, 57, 42, 31, 18, 6) 2 | tmp.cl = droplevels(cl[cl %in% as.character(select.cl)]) 3 | 4 | d = "select_bam" 5 | dir.create(d) 6 | 7 | 8 | 9 | 10 | 11 | select.cells = sample_cells(tmp.cl, 10) 12 | bam_dir1 = setNames(file.path("/allen/programs/celltypes/workgroups/rnaseqanalysis/STARforLIMS/Mouse/star_out/", select.cells, paste0(select.cells,"_Aligned.sortedByCoord.out.bam")), select.cells) 13 | tmp1=file.exists(bam_dir1) 14 | 15 | bam_dir2 = setNames(with(samp.dat[select.cells,], file.path(fpkm_dir,paste0("ar_", ar_id, "_STAR_Aligned.sortedByCoord.out.bam"))), select.cells) 16 | tmp2= file.exists(bam_dir2) 17 | 18 | bam_dir = bam_dir1 19 | bam_dir[!tmp1] = bam_dir2[!tmp1] 20 | table(file.exists(bam_dir)) 21 | 22 | 23 | tmp=file.copy(bam_dir, file.path(d, paste0(select.cells,".bam"))) 24 | write.csv(cl.df, file="cl.df.csv") 25 | write.csv(cl, file="cl.csv") 26 | 27 | -------------------------------------------------------------------------------- /RNA-seq Analysis/fast_tsne.R: -------------------------------------------------------------------------------- 1 | fast_tsne <- function(dat, ...) 2 | { 3 | result <- fftRtsne(dat,fast_tsne_path = "~/src/FIt-SNE/bin/fast_tsne", ...) 4 | df = as.data.frame(result) 5 | row.names(df) = row.names(dat) 6 | colnames(df)=c("Lim1","Lim2") 7 | df 8 | } 9 | 10 | -------------------------------------------------------------------------------- /RNA-seq Analysis/intron.exon.ratio.R: -------------------------------------------------------------------------------- 1 | load(file.path(d, "20180228_RSC-11-142_mouse_star_exon.Rdata")) 2 | load(file.path(d, "20180228_RSC-11-142_mouse_star_intron.Rdata")) 3 | load(file.path(d, "20180228_RSC-11-142_mouse_star_samp.dat.Rdata")) 4 | row.names(samp.dat) = samp.dat[[1]] 5 | 6 | cell.cl = cl 7 | cell.cl.df = cl.df 8 | 9 | 10 | load("~/zizhen/projects/Mouse/Nuclei/M1_V1/cl.final.rda") 11 | nuclei.cl = cl 12 | nuclei.cl.df = cl.df 13 | 14 | L45.cells = names(cell.cl)[cell.cl %in% as.character(c(58:61,130,137))] 15 | L45.nuclei = names(nuclei.cl)[nuclei.cl %in% as.character(c(37:39,42))] 16 | 17 | gene.cell.intron.ratio <- rowSums(intron[,L45.cells])/rowSums(exon[,L45.cells]+intron[,L45.cells]) 18 | 19 | gene.nuclei.intron.ratio <- rowSums(intron[,L45.nuclei])/rowSums(exon[,L45.nuclei]+intron[,L45.nuclei]) 20 | 21 | nuclei.ratio = gene.nuclei.intron.ratio/gene.cell.intron.ratio 22 | nuclei.ratio=nuclei.ratio[!is.na(nuclei.ratio)] 23 | 24 | 25 | exon.cpm = t(t(exon)*10^6/Matrix::colSums(exon)) 26 | cell.exon = rowMeans(exon.cpm[,L45.cells]) 27 | nuclei.exon = rowMeans(exon.cpm[,L45.nuclei]) 28 | cell.nuclei.exon = data.frame(cell.exon, nuclei.exon) 29 | cell.nuclei.exon$cell.nuclei.ratio = cell.nuclei.exon[,1]/cell.nuclei.exon[,2] 30 | 31 | exon.ratio = rowMeans(exon[,L45.cells])/ 32 | 33 | cell.norm.dat = norm.dat 34 | 35 | tmp=load("~/zizhen/projects/Mouse/Nuclei/M1_V1/norm.dat.rda") 36 | tmp=load("~/zizhen/projects/Mouse/Nuclei/M1_V1/all.col.rda") 37 | all.col[is.na(all.col)]="black" 38 | nuclei.norm.dat= norm.dat 39 | nuclei.all.col = all.col[,L45.nuclei] 40 | tmp=display_cl(droplevels(nuclei.cl[L45.nuclei]), nuclei.norm.dat[,L45.nuclei],prefix="L45.nuclei.new",col=nuclei.all.col) 41 | -------------------------------------------------------------------------------- /RNA-seq Analysis/markers.R: -------------------------------------------------------------------------------- 1 | Inh.markers <- c("Gad2","Slc32a1","Prox1","Adarb2","Nfix","Nfib","Cacna2d1", 2 | "Cxcl14","Tnfaip8l3","Cplx3","Lamp5","Cd34","Pax6","Krt73", 3 | "Scrg1","Egln3","Ndnf","Tmem182","Ntn1","Pde11a","Pdlim5", 4 | "Lsp1","Slc35d3","Nkx2-1","Serpinf1","Col14a1","Vip","Sncg", 5 | "Crabp1","Slc10a4","Cldn10","Bhlhe22","Crispld2","Slc17a8", 6 | "Cyb5r2","Nr1h4","Wnt7b","Prss12","Igfbp6","Calb2","Grpr", 7 | "Pthlh","Elfn1","Rspo1","Slc18a3","Lmo1","Rspo4","Sostdc1", 8 | "Chat","Cbln4","Gsx2","Gpc3","Mab21l1","C1ql1","Itih5","Mybpc1", 9 | "Myl1","Lhx6","Sox6","Sst","Chodl","Calb1","Cbln4","Etv1","Edn1", 10 | "Kl","Il1rapl2","Myh8","Ptprk","Chrna2","Myh13","Ptgdr","Crhr2", 11 | "Hpse","Igsf9","C1ql3","Tacstd2","Th","Col6a1","Nts","Tac1","Pvalb", 12 | "Gabrg1","Il7","Bche","Prdm8","Syt2","Ostn","Pdlim3","C1ql1", 13 | "Gpr149","Vipr2","Meis2","Adamts19","Cpa6","Lgr6") 14 | Inh.comb.markers <- c("Reln","Cnr1","Nr2f2","Cck","Npy","Crh","Tac2") 15 | 16 | 17 | 18 | Ex.markers <- unique(c("Slc17a7","Rtn4rl2","Slc30a3","Cux2","Stard8","Otof","Rrad","Penk","Agmat", 19 | "Emx2","S100a3","Macc1","Rorb","Scnn1a","Whrn","Endou","Col26a1", 20 | "Rspo1","Fezf2","Hsd11b1","Batf3","Arhgap25","Colq","Pld5","Olfr78", 21 | "Tcap","Fgf17","Wfdc18","Wfdc17","Aldh1a7","Tgfb1","Ctsc","Rxfp2", 22 | "Prss35","Rgs12","Osr1","Oprk1","Cd52","Col23a1","Col18a1","Car1", 23 | "Car3","Fam84b","Chrna6","Chrnb3","Fn1","Tac1","Lce3c","Erg", 24 | "Cdc42ep5","Bmp5","Pvalb","Depdc7","Stac","C1ql2","Ptgfr","Slco2a1", 25 | "Pappa2","Dppa1","Npsr1","Htr2c","Hpgd","Nxph3","Sla2","Tshz2", 26 | "Rapgef3","Slc17a8","Trh","Nxph2","Foxp2","Col12a1","Syt6","Col5a1", 27 | "Gpr139","Ly6d","Sla","Cpa6","Ppp1r18","Faim3","Ctxn3","Nxph4", 28 | "Cplx3","Ctgf","Col8a1","Mup5","Ngf","Fam150a","F2r","Serpinb11","Fbxl7","P2ry12", 29 | "Crh","Kynu","Hsd17b2","Mup3","Tlcd1","Lhx5","Trp73","Cpa6","Gkn1","Col18a1","Lce3c","Erg","Bmp5","Stac","C1ql2","Slco2a1","Lrrc9","Trhr","Myzap","Krt80","H60b","Fam150a","Clic5","Kcnj5","Olfr110","Olfr111")) 30 | Ex.comb.markers <- c("Reln","Cdh13","Cpne7","Alcam","Rprm","Marcksl1") 31 | 32 | 33 | 34 | Global.markers <- c("Fez1","Phyhipl","Aplp1","Gnao1","Caly","Snap25","Atp1a3","Camk2b", 35 | "Syt1","Gabrg2","Fabp3","Stmn2","Kif5c","Slc32a1","Gad2","Dlx1","Dlx5","Dlx2","Dlx6os1", 36 | "Slc6a1","Sox2","Slc17a7","Nrn1","Neurod2","Sv2b","Satb2","Tbr1","Vsig2","Cmtm5","Kcnj10", 37 | "S100a16","S100a13","S1pr1","Gja1","Gjb6","Aqp4","Lcat","Acsbg1","Olig1","Sox10","Neu4", 38 | "Sapcd2","Gpr17","Plp1","Cldn11","Mag","Mog","Nkx6-2","Enpp6","9630013A20Rik","Brca1", 39 | "Mog","Opalin","Gjb1","Hapln2","Cyba","Ctsh","Ifitm3","Sparc","S100a11","Dcn","Col1a1", 40 | "Pltp","Vtn","Slc6a13","Spp1","Slc13a3","Col15a1","Slc47a1","Tgtp2","Ifi47","Esam", 41 | "Slco1a4","Slc38a5","Cldn5","H2-Q7","Slc38a11","Art3","Ace2","Acta2","Myh11","Pln", 42 | "Gja5","Kcnj8","Atp13a5","Aoc3","Ctss","C1qb","C1qc","C1qa","Cbr2","F13a1","Pf4", 43 | "Mrc1","Siglech","Selplg") 44 | 45 | 46 | 47 | cl.present = get_cl_means(norm.dat > 1, cl.clean) 48 | Ex.markers=Ex.markers[order(apply(cl.present[Ex.markers, row.names(cl.df)[c(53:109,111)]]>0.4 , 1, function(x)which(x)[1]))] 49 | write.csv(Ex.markers, file="markers_plot2.csv") 50 | 51 | Inh.markers=Inh.markers[order(apply(cl.present[Inh.markers, row.names(cl.df)[c(1:52,110)]]>0.4 , 1, function(x)which(x)[1]))] 52 | write.csv(Inh.markers, file="markers_plot3.csv") 53 | 54 | -------------------------------------------------------------------------------- /RNA-seq Analysis/retrograde.R: -------------------------------------------------------------------------------- 1 | tmp=with(samp.dat, is.na(Injection_type) | Injection_type == "" ) 2 | samp.dat[tmp,"Injection_type"]="0" 3 | tb = table(ex.cl, samp.dat[names(ex.cl),"Injection_type"]) 4 | select.cl = row.names(tb)[rowMins(tb) > 20] 5 | select.cells= names(cl)[cl %in% select.cl] 6 | 7 | de.param = de_param(q1.th=0.5, q.diff.th=0.7, de.score.th=150, min.cells=4) 8 | ###DEG between V1 and ALM within a mixed Inhibitory types. 9 | 10 | retro.cl = setNames(paste0(cl[select.cells], samp.dat[select.cells, "Injection_type"]), select.cells) 11 | 12 | tmp= levels(droplevels(cl[select.cells])) 13 | retro.pairs = data.frame(null=paste0(tmp,"0"), retro=paste0(tmp,"retrograde"), stringsAsFactors=FALSE) 14 | 15 | row.names(retro.pairs) = paste(retro.pairs[,1], retro.pairs[,2], sep="_") 16 | 17 | load("select.genes.rda") 18 | retro.de.genes = de_score_pairs(norm.dat[select.genes,select.cells], cl=retro.cl, pairs=retro.pairs, de.param = de.param)$de.genes 19 | unlist(sapply(retro.de.genes, function(x)x$num)) 20 | 21 | tmp=sapply(retro.de.genes, function(x)x$genes) 22 | tmp[sapply(tmp,length)>0] 23 | tmp=sapply(retro.de.genes, function(x)x$up.genes) 24 | tmp[sapply(tmp,length)>0] 25 | tmp=sapply(retro.de.genes, function(x)x$down.genes) 26 | tmp[sapply(tmp,length)>0] 27 | 28 | 29 | retro.cells = intersect(row.names(samp.dat)[samp.dat$Injection_type=="retrograde"], names(cl.clean)) 30 | tb=table(droplevels(cl.clean[retro.cells]), samp.dat[retro.cells, "injection_roi"]) 31 | 32 | 33 | 34 | tb = tb[rowSums(tb > 20) > 1,] 35 | select.cl= row.names(tb) 36 | de.df=sapply(select.cl, function(x){ 37 | select.roi = colnames(tb)[tb[x, ] > 20] 38 | tmp.cells= retro.cells[as.character(cl.clean[retro.cells])==x & samp.dat[retro.cells,"injection_roi"] %in% select.roi] 39 | select.roi = setNames(samp.dat[tmp.cells, "injection_roi"], tmp.cells) 40 | de.df = DE_genes_pw(norm.dat, select.roi, counts=as.matrix(exon[select.genes, names(select.roi)], use.voom=TRUE)) 41 | de.df=sapply(de.df, function(x){ 42 | x=x[order(x$padj,-abs(x$lfc)),] 43 | x=x[x$padj < 0.05 & abs(x$lfc)>1,] 44 | },simplify=F) 45 | },simplify=F) 46 | save(de.df, file="retro.roi.de.df.rda") 47 | tmp=sapply(de.df, function(x)sapply(x, nrow)) 48 | 49 | cl.low = droplevels(cl[!names(cl) %in% names(cl.clean)]) 50 | save(cl.low, file="cl.low.rda") 51 | 52 | retro.cells = intersect(names(cl.clean), row.names(samp.dat)[samp.dat$Injection_type=="retrograde"]), 53 | non.retro.cells = setdiff(names(cl.clean),retro.cells) 54 | 55 | clean.cells=split(non.retro.cells, paste(samp.dat[non.retro.cells,"Region"],cl.df[as.character(cl.clean[non.retro.cells]),"class_label"],sep="_")) 56 | clean.cells=split(non.retro.cells, paste(samp.dat[non.retro.cells,"Region"],samp.dat[non.retro.cells, "full_genotype"])) 57 | 58 | cells.list = list(control=control.cells, fail.qc = fail.qc.cells, doublet = rm.cells, cl.low = names(cl.low), retro = retro.cells, non.retro.cells = non.retro.cells) 59 | save(cells.list, file="cells.list.rda") 60 | 61 | 62 | old.cl = as.data.frame(read_feather("/allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/facs_seq/mouse_V1_ALM_20170913/anno.feather")) 63 | row.names(old.cl) = old.cl[,1] 64 | old.cl[[1]]=NULL 65 | old.cl= old.cl[,c("class_label","cluster_label","region_label","inj_type_label")] 66 | 67 | absent.cells= setdiff(row.names(old.cl), names(cl)) 68 | tb= as.data.frame(table(old.cl[absent.cells, "cluster_label"])) 69 | tb2 = as.data.frame(table(old.cl[,"cluster_label"])) 70 | 71 | absent.df = inner_join(tb, tb2, by="Var1") 72 | colnames(absent.df)=c("cluster","absent","total") 73 | write.csv(absent.df, "absent.cells.csv") 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | -------------------------------------------------------------------------------- /RNA-seq Analysis/saturation_test.R: -------------------------------------------------------------------------------- 1 | library(iterclust) 2 | 3 | subsample.de.genes=list() 4 | load("norm.dat.rda") 5 | load("cl.final.rda") 6 | load("select.genes.rda") 7 | load("cl.med.rda") 8 | load("cl.clean.rda") 9 | 10 | dir.create("subsample_merge") 11 | load("de.param.rda") 12 | n.bin = 10 13 | bins = sample(1:n.bin, length(cl.clean),replace=TRUE) 14 | for(i in 1:n.bin){ 15 | ####subsample cells and recompute de.genes 16 | select.cells = names(cl.clean)[bins<= i] 17 | print(length(select.cells)) 18 | rd.dat = t(as.matrix(norm.dat[select.genes, select.cells])) 19 | merge.result= merge_cl(norm.dat, cl.clean[select.cells], rd.dat=rd.dat, de.param=de.param) 20 | save(merge.result, file=file.path("subsample_merge/",paste0(i,".result.rda"))) 21 | } 22 | 23 | 24 | 25 | merge.files <- dir("subsample_merge/",pattern="result.rda") 26 | merge.stats <- do.call("rbind",sapply(merge.files, function(f){ 27 | load(file.path("subsample_merge/",f)) 28 | tmp.cl = merge.result$cl 29 | cl.size = table(cl[names(tmp.cl)]) 30 | tb=table(tmp.cl, cl.clean[names(tmp.cl)]) 31 | tb.df = as.data.frame(tb) 32 | tb.df = tb.df[tb.df$Freq>0,] 33 | tmp=split(tb.df[,2],tb.df[,1]) 34 | merge.size=sapply(tmp, length) 35 | merge.cl =names(merge.size)[merge.size>1] 36 | if(length(merge.cl)>0){ 37 | tb.df = tb.df[tb.df[,1]%in% merge.cl,] 38 | tb.df =tb.df[order(tb.df[,1]),] 39 | #tb.df[,"mergeCl"] = cl.df[as.character(tb.df[,1]),"cluster_label"] 40 | #tb.df[,"orgCl"] = cl.df[as.character(tb.df[,2]),"cluster_label"] 41 | colnames(tb.df) = c("mergeCl", "orgCl") 42 | absent.cl = colnames(tb)[colSums(tb)==0] 43 | merge.df = data.frame(org.cl = as.character(tb.df$orgCl), merge.cl = as.character(tb.df$mergeCl),stringsAsFactors=FALSE) 44 | merge.df = rbind(merge.df, data.frame(org.cl = as.character(absent.cl), merge.cl=rep("absent", length(absent.cl)),stringsAsFactors=FALSE)) 45 | 46 | nochange.cl = setdiff(colnames(tb), c(absent.cl, as.character(tb.df[,2]))) 47 | merge.df = rbind(merge.df, data.frame(org.cl = as.character(nochange.cl,"cluster_label"), merge.cl=rep("nochange", length(nochange.cl)),stringsAsFactors=FALSE)) 48 | merge.df$sample_size = length(tmp.cl) 49 | merge.df$cl_size = cl.size[as.character(merge.df$org.cl)] 50 | return(merge.df) 51 | } 52 | return(NULL) 53 | },simplify=F)) 54 | 55 | 56 | merge.stats$org.cl = factor(as.character(merge.stats$org.cl), labels(dend)) 57 | merge.stats$merge.cl = droplevels(factor(merge.stats$merge.cl, levels=c("absent","nochange",labels(dend)))) 58 | 59 | merge.stats$org.cl_label = merge.stats$org.cl 60 | levels(merge.stats$org.cl_label) = cl.df[levels(merge.stats$org.cl_label),"cluster_label"] 61 | merge.stats$merge.cl_label = merge.stats$merge.cl 62 | levels(merge.stats$merge.cl_label) = c("absent", "nochange", as.character(cl.df[levels(merge.stats$merge.cl_label)[-(1:2)],"cluster_label"])) 63 | 64 | merge.stats$sample_size = factor(merge.stats$sample_size) 65 | saturation.df = merge.stats 66 | save(saturation.df, file="saturation.df.rda") 67 | 68 | cl.color = setNames(cl.df$cluster_color, cl.df$cluster_label) 69 | cl.color = c("black","white",as.character(cl.color[levels(merge.stats$merge.cl)[-(1:2)]])) 70 | 71 | g=ggplot(saturation.df, aes(org.cl, sample_size, fill = merge.cl)) + geom_raster() + scale_fill_manual(values=cl.color) 72 | g = g + theme(axis.text.x = element_text(angle=90, hjust=1)) 73 | pdf("saturation.pdf",height=6,width=14) 74 | g 75 | dev.off() 76 | 77 | 78 | -------------------------------------------------------------------------------- /RNA-seq Analysis/subsample.reads.R: -------------------------------------------------------------------------------- 1 | load("exon.rda") 2 | source("~/zizhen/My_R/subsample.R") 3 | library(parallel) 4 | frac= seq(0.1, 0.9, by=0.1) 5 | tmp = mclapply(frac, function(f){ 6 | print(f) 7 | dat = subsample_frac(exon, f) 8 | colSums(dat > 0) 9 | },mc.cores=9) 10 | tmp[[10]] = colSums(exon>0) 11 | tmp.df = do.call("cbind",tmp) 12 | colnames(tmp.df) = seq(0.1,1,by=0.1) 13 | subsample.reads.df <- as.data.frame(as.table(tmp.df)) 14 | colnames(subsample.reads.df)= c("sample_id","Frac","gene.counts") 15 | 16 | subsample.reads.df$class = cl.df[as.character(cl[as.character(subsample.reads.df$sample_id)]),"class_label"] 17 | subsample.reads.df[subsample.reads.df$class=="Endothelial", "class"] = "Non-Neuronal" 18 | 19 | subsample.reads.df= droplevels(subsample.reads.df) 20 | subsample.reads.df$Frac = factor(subsample.reads.df$Frac) 21 | 22 | g=ggplot(subsample.reads.df, 23 | aes(x = Frac, 24 | y = gene.counts)) + 25 | geom_violin(aes(fill = class),size = 0.1) + 26 | stat_summary(aes(x = Frac, 27 | y = gene.counts), 28 | fun.ymin = function(z) { quantile(z,0.25) }, 29 | fun.ymax = function(z) { quantile(z,0.75) }, 30 | fun.y = median, 31 | color = "black", 32 | size = 0.1) + 33 | facet_wrap(~ class, nrow=3) + 34 | scale_fill_manual(values=c("dodgerblue", "skyblue","gray80")) + 35 | scale_y_continuous(limits = c(0, 16100), breaks = c(0, 4000, 8000, 12000, 16000)) + 36 | theme_bw(base_size = 5) + 37 | theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.3), 38 | panel.grid.major.x = element_blank(), 39 | panel.grid.minor.y = element_blank(), 40 | panel.border = element_blank(), 41 | legend.position="none") + 42 | xlab("Sampling rate") + 43 | ylab("Gene counts") 44 | 45 | ggsave("subsample.genes_detected.pdf", g, width=3, height=2.5, useDingbats=F) 46 | save(subsample.reads.df, file="subsample.reads.df.rda") 47 | -------------------------------------------------------------------------------- /RNA-seq Analysis/tune_parameter.R: -------------------------------------------------------------------------------- 1 | library(iterclust) 2 | load("cl.final.rda") 3 | load("norm.dat.rda") 4 | load("top.cl.rda") 5 | load("rm.eigen.rda") 6 | ref.cl = cl 7 | ref.cl.df = cl.df 8 | 9 | 10 | 11 | select.cells=names(cl)[cl %in% row.names(cl.df)[cl.df$subclass_label=="Sst"]] 12 | save(select.cells, file="Sst.select.cells.rda") 13 | all.cells=select.cells 14 | 15 | top.result = list(cl=as.factor(top.cl[select.cells]),markers=NULL) 16 | de.param = de_param(q1.th=0.5, q.diff.th=0.7, min.cells=4) 17 | 18 | library(parallel) 19 | nodes <- makeCluster(12, type="FORK") 20 | clusterExport(nodes,c("norm.dat", "all.cells","rm.eigen","top.result")) 21 | 22 | for(de.score.th in c(80,300)){ 23 | de.param$de.score.th = de.score.th 24 | clusterExport(nodes,c("de.param")) 25 | d = paste0("subsample_pca_",de.score.th) 26 | dir.create(d) 27 | clusterExport(nodes,c("d")) 28 | tmp= parSapply(nodes, 1:100, function(i){ 29 | library(iterclust) 30 | prefix = paste("iter",i,sep=".") 31 | outfile= file.path(d, paste0("result.",i,".rda")) 32 | if(file.exists(outfile)){ 33 | return(NULL) 34 | } 35 | select.cells=sample(all.cells, round(length(all.cells)*0.8)) 36 | save(select.cells, file=file.path(d, paste0("cells.",i,".rda"))) 37 | result <- iter_clust(norm.dat=norm.dat, select.cells=select.cells,prefix=prefix, split.size = 10, de.param = de.param, dim.method="pca",result= top.result,rm.eigen=rm.eigen, rm.th=0.7) 38 | save(result, file=outfile) 39 | }) 40 | result.files=file.path(d, dir(d, "result.*.rda")) 41 | PCA.co.result <- collect_co_matrix_sparseM(norm.dat, result.files, all.cells) 42 | 43 | d = paste0("subsample_WGCNA_", de.score.th) 44 | dir.create(d) 45 | clusterExport(nodes,c("d")) 46 | tmp= parSapply(nodes, 1:100, function(i){ 47 | library(iterclust) 48 | prefix = paste("iter",i,sep=".") 49 | outfile= file.path(d, paste0("result.",i,".rda")) 50 | if(file.exists(outfile)){ 51 | return(NULL) 52 | } 53 | select.cells=sample(all.cells, round(length(all.cells)*0.8)) 54 | save(select.cells, file=file.path(d, paste0("cells.",i,".rda"))) 55 | result <- iter_clust(norm.dat=norm.dat, select.cells=select.cells,prefix=prefix, split.size = 10, de.param = de.param, dim.method="WGCNA",result= top.result,rm.eigen=rm.eigen, rm.th=0.7) 56 | save(result, file=outfile) 57 | }) 58 | result.files=file.path(d, dir(d, "result.*.rda")) 59 | WGCNA.co.result <- collect_co_matrix_sparseM(norm.dat, result.files, all.cells) 60 | 61 | cl.list = c(PCA.co.result$cl.list, WGCNA.co.result$cl.list) 62 | co.ratio.min = pmin(PCA.co.result$co.ratio, WGCNA.co.result$co.ratio) 63 | cl.mat = cbind(PCA.co.result$cl.mat, WGCNA.co.result$cl.mat) 64 | 65 | consensus.result = iter_consensus_clust(co.ratio.min, cl.list, norm.dat, select.cells=all.cells, de.param = de.param) 66 | 67 | refine.result = refine_cl(consensus.result$cl, co.ratio=co.ratio.min, tol.th=0.005, confusion.th=0.8) 68 | merge.result= merge_cl(norm.dat=norm.dat, cl=refine.result$cl, rd.dat=t(norm.dat[consensus.result$markers,]), de.param = de.param,return.markers=FALSE) 69 | compare.result = compare_annotate(merge.result$cl, ref.cl, ref.cl.df) 70 | cl = compare.result$cl 71 | cl.df = compare.result$cl.df 72 | cl = setNames(factor(as.character(cl),levels=row.names(cl.df)), names(cl)) 73 | save(cl, cl.df, file=paste0("Sst.cl.", de.score.th, ".rda")) 74 | } 75 | 76 | ref.cl = droplevels(ref.cl[all.cells]) 77 | ref.cl.df = ref.cl.df[levels(ref.cl),] 78 | 79 | 80 | 81 | 82 | source("~/zizhen/My_R/map_river_plot.R") 83 | source("~/zizhen/My_R/sankey_functions.R") 84 | map.df = ref.cl.df[as.character(ref.cl),c(2,3,5:11)] 85 | row.names(map.df) = names(ref.cl) 86 | colnames(map.df) = paste0("map_",colnames(map.df)) 87 | 88 | load("Sst.cl.80.rda") 89 | cl.df$cluster_id = as.integer(row.names(cl.df)) 90 | compare.result = compare_annotate(cl, ref.cl, ref.cl.df) 91 | 92 | ggsave("Sst.cl.80.map.pdf", compare.result$g) 93 | map.80.df = cbind(map.df, cl.df[as.character(cl[row.names(map.80.df)]),]) 94 | save(map.80.df, file="map.80.df.rda") 95 | 96 | 97 | g80 <- river_plot(map.80.df, min.cells=4, min.frac=0.15) 98 | ggsave("Sst.80.river.pdf", g80) 99 | 100 | load("Sst.cl.300.rda") 101 | map.300.df = cbind(map.df, cl.df[as.character(cl[row.names(map.80.df)]),c(3:4,6:12)]) 102 | 103 | 104 | g300 <- river_plot(map.300.df, min.cells=4, min.frac=0.1) 105 | ggsave("Sst.300.river.pdf", g300) 106 | save(map.300.df, file="map.300.df.rda") 107 | 108 | 109 | source("~/zizhen/My_R/hicat/R/cl.transition.R") 110 | for(de.score.th in c(80,300)){ 111 | load(paste0("Sst.cl.", de.score.th, ".rda")) 112 | select.markers= select_markers(norm.dat, cl, n.markers=50, de.param = de.param)$markers 113 | print(de.score.th) 114 | print(length(cl)) 115 | print(length(select.markers)) 116 | cell.cl.map.df = get_core_transition(norm.dat, cl, select.markers, n.bin=5, n.iter=100, mc.cores=10) 117 | save(cell.cl.map.df, file=paste0("Sst.cell.cl.map.", de.score.th, ".rda")) 118 | } 119 | 120 | 121 | -------------------------------------------------------------------------------- /tasic2018analysis.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | --------------------------------------------------------------------------------